7.1 Remove columns not needed for the current analysis
# Keep every column of `mort` except the two that this analysis does not use,
# then print the trimmed table.
rates <- select(mort, -Location_of_death, -Marital_status)
rates
## # A tibble: 60,143 x 11
## ID Sex Cause_of_death dbuid2016 Postalcode B_year B_month B_day D_year D_month D_day
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1000001 M D47 13070175019 E1A1E9 1943 7 8 2016 9 22
## 2 1000002 F C34 48060924001 T2E0T2 1943 5 22 2016 12 14
## 3 1000003 M C26 35204502002 M4J1L1 1953 8 9 2016 4 21
## 4 1000004 M C16 24663048013 H9H1B4 1923 6 14 2016 11 21
## 5 1000005 M C22 24560249013 J2W2B6 1931 8 8 2016 4 4
## 6 1000006 F C50 35250845002 L8H2K1 1933 11 4 2016 2 6
## 7 1000007 F C85 35370763005 N9Y3X5 1918 5 20 2016 2 9
## 8 1000008 F C56 35250788003 L8H7G2 1972 8 18 2016 6 13
## 9 1000009 F C64 24662429005 H9P2B3 1922 7 23 2016 5 25
## 10 1000010 M C22 48111094001 T5M1G2 1953 2 7 2016 1 16
## # i 60,133 more rows
7.2 Check for and remove duplicate records
# Tally the rows per ID and keep only the IDs that occur two or more times;
# `n` is the integer count column produced by count().
rates_dup <- count(rates, ID) %>%
  filter(n >= 2L)
rates_dup
## # A tibble: 23 x 2
## ID n
## <dbl> <int>
## 1 1000170 2
## 2 1000662 2
## 3 1001349 2
## 4 1001352 2
## 5 1004287 2
## 6 1004618 2
## 7 1004869 2
## 8 1005245 2
## 9 1008199 2
## 10 1012433 2
## # i 13 more rows
# Show the full records behind every duplicated ID: a filtering join keeps the
# rows of `rates` whose ID is listed in `rates_dup`, without adding columns.
semi_join(rates, rates_dup, by = "ID")
## # A tibble: 47 x 11
## ID Sex Cause_of_death dbuid2016 Postalcode B_year B_month B_day D_year D_month D_day
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1000170 F C91 12060095005 B4V1T1 1922 7 25 2016 7 22
## 2 1000170 F C91 12060095005 B4V1T1 1922 7 25 2016 7 22
## 3 1000662 M C34 24720038001 J7P4H7 1965 3 29 2016 5 2
## 4 1000662 M C34 24720038001 J7P4H7 1965 3 29 2016 5 2
## 5 1001349 M C16 24370129009 G9A5H9 1961 4 6 2016 5 24
## 6 1001349 M C16 24370129009 G9A5H9 1961 4 6 2016 5 24
## 7 1001352 M C24 48010141007 T1B3E3 1956 12 27 2016 4 28
## 8 1001352 M C24 48010141007 T1B3E3 1956 12 27 2016 4 28
## 9 1004287 M C34 35320194004 N4S2E1 1949 2 18 2016 1 19
## 10 1004287 M C34 35320194004 N4S2E1 1949 2 18 2016 1 19
## # i 37 more rows
# Drop repeated rows. With no column arguments, distinct() compares all
# columns, so only rows that are identical in every column are collapsed.
# NOTE(review): the check above keys on ID alone; rows sharing an ID but
# differing in another column would remain -- confirm none exist.
rates <- distinct(rates)