I have a data set for country-wise 4G internet coverage for a certain period, in percentage. The data looks like this:
> head(new_data, 20)
# A tibble: 20 × 10
Country `2012` `2013` `2014` `2015` `2016` `2017` `2018` `2019` `2020`
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Afghanistan NULL NULL 0 0 0 4 7 22 26
2 Albania NA NA 0 35 80.2 85.3 85.5 95 98.38
3 Algeria 0 0 0 0 3.62 30.49 52.84 53.63 76.180000000000007
4 Andorra NA NA NULL 50 50 85 85 85 85
5 Angola NA NULL NULL 7 8 8 8 18 30
6 Antigua and Barbuda 65 78.599999999999994 80 98 99 99 NA 99 99
7 Argentina NA NULL 0 65 85 85 90.76 91.18 97.68
8 Armenia 17.54 44 46 46.5 52.48 90.05 99.1 99.3 100
9 Australia 52.21 85 95 94 98 99 99.2 99.4 99.5
10 Austria 31.57 58.41 85 98 98 98 98 98 98
11 Azerbaijan 6.68 14.31 49 39 41 42 49 93 93
12 Bahamas NA NA 35 95 95 95 95 95 95
13 Bahrain NA 88.55 95.6 95.6 100 100 100 100 100
14 Bangladesh NULL NULL 59 65 65 67 79 82 97.8
15 Barbados NA NA 0 0 20 99.6 99.6 99.6 99.6
16 Belarus NA NA 6 20.399999999999999 41.5 68.5 75.7 75.7 89.5
17 Belgium 12.23 51.7 88 99.89 100 100 100 100 100
18 Belize NA NA NA 0 46.94 65 68 70 70
19 Benin NA NA NULL 11.52 23.89 39.72 40 45 46
20 Bhutan NULL 5 10.72 40 53 55 60 78 80
How do I impute the data row-wise (i.e. country-wise) keeping in mind that the values should be always between 0 and 100?
Edit
> dput(head(new_data, 20))
structure(list(Country = c("Afghanistan", "Albania", "Algeria",
"Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia",
"Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh",
"Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bhutan"
), `2012` = c("NULL", NA, "0", NA, NA, "65", NA, "17.54", "52.21",
"31.57", "6.68", NA, NA, "NULL", NA, NA, "12.23", NA, NA, "NULL"
), `2013` = c("NULL", NA, "0", NA, "NULL", "78.599999999999994",
"NULL", "44", "85", "58.41", "14.31", NA, "88.55", "NULL", NA,
NA, "51.7", NA, NA, "5"), `2014` = c("0", "0", "0", "NULL", "NULL",
"80", "0", "46", "95", "85", "49", "35", "95.6", "59", "0", "6",
"88", NA, "NULL", "10.72"), `2015` = c("0", "35", "0", "50",
"7", "98", "65", "46.5", "94", "98", "39", "95", "95.6", "65",
"0", "20.399999999999999", "99.89", "0", "11.52", "40"), `2016` = c("0",
"80.2", "3.62", "50", "8", "99", "85", "52.48", "98", "98", "41",
"95", "100", "65", "20", "41.5", "100", "46.94", "23.89", "53"
), `2017` = c("4", "85.3", "30.49", "85", "8", "99", "85", "90.05",
"99", "98", "42", "95", "100", "67", "99.6", "68.5", "100", "65",
"39.72", "55"), `2018` = c("7", "85.5", "52.84", "85", "8", NA,
"90.76", "99.1", "99.2", "98", "49", "95", "100", "79", "99.6",
"75.7", "100", "68", "40", "60"), `2019` = c("22", "95", "53.63",
"85", "18", "99", "91.18", "99.3", "99.4", "98", "93", "95",
"100", "82", "99.6", "75.7", "100", "70", "45", "78"), `2020` = c("26",
"98.38", "76.180000000000007", "85", "30", "99", "97.68", "100",
"99.5", "98", "93", "95", "100", "97.8", "99.6", "89.5", "100",
"70", "46", "80")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
CodePudding user response:
A suggestion to fill NA
with nearby values
df %>%
pivot_longer(-Country, names_to = "year") %>%
mutate(value = value %>% as.numeric()) %>%
group_by(Country) %>%
fill(value, .direction = "updown") %>%
pivot_wider(names_from = year,
values_from = value)
# A tibble: 20 x 10
# Groups: Country [20]
Country `2012` `2013` `2014` `2015` `2016` `2017` `2018` `2019` `2020`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan 0 0 0 0 0 4 7 22 26
2 Albania 0 0 0 35 80.2 85.3 85.5 95 98.4
3 Algeria 0 0 0 0 3.62 30.5 52.8 53.6 76.2
4 Andorra 50 50 50 50 50 85 85 85 85
5 Angola 7 7 7 7 8 8 8 18 30
6 Antigua and Bar~ 65 78.6 80 98 99 99 99 99 99
7 Argentina 0 0 0 65 85 85 90.8 91.2 97.7
8 Armenia 17.5 44 46 46.5 52.5 90.0 99.1 99.3 100
9 Australia 52.2 85 95 94 98 99 99.2 99.4 99.5
10 Austria 31.6 58.4 85 98 98 98 98 98 98
11 Azerbaijan 6.68 14.3 49 39 41 42 49 93 93
12 Bahamas 35 35 35 95 95 95 95 95 95
13 Bahrain 88.6 88.6 95.6 95.6 100 100 100 100 100
14 Bangladesh 59 59 59 65 65 67 79 82 97.8
15 Barbados 0 0 0 0 20 99.6 99.6 99.6 99.6
16 Belarus 6 6 6 20.4 41.5 68.5 75.7 75.7 89.5
17 Belgium 12.2 51.7 88 99.9 100 100 100 100 100
18 Belize 0 0 0 0 46.9 65 68 70 70
19 Benin 11.5 11.5 11.5 11.5 23.9 39.7 40 45 46
20 Bhutan 5 5 10.7 40 53 55 60 78 80