I have the following dataset:
id_municipio year Vivo Claro TIM Oi Algar Sercomtel MVNO
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1100015 2009 65.4 13.0 28.2 90.8 0 0 0
2 1100023 2009 775. 154. 334. 1076. 0 0 0
3 1100031 2009 35.2 6.98 15.2 48.8 0 0 0
4 1100049 2009 634. 126. 273. 880. 0 0 0
5 1100056 2009 122. 24.2 52.6 169. 0 0 0
6 1100064 2009 135. 26.7 58.2 187. 0 0 0
I want to create another column by "id_municipio"
and "year"
which add 1 if the remaining columns have a value bigger than 0. Here is what a I to have:
id_municipio year Vivo Claro TIM Oi Algar Sercomtel MVNO total Providers
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1100015 2009 65.4 13.0 28.2 90.8 0 0 0 197. 0
2 1100023 2009 775. 154. 334. 1076. 0 0 0 2339. 0
3 1100031 2009 35.2 6.98 15.2 48.8 0 0 0 106. 0
4 1100049 2009 634. 126. 273. 880. 0 0 0 1912. 0
5 1100056 2009 122. 24.2 52.6 169. 0 0 0 368. 0
6 1100064 2009 135. 26.7 58.2 187. 0 0 0 407. 0
I know it would be something like this but I am stuck:
library(dplyr)
dataset %>% group_by(id_municipio, year) %>% mutate(Providers =...)
CodePudding user response:
We can sum across columns in a row matching the condition:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df <- structure(list(
id_municipio = c(1100015, 1100023, 1100031, 1100049,
1100056, 1100064),
year = c(2009, 2009, 2009, 2009, 2009, 2009),
Vivo = c(65.4, 775, 35.2, 634, 122, 135),
Claro = c(13, 154,
6.98, 126, 24.2, 26.7),
TIM = c(28.2, 334, 15.2, 273, 52.6, 58.2),
Oi = c(90.8, 1076, 48.8, 880, 169, 187),
Algar = c(0, 0, 0,
0, 0, 0),
Sercomtel = c(0, 0, 0, 0, 0, 0),
MVNO = c(0, 0, 0,
0, 0, 0)
), row.names = c(NA,-6L), spec = structure(list(
cols = list(
id_municipio = structure(list(), class = c("collector_double",
"collector")),
year = structure(list(), class = c("collector_double",
"collector")),
Vivo = structure(list(), class = c("collector_double",
"collector")),
Claro = structure(list(), class = c("collector_double",
"collector")),
TIM = structure(list(), class = c("collector_double",
"collector")),
Oi = structure(list(), class = c("collector_double",
"collector")),
Algar = structure(list(), class = c("collector_double",
"collector")),
Sercomtel = structure(list(), class = c("collector_double",
"collector")),
MVNO = structure(list(), class = c("collector_double",
"collector"))
),
default = structure(list(), class = c("collector_guess",
"collector")),
delim = ","
), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
df %>%
group_by(id_municipio, year) %>%
mutate(Providers = rowSums(across('Vivo':'MVNO') > 0))
#> # A tibble: 6 × 10
#> # Groups: id_municipio, year [6]
#> id_municipio year Vivo Claro TIM Oi Algar Sercomtel MVNO Providers
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1100015 2009 65.4 13 28.2 90.8 0 0 0 4
#> 2 1100023 2009 775 154 334 1076 0 0 0 4
#> 3 1100031 2009 35.2 6.98 15.2 48.8 0 0 0 4
#> 4 1100049 2009 634 126 273 880 0 0 0 4
#> 5 1100056 2009 122 24.2 52.6 169 0 0 0 4
#> 6 1100064 2009 135 26.7 58.2 187 0 0 0 4
Created on 2022-11-29 with reprex v2.0.2