Sorry for changing the question; I cannot post a new one.
The original data is:
D T C
<dbl> <dbl> <chr>
1 1 2000 A
2 2 2000 A
3 3 2000 A
4 1 2000 B
5 5 2000 B
6 6 2000 B
7 1 2001 A
8 2 2001 A
9 3 2001 B
What I need is
D T C count
<dbl> <dbl> <chr> <int>
1 1 2000 A 3
2 2 2000 A 3
3 3 2000 A 3
4 1 2000 B 3
5 5 2000 B 3
6 6 2000 B 3
7 1 2001 A 2
8 2 2001 A 2
9 3 2001 B 1
I want to count the D cells within each (T, C) group.
However, the code from the answer,
# Per (T, C) group, store the number of distinct D values in `count`.
# NOTE(review): a single constant for every row would suggest the grouping is
# being ignored (e.g. mutate() masked by another package) - confirm.
sample %>%
  group_by(T, C) %>%
  mutate(count = n_distinct(D))
gives a count of 5 for every row.
CodePudding user response:
If we want to create a column of counts, use `add_count`:
library(dplyr)
# Append a `count` column holding the number of rows in each (T, C) group;
# every original row is kept.
sample <- add_count(sample, T, C, name = "count")
-output
sample
D T C count
1 1 2000 A 3
2 2 2000 A 3
3 3 2000 A 3
4 1 2000 B 3
5 5 2000 B 3
6 6 2000 B 3
7 1 2001 A 2
8 2 2001 A 2
9 3 2001 B 1
data
# Reproducible copy of the question's data: nine rows with integer columns
# D and T and a character column C. Row names are the strings "1" to "9".
sample <- structure(
  list(
    D = c(1L, 2L, 3L, 1L, 5L, 6L, 1L, 2L, 3L),
    T = rep(c(2000L, 2001L), times = c(6L, 3L)),
    C = rep(c("A", "B", "A", "B"), times = c(3L, 3L, 2L, 1L))
  ),
  class = "data.frame",
  row.names = as.character(1:9)
)
CodePudding user response:
Using base R:
# Example data for the base R answer: 20 rows covering three cty_fips codes,
# each with its congressional district and congress number. The format.stata
# attributes on the columns are reproduced unchanged.
df <- structure(
  list(
    cty_fips = structure(
      rep(c("01001", "01003", "01005"), times = c(9L, 9L, 2L)),
      format.stata = "%-9s"
    ),
    congressional_district = structure(
      rep(c("02", "01", "02"), times = c(9L, 9L, 2L)),
      format.stata = "%-9s"
    ),
    congress_number = structure(
      c(
        rep(c(109, 110, 111, 112, 113, 114, 115, 116, 108), times = 2L),
        109, 110
      ),
      format.stata = ".0g"
    ),
    state_name = structure(rep("AL", times = 20L), format.stata = "%-9s"),
    congressionaldistrict = structure(
      rep(c("AL 2", "AL 1", "AL 2"), times = c(9L, 9L, 2L)),
      format.stata = "%-9s"
    )
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Count rows per (congressional_district, congress_number) in long format
# using only base R. The earlier version piped into melt(), which is not a
# base function, so it failed unless an extra package was attached.
# as.data.frame() on a table yields one row per combination of the two keys;
# the key columns come back as factors.

# using table
# Combinations that never occur appear with count 0.
as.data.frame(
  table(
    congressional_district = df$congressional_district,
    congress_number = df$congress_number
  ),
  responseName = "count"
)

# using tapply
# Combinations that never occur appear with count NA (tapply leaves empty
# cells as NA when the function is length).
tapply(
  df$cty_fips,
  list(
    congressional_district = df$congressional_district,
    congress_number = df$congress_number
  ),
  length
) |>
  as.table() |>
  as.data.frame(responseName = "count")