my data looks like this
df<- structure(list(team_3_F = c("browingal ", "team ", "browingal ",
"browingal ", "team ", "browingal ", "browingal ", "browingal ",
"browingal ", "newyorkish", "newyorkish", "site", "site", "browingal ",
"browingal ", "site", "site", "team ", "team ", "team ", "site",
"site", "browingal ", "team ", "team ", "team ", "team ", "team ",
"team ", "team ", "team ", "team ", "team ", "team ", "team ",
"team ", "team ", "newyorkish", "newyorkish", "team ", "team ",
"team ", "browingal ", "browingal "), AAA_US = c(0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 88L, 5L, 0L, 0L, 0L, 0L, 0L, 45L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 11L, 1L, 0L, 0L, 19L, 0L, 0L), BBB_US = c(0L, 0L,
2L, 3L, 4L, 2L, 1L, 0L, 1L, 0L, 3L, 8L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 45L, 0L, 0L, 0L, 18L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 19L, 1L, 0L)), class = "data.frame", row.names = c(NA,
-44L))
I want to sum those rows that both columns have non-zero values , so the output should look like this
df<- structure(list(team_3_F = c("browingal ", "team ", "browingal ",
"browingal ", "team ", "browingal ", "browingal ", "browingal ",
"browingal ", "newyorkish", "newyorkish", "site", "site", "browingal ",
"browingal ", "site", "site", "team ", "team ", "team ", "site",
"site", "browingal ", "team ", "team ", "team ", "team ", "team ",
"team ", "team ", "team ", "team ", "team ", "team ", "team ",
"team ", "team ", "newyorkish", "newyorkish", "team ", "team ",
"team ", "browingal ", "browingal "), AAA_US = c(0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 88L, 5L, 0L, 0L, 0L, 0L, 0L, 45L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 11L, 1L, 0L, 0L, 19L, 0L, 0L), BBB_US = c(0L, 0L,
2L, 3L, 4L, 2L, 1L, 0L, 1L, 0L, 3L, 8L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 45L, 0L, 0L, 0L, 18L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 19L, 1L, 0L), NEW = c(0L, 0L,
3L, 0L, 5L, 0L, 0L, 0L, 2L, 0L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 47L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 38L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-44L))
I have tried to sum with various version of the following but I could not figure how to fix it
df$new <- rowSums(df[ , c(2,3)], na.rm=FALSE)
df$new <- rowSums(df[ , c(2,3)], na.rm=TRUE)
CodePudding user response:
We can multiply the rowSums
output with a logical vector that only becomes TRUE when all the elements in the same row are not 0 (create the logical vector with rowSums
on a logical matrix (df[, 2:3] != 0
)
df$new <- rowSums(df[, 2:3]) * (rowSums(df[, 2:3] != 0) == 2)
-testing
> all.equal(df$new, out$NEW)
[1] TRUE
-output
> df
team_3_F AAA_US BBB_US new
1 browingal 0 0 0
2 team 0 0 0
3 browingal 1 2 3
4 browingal 0 3 0
5 team 1 4 5
6 browingal 0 2 0
7 browingal 0 1 0
8 browingal 0 0 0
9 browingal 1 1 2
10 newyorkish 88 0 0
11 newyorkish 5 3 8
12 site 0 8 0
13 site 0 0 0
14 browingal 0 0 0
15 browingal 0 0 0
16 site 0 0 0
17 site 45 0 0
18 team 0 0 0
19 team 0 0 0
20 team 1 1 2
21 site 0 0 0
22 site 0 0 0
23 browingal 0 2 0
24 team 0 0 0
25 team 0 0 0
26 team 0 0 0
27 team 0 0 0
28 team 2 45 47
29 team 0 0 0
30 team 0 0 0
31 team 0 0 0
32 team 0 18 0
33 team 0 0 0
34 team 0 0 0
35 team 0 0 0
36 team 0 1 0
37 team 0 0 0
38 newyorkish 11 0 0
39 newyorkish 1 0 0
40 team 0 0 0
41 team 0 0 0
42 team 19 19 38
43 browingal 0 1 0
44 browingal 0 0 0
NOTE: Here, out
is the expected output - renamed as both the input and output are named with same object df
Or if there are only two columns, an option can be
library(dplyr)
df %>%
mutate(new = case_when(AAA_US != 0 & BBB_US != 0 ~
AAA_US BBB_US, TRUE ~ 0L))
CodePudding user response:
Here is a possible base R option, which will work if you have additional columns to consider. First, I return a logical using df[,-1] != 0
to determine whether a value is not 0, then I use rowSums
to get the total of TRUE
values. Then, if the number of non-zero values equals the number of columns (excluding the team_3_F
column) (i.e., == ncol(df[,-1]
), then we can get the sum of that row, and if not, then return 0.
df$NEW <- ifelse(rowSums(df[,-1] != 0) == ncol(df[,-1]), rowSums(df[,-1]), 0)
Testing
all.equal(df_expected$NEW, df$NEW)
#[1] TRUE
Output
team_3_F AAA_US BBB_US NEW
1 browingal 0 0 0
2 team 0 0 0
3 browingal 1 2 3
4 browingal 0 3 0
5 team 1 4 5
6 browingal 0 2 0
7 browingal 0 1 0
8 browingal 0 0 0
9 browingal 1 1 2
10 newyorkish 88 0 0
11 newyorkish 5 3 8
12 site 0 8 0
13 site 0 0 0
14 browingal 0 0 0
15 browingal 0 0 0
16 site 0 0 0
17 site 45 0 0
18 team 0 0 0
19 team 0 0 0
20 team 1 1 2
21 site 0 0 0
22 site 0 0 0
23 browingal 0 2 0
24 team 0 0 0
25 team 0 0 0
26 team 0 0 0
27 team 0 0 0
28 team 2 45 47
29 team 0 0 0
30 team 0 0 0
31 team 0 0 0
32 team 0 18 0
33 team 0 0 0
34 team 0 0 0
35 team 0 0 0
36 team 0 1 0
37 team 0 0 0
38 newyorkish 11 0 0
39 newyorkish 1 0 0
40 team 0 0 0
41 team 0 0 0
42 team 19 19 38
43 browingal 0 1 0
44 browingal 0 0 0