I have a dataframe such as :
VALUE Names
1 Sp1,Sp2
1 Sp2
2 Sp3,Sp4
4 Sp2,Sp1
And I would like to count for each df$Names
in coma separated way, the sum of df$VALUE
and get a new dataframe such as :
Name Sum
Sp1 5
Sp2 6
Sp3 2
Sp4 2
Here is the dataframe in dput format if it can helps:
structure(list(VALUE = c(1L, 1L, 2L, 4L), Names = c("Sp1,Sp2",
"Sp2", "Sp3,Sp4", "Sp2,Sp1")), class = "data.frame", row.names = c(NA,
-4L))
CodePudding user response:
We may use separate_rows
to split the 'Names' column, and then do a group by sum
library(dplyr)
library(tidyr)
df1 %>%
separate_rows(Names) %>%
group_by(Names) %>%
summarise(Sum = sum(VALUE))
-output
# A tibble: 4 × 2
Names Sum
<chr> <int>
1 Sp1 5
2 Sp2 6
3 Sp3 2
4 Sp4 2
CodePudding user response:
Here is a base R solution.
s <- strsplit(as.character(df1$Names), ",")
s <- do.call(rbind, lapply(seq_along(s), \(i) {
data.frame(Sum = rep(df1$VALUE[i], length(s[[i]])), Names = s[[i]])
}))
aggregate(Sum ~ Names, s, sum)
# Names Sum
#1 Sp1 5
#2 Sp2 6
#3 Sp3 2
#4 Sp4 2
CodePudding user response:
This does the trick.
library(tidyverse)
x = structure(list(VALUE = c(1L, 1L, 2L, 4L), Names = c("Sp1,Sp2",
"Sp2", "Sp3,Sp4", "Sp2,Sp1")), class = "data.frame", row.names = c(NA,
-4L))
y = mutate(x,
var1 = str_split(Names, "\\,", simplify = T)[,1],
var2 = str_split(Names, "\\,", simplify = T)[,2]) %>%
pivot_longer(c(var1,var2), names_to = "var", values_to = "Name") %>%
select(-var, Names) %>%
filter(Name != "") %>%
group_by(Name) %>%
summarise(Sum = sum(VALUE))