I have two dataframes
df1
# var1 var2
# 1 X01 Red
# 2 X02 Green
# 3 X03 Red
# 4 X04 Yellow
# 5 X05 Red
# 6 X06 Green
df2
# X01 X02 X03 ...
# 1 1 0.1 2.1
# 2 2 0.2 2.2
# 3 3 0.3 2.3
# 4 4 0.4 2.4
# 5 5 0.5 2.5
# 6 6 0.6 2.6
Is it possible to get a new variable in df2 called 'Red' being the mean per observation?
df2
# X01 X02 X03 Red
# 1 1 0.1 2.1 mean obs1
# 2 2 0.2 2.2 mean obs2
# 3 3 0.3 2.3 mean obs3
# 4 4 0.4 2.4 mean obs4
# 5 5 0.5 2.5 mean obs5
# 6 6 0.6 2.6 mean obs6
CodePudding user response:
We can use base R
split
to split the second dataset based on the match
ing values of 'var1' from the first data, Filter
the list
elements to apply the rowMeans
only if there are more than 1 column, then loop over the list
, apply the rowMeans
and assign those as new columns in 'df2'
lst1 <- split.default(df2, df1$var2[match(names(df2), df1$var1)])
lst2 <- Filter(function(x) ncol(x) > 1, lst1)
tmp <- lapply(lst2,
rowMeans, na.rm = TRUE)
df2[names(tmp)] <- tmp
-output
> df2
X01 X02 X03 Red
1 1 0.1 2.1 1.55
2 2 0.2 2.2 2.10
3 3 0.3 2.3 2.65
4 4 0.4 2.4 3.20
5 5 0.5 2.5 3.75
6 6 0.6 2.6 4.30
Or in a for
loop
for(nm in unique(df1$var2)) {
v1 <- intersect(df1$var1[df1$var2 == nm], names(df2))
if(length(v1) > 1) df2[[nm]] <- rowMeans(df2[v1], na.rm = TRUE)
}
data
df1 <- structure(list(var1 = c("X01", "X02", "X03", "X04", "X05", "X06"
), var2 = c("Red", "Green", "Red", "Yellow", "Red", "Green")),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))
df2 <- structure(list(X01 = 1:6, X02 = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6
), X03 = c(2.1, 2.2, 2.3, 2.4, 2.5, 2.6)),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))
CodePudding user response:
library(tidyverse)
df2 %>%
rowid_to_column() %>%
pivot_longer(-rowid, names_to = 'var1') %>%
left_join(df1) %>%
pivot_wider(rowid, names_from = var2, values_fn = mean) %>%
cbind(df2,.)
X01 X02 X03 rowid Red Green
1 1 0.1 2.1 1 1.55 0.1
2 2 0.2 2.2 2 2.10 0.2
3 3 0.3 2.3 3 2.65 0.3
4 4 0.4 2.4 4 3.20 0.4
5 5 0.5 2.5 5 3.75 0.5
6 6 0.6 2.6 6 4.30 0.6