I have a dataframe that is complicated and Im trying to reshape it.
Here is an example of the type of data frame that I have:
names <- c("var1", 'var2', "split")
values <- rnorm(8)
from <- data.frame(a = rep(1, 10),
b = c(rep(1,3), rep(2, 7)),
c = c(names, names, rep("split", 4)),
d = c(rep("NA", 5), names, rep("split", 2)),
e = c(rep("NA", 7), names),
f = c(values[1:2], "NA", values[3:8], "NA"))
And this produces something that looks like this:
> from
a b c d e f
1 1 1 var1 NA NA -0.271930473373158
2 1 1 var2 NA NA -0.0968100775823158
3 1 1 split NA NA NA
4 1 2 var1 NA NA -1.73919094720254
5 1 2 var2 NA NA -0.52398152119997
6 1 2 split var1 NA 0.856367467674763
7 1 2 split var2 NA -0.729762707907525
8 1 2 split split var1 0.561460771889416
9 1 2 split split var2 0.0432022687633195
10 1 2 split split split NA
Inside my data frame from
, I want to take var1
and var2
and turn them into columns. And then use the value from column f
in from
as the values that correspond to var1
and var2
(reading row-wise).
In other words, I am trying to reshape this data frame into something that looks like this:
> out
a b var1 var2
1 1 1 -0.2719305 -0.09681008
2 1 2 -1.7391909 -0.52398152
3 1 2 0.8563675 -0.72976271
4 1 2 0.5614608 0.04320227
Any suggestions as to how I could do this?
CodePudding user response:
We could reshape to 'long' with pivot_longer
, remove the NA
elements and filter
by keeping on the 'var' elements and then back to 'wide' with pivot_wider
library(dplyr)
library(tidyr)
library(stringr)
library(data.table)
from %>%
type.convert(as.is = TRUE) %>%
pivot_longer(cols = c:e, values_drop_na = TRUE) %>%
filter(str_detect(value, 'var')) %>%
select(-name) %>%
mutate(rn = rowid(a, b, value)) %>%
pivot_wider(names_from = value, values_from = f) %>%
select(-rn)
-output
# A tibble: 4 × 4
a b var1 var2
<int> <int> <dbl> <dbl>
1 1 1 -0.272 -0.0968
2 1 2 -1.74 -0.524
3 1 2 0.856 -0.730
4 1 2 0.561 0.0432
data
from <- structure(list(a = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
b = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), c = c("var1",
"var2", "split", "var1", "var2", "split", "split", "split",
"split", "split"), d = c("NA", "NA", "NA", "NA", "NA", "var1",
"var2", "split", "split", "split"), e = c("NA", "NA", "NA",
"NA", "NA", "NA", "NA", "var1", "var2", "split"), f = c("-0.271930473373158",
"-0.0968100775823158", "NA", "-1.73919094720254", "-0.52398152119997",
"0.856367467674763", "-0.729762707907525", "0.561460771889416",
"0.0432022687633195", "NA")), row.names = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10"), class = "data.frame")
CodePudding user response:
This can be achieved by coupling a series of logical operations to get the values in from$f
data.frame( a=from$a[rowSums(from == "var1", na.rm=T) == 1],
b=from$b[rowSums(from == "var1", na.rm=T) == 1],
var1=from$f[rowSums(from == "var1", na.rm=T) == 1],
var2=from$f[rowSums(from == "var2", na.rm=T) == 1] )
a b var1 var2
1 1 1 -0.2719305 -0.09681008
2 1 2 -1.7391909 -0.52398152
3 1 2 0.8563675 -0.72976271
4 1 2 0.5614608 0.04320227
CodePudding user response:
Here is a solution with one time pivoting:
library(dplyr)
library(tidyr)
library(stringr)
from %>%
type.convert(as.is = TRUE) %>%
filter(!is.na(f)) %>%
mutate(name = str_extract_all(paste(c,d,e), 'var(.)')) %>%
select(a, b, f, name) %>%
pivot_wider(
names_from = name,
values_from = f,
values_fn = list
) %>%
unnest(cols = c(var1, var2))
a b var1 var2
<int> <int> <dbl> <dbl>
1 1 1 -0.272 -0.0968
2 1 2 -1.74 -0.524
3 1 2 0.856 -0.730
4 1 2 0.561 0.0432
CodePudding user response:
The notion is to have a row_number
mutation:
library(dplyr)
library(tidyr)
from %>%
type.convert(as.is = TRUE) %>%
filter(!is.na(f)) %>%
group_by(name = invoke(coalesce, across(c:e, na_if, 'split')))%>%
mutate(id = row_number()) %>%
pivot_wider(c(a, b, id), values_from = f) %>%
select(-id)
# A tibble: 4 x 4
a b var1 var2
<int> <int> <dbl> <dbl>
1 1 1 -0.272 -0.0968
2 1 2 -1.74 -0.524
3 1 2 0.856 -0.730
4 1 2 0.561 0.0432