Home > Enterprise >  Gathering columns without repetition
Gathering columns without repetition

Time:03-11

I am trying to align two sets of columns in order to compare them. Gathering the first set of columns followed by the second set leads to repetition.

Data:

  data <- data.frame(
    id=1:4,
    q1= 1:4,
    q1_1= 5:8,
    q2= 9:12,
    q2_1= 13:16
  )
> data
  id q1 q1_1 q2 q2_1
1  1  1    5  9   13
2  2  2    6 10   14
3  3  3    7 11   15
4  4  4    8 12   16

Desired result:

  id  key value key2 value2
1  1   q1     1   q2      9
2  2   q1     2   q2     10
3  3   q1     3   q2     11
4  4   q1     4   q2     12
5  1 q1_1     5 q2_1     13
6  2 q1_1     6 q2_1     14
7  3 q1_1     7 q2_1     15
8  4 q1_1     8 q2_1     16

Attempt:

data %>% gather(key, value, q1:q1_1) %>%
  gather(key2, value2, q2, q2_1)

CodePudding user response:

Split your data into multiple dataframes, gather and then combine column-wise.

library(tidyr)
library(dplyr)


data <- data.frame(
    id=1:4,
    q1= 1:4,
    q1_1= 5:8,
    q2= 9:12,
    q2_1= 13:16
  )

(d1 <- data  %>% select(id, starts_with('q1'))  %>% pivot_longer(names_to='key', values_to='value', cols=!id)  %>% 
arrange(key,id))

(d2 <- data  %>% select(id, starts_with('q2'))  %>% pivot_longer(names_to='key2', values_to='value2', cols=!id)  %>% 
arrange(key2,id)  %>% 
select(!id))

cbind(d1,d2)

CodePudding user response:

We may use reshape_longer from sjmisc

library(sjmisc)
reshape_longer(
  data,
  columns = list( c("q1", "q1_1"),
  c("q2", "q2_1")), names.to = c("key", "key2"), 
     values.to = c("value", "value2"), id = "id"
)

Or with to_long after changing the id column name

names(data)[1] <- "ID"
to_long(
   data = data,
   keys =  c("key", "key2"),
   values = c("value", "value2"),
   c("q1", "q1_1"),
   c("q2", "q2_1"))

-output

  ID  key value key2 value2
1  1   q1     1   q2      9
2  2   q1     2   q2     10
3  3   q1     3   q2     11
4  4   q1     4   q2     12
5  1 q1_1     5 q2_1     13
6  2 q1_1     6 q2_1     14
7  3 q1_1     7 q2_1     15
8  4 q1_1     8 q2_1     16

CodePudding user response:

This is also possible with pivot_longer:

library(tidyverse)

data %>% 
  pivot_longer(cols = c(q1, q1_1), names_to = "key") %>% 
  pivot_longer(cols = c(q2, q2_1), names_to = "key2", values_to = "value2") %>% 
  filter(nchar(key) == nchar(key2))

# A tibble: 8 x 5
     id key   value key2  value2
  <int> <chr> <int> <chr>  <int>
1     1 q1        1 q2         9
2     1 q1_1      5 q2_1      13
3     2 q1        2 q2        10
4     2 q1_1      6 q2_1      14
5     3 q1        3 q2        11
6     3 q1_1      7 q2_1      15
7     4 q1        4 q2        12
8     4 q1_1      8 q2_1      16
  • Related