Home > database >  Wide to long data in R
Wide to long data in R

Time:08-02

I have following data in R:

structure(list(Name = 1:4, Paper1 = c("C1", "C1", "C1", "C1"), 
Marks1 = 1:4, Paper2 = c("D1", "D1", "D1", "D1"), Marks2 = 1:4, 
Paper3 = c("E1", "E1", "E1", "E1"), Marks3 = 12:15), class = "data.frame", row.names = c(NA, -4L))

I want to arrange my data like this:

structure(list(Name = c(1L, 1L, 1L, 2L, 2L, 2L), Paper = c("C1", 
"D1", "E1", "C1", "D1", "E1"), Marks = c(1L, 1L, 12L, 2L, 2L, 
13L)), class = "data.frame", row.names = c(NA, -6L))

I tried, shape, melt, but both are not providing desired output. Please suggests the solution.

CodePudding user response:

For a shorter dplyr option, we could use pivot_longer using the names_sep-argument with either regex-look-around to separate before the digit..

library(dplyr)

df |> 
  pivot_longer(-Name, 
               names_to = c(".value", NA), 
               names_sep = "(?=\\d)")

.. or as pointed out by @Allan Cameron splitting at the fifth position: names_sep = 5.

If you want to keep the numbers of the corresponding papers and marks, you can replace NA in the names_to with your desired column name.

Output:

# A tibble: 12 × 3
    Name Paper Marks
   <int> <chr> <int>
 1     1 C1        1
 2     1 D1        1
 3     1 E1       12
 4     2 C1        2
 5     2 D1        2
 6     2 E1       13
 7     3 C1        3
 8     3 D1        3
 9     3 E1       14
10     4 C1        4
11     4 D1        4
12     4 E1       15

CodePudding user response:

One option with melt():

library(data.table)
setDT(df)

melt(
  df, 
  id.vars      = 'Name', 
  measure.vars = patterns('^Paper', '^Marks'),
  value.name   = c('Paper', 'Marks')
)[order(Name), !'variable']

#     Name Paper Marks
#  1:    1    C1     1
#  2:    1    D1     1
#  3:    1    E1    12
#  4:    2    C1     2
#  5:    2    D1     2
#  6:    2    E1    13
#  7:    3    C1     3
#  8:    3    D1     3
#  9:    3    E1    14
# 10:    4    C1     4
# 11:    4    D1     4
# 12:    4    E1    15

CodePudding user response:

Here's a somewhat simpler tidyverse solution

library(tidyverse)

df %>%
group_by(Name) %>%
  summarise(Paper = unlist(across(starts_with('Paper'), c)),
            Marks = unlist(across(starts_with('Marks'), c)))
#> # A tibble: 12 x 3
#> # Groups:   Name [4]
#>     Name Paper Marks
#>    <int> <chr> <int>
#>  1     1 C1        1
#>  2     1 D1        1
#>  3     1 E1       12
#>  4     2 C1        2
#>  5     2 D1        2
#>  6     2 E1       13
#>  7     3 C1        3
#>  8     3 D1        3
#>  9     3 E1       14
#> 10     4 C1        4
#> 11     4 D1        4
#> 12     4 E1       15

CodePudding user response:

A tidyverse way.

df1 <-
  structure(list(
    Name = 1:4, 
    Paper1 = c("C1", "C1", "C1", "C1"), 
    Marks1 = 1:4, 
    Paper2 = c("D1", "D1", "D1", "D1"), 
    Marks2 = 1:4, 
    Paper3 = c("E1", "E1", "E1", "E1"), 
    Marks3 = 12:15), 
    class = "data.frame", row.names = c(NA, -4L))

df2 <-
  structure(list(
    Name = c(1L, 1L, 1L, 2L, 2L, 2L), 
    Paper = c("C1", "D1", "E1", "C1", "D1", "E1"), 
    Marks = c(1L, 1L, 12L, 2L, 2L, 13L)), 
    class = "data.frame", row.names = c(NA, -6L))


suppressPackageStartupMessages(library(tidyverse))

df1 %>%
  mutate(across(starts_with("Marks"), as.character)) %>%
  pivot_longer(-Name, names_to = "PM") %>%
  mutate(PM = gsub("\\d ", "", PM)) %>% 
  group_by(PM) %>%
  mutate(id = row_number()) %>% 
  pivot_wider(
    id_cols = c(id, Name),
    names_from = PM,
    values_from = value
  ) %>%
  select(-id)
#> # A tibble: 12 × 3
#>     Name Paper Marks
#>    <int> <chr> <chr>
#>  1     1 C1    1    
#>  2     1 D1    1    
#>  3     1 E1    12   
#>  4     2 C1    2    
#>  5     2 D1    2    
#>  6     2 E1    13   
#>  7     3 C1    3    
#>  8     3 D1    3    
#>  9     3 E1    14   
#> 10     4 C1    4    
#> 11     4 D1    4    
#> 12     4 E1    15

Created on 2022-08-01 by the reprex package (v2.0.1)

CodePudding user response:

A base R option using reshape

reshape(
  df,
  direction = "long",
  idvar = "Name",
  varying = -1,
  v.names = c("Paper","Marks")
)

gives

    Name time Paper Marks
1.1    1    1     1    C1
2.1    2    1     2    C1
3.1    3    1     3    C1
4.1    4    1     4    C1
1.2    1    2     1    D1
2.2    2    2     2    D1
3.2    3    2     3    D1
4.2    4    2     4    D1
1.3    1    3    12    E1
2.3    2    3    13    E1
3.3    3    3    14    E1
4.3    4    3    15    E1

CodePudding user response:

Another possible solution, based on purrr::map2_dfr:

library(tidyverse)

map2_dfr(select(df, starts_with("Paper")), select(df, starts_with("Marks")),
         ~ tibble(Name = df$Name, Paper = .x, Marks = .y))

#> # A tibble: 12 × 3
#>     Name Paper Marks
#>    <int> <chr> <int>
#>  1     1 C1        1
#>  2     2 C1        2
#>  3     3 C1        3
#>  4     4 C1        4
#>  5     1 D1        1
#>  6     2 D1        2
#>  7     3 D1        3
#>  8     4 D1        4
#>  9     1 E1       12
#> 10     2 E1       13
#> 11     3 E1       14
#> 12     4 E1       15

CodePudding user response:

Another option using to_long from sjmisc:

df <- structure(list(Name = 1:4, Paper1 = c("C1", "C1", "C1", "C1"), 
                     Marks1 = 1:4, Paper2 = c("D1", "D1", "D1", "D1"), Marks2 = 1:4, 
                     Paper3 = c("E1", "E1", "E1", "E1"), Marks3 = 12:15), class = "data.frame", row.names = c(NA, -4L))

library(sjmisc)
output <- to_long(df, 
        key = 'group', 
        values = c('Paper', "Marks"), 
        grep("Paper", names(df)),
        grep("Marks", names(df)),
        recode.key = TRUE)[,-2]
output[order(output$Name),]
#>    Name Paper Marks
#> 1     1    C1     1
#> 5     1    D1     1
#> 9     1    E1    12
#> 2     2    C1     2
#> 6     2    D1     2
#> 10    2    E1    13
#> 3     3    C1     3
#> 7     3    D1     3
#> 11    3    E1    14
#> 4     4    C1     4
#> 8     4    D1     4
#> 12    4    E1    15

Created on 2022-08-01 by the reprex package (v2.0.1)

CodePudding user response:

Another way of doing it is removing the 1 from the names and then stack them with rbind

colnames(df) <- (colnames(df) |> stringr::str_extract("\\D "))


cbind(df[1], rbind(df[2:3],df[4:5],df[6:7]))
   Name Paper Marks
1     1    C1     1
2     2    C1     2
3     3    C1     3
4     4    C1     4
5     1    D1     1
6     2    D1     2
7     3    D1     3
8     4    D1     4
9     1    E1    12
10    2    E1    13
11    3    E1    14
12    4    E1    15

Less hard coded

cbind(df[1],
      do.call(rbind,
              lapply(seq(0, ncol(df) - 2, 2 ), \(x) {
              colnames(df) <- strsplit(colnames(df), "\\d")
  df[x   2:3]
})))

  • Related