I have following data in R:
structure(list(Name = 1:4, Paper1 = c("C1", "C1", "C1", "C1"),
Marks1 = 1:4, Paper2 = c("D1", "D1", "D1", "D1"), Marks2 = 1:4,
Paper3 = c("E1", "E1", "E1", "E1"), Marks3 = 12:15), class = "data.frame", row.names = c(NA, -4L))
I want to arrange my data like this:
structure(list(Name = c(1L, 1L, 1L, 2L, 2L, 2L), Paper = c("C1",
"D1", "E1", "C1", "D1", "E1"), Marks = c(1L, 1L, 12L, 2L, 2L,
13L)), class = "data.frame", row.names = c(NA, -6L))
I tried, shape
, melt
, but both are not providing desired output. Please suggests the solution.
CodePudding user response:
For a shorter dplyr
option, we could use pivot_longer
using the names_sep
-argument with either regex-look-around to separate before the digit..
library(dplyr)
df |>
pivot_longer(-Name,
names_to = c(".value", NA),
names_sep = "(?=\\d)")
.. or as pointed out by @Allan Cameron splitting at the fifth position: names_sep = 5
.
If you want to keep the numbers of the corresponding papers and marks, you can replace NA
in the names_to
with your desired column name.
Output:
# A tibble: 12 × 3
Name Paper Marks
<int> <chr> <int>
1 1 C1 1
2 1 D1 1
3 1 E1 12
4 2 C1 2
5 2 D1 2
6 2 E1 13
7 3 C1 3
8 3 D1 3
9 3 E1 14
10 4 C1 4
11 4 D1 4
12 4 E1 15
CodePudding user response:
One option with melt()
:
library(data.table)
setDT(df)
melt(
df,
id.vars = 'Name',
measure.vars = patterns('^Paper', '^Marks'),
value.name = c('Paper', 'Marks')
)[order(Name), !'variable']
# Name Paper Marks
# 1: 1 C1 1
# 2: 1 D1 1
# 3: 1 E1 12
# 4: 2 C1 2
# 5: 2 D1 2
# 6: 2 E1 13
# 7: 3 C1 3
# 8: 3 D1 3
# 9: 3 E1 14
# 10: 4 C1 4
# 11: 4 D1 4
# 12: 4 E1 15
CodePudding user response:
Here's a somewhat simpler tidyverse solution
library(tidyverse)
df %>%
group_by(Name) %>%
summarise(Paper = unlist(across(starts_with('Paper'), c)),
Marks = unlist(across(starts_with('Marks'), c)))
#> # A tibble: 12 x 3
#> # Groups: Name [4]
#> Name Paper Marks
#> <int> <chr> <int>
#> 1 1 C1 1
#> 2 1 D1 1
#> 3 1 E1 12
#> 4 2 C1 2
#> 5 2 D1 2
#> 6 2 E1 13
#> 7 3 C1 3
#> 8 3 D1 3
#> 9 3 E1 14
#> 10 4 C1 4
#> 11 4 D1 4
#> 12 4 E1 15
CodePudding user response:
A tidyverse
way.
df1 <-
structure(list(
Name = 1:4,
Paper1 = c("C1", "C1", "C1", "C1"),
Marks1 = 1:4,
Paper2 = c("D1", "D1", "D1", "D1"),
Marks2 = 1:4,
Paper3 = c("E1", "E1", "E1", "E1"),
Marks3 = 12:15),
class = "data.frame", row.names = c(NA, -4L))
df2 <-
structure(list(
Name = c(1L, 1L, 1L, 2L, 2L, 2L),
Paper = c("C1", "D1", "E1", "C1", "D1", "E1"),
Marks = c(1L, 1L, 12L, 2L, 2L, 13L)),
class = "data.frame", row.names = c(NA, -6L))
suppressPackageStartupMessages(library(tidyverse))
df1 %>%
mutate(across(starts_with("Marks"), as.character)) %>%
pivot_longer(-Name, names_to = "PM") %>%
mutate(PM = gsub("\\d ", "", PM)) %>%
group_by(PM) %>%
mutate(id = row_number()) %>%
pivot_wider(
id_cols = c(id, Name),
names_from = PM,
values_from = value
) %>%
select(-id)
#> # A tibble: 12 × 3
#> Name Paper Marks
#> <int> <chr> <chr>
#> 1 1 C1 1
#> 2 1 D1 1
#> 3 1 E1 12
#> 4 2 C1 2
#> 5 2 D1 2
#> 6 2 E1 13
#> 7 3 C1 3
#> 8 3 D1 3
#> 9 3 E1 14
#> 10 4 C1 4
#> 11 4 D1 4
#> 12 4 E1 15
Created on 2022-08-01 by the reprex package (v2.0.1)
CodePudding user response:
A base R option using reshape
reshape(
df,
direction = "long",
idvar = "Name",
varying = -1,
v.names = c("Paper","Marks")
)
gives
Name time Paper Marks
1.1 1 1 1 C1
2.1 2 1 2 C1
3.1 3 1 3 C1
4.1 4 1 4 C1
1.2 1 2 1 D1
2.2 2 2 2 D1
3.2 3 2 3 D1
4.2 4 2 4 D1
1.3 1 3 12 E1
2.3 2 3 13 E1
3.3 3 3 14 E1
4.3 4 3 15 E1
CodePudding user response:
Another possible solution, based on purrr::map2_dfr
:
library(tidyverse)
map2_dfr(select(df, starts_with("Paper")), select(df, starts_with("Marks")),
~ tibble(Name = df$Name, Paper = .x, Marks = .y))
#> # A tibble: 12 × 3
#> Name Paper Marks
#> <int> <chr> <int>
#> 1 1 C1 1
#> 2 2 C1 2
#> 3 3 C1 3
#> 4 4 C1 4
#> 5 1 D1 1
#> 6 2 D1 2
#> 7 3 D1 3
#> 8 4 D1 4
#> 9 1 E1 12
#> 10 2 E1 13
#> 11 3 E1 14
#> 12 4 E1 15
CodePudding user response:
Another option using to_long
from sjmisc
:
df <- structure(list(Name = 1:4, Paper1 = c("C1", "C1", "C1", "C1"),
Marks1 = 1:4, Paper2 = c("D1", "D1", "D1", "D1"), Marks2 = 1:4,
Paper3 = c("E1", "E1", "E1", "E1"), Marks3 = 12:15), class = "data.frame", row.names = c(NA, -4L))
library(sjmisc)
output <- to_long(df,
key = 'group',
values = c('Paper', "Marks"),
grep("Paper", names(df)),
grep("Marks", names(df)),
recode.key = TRUE)[,-2]
output[order(output$Name),]
#> Name Paper Marks
#> 1 1 C1 1
#> 5 1 D1 1
#> 9 1 E1 12
#> 2 2 C1 2
#> 6 2 D1 2
#> 10 2 E1 13
#> 3 3 C1 3
#> 7 3 D1 3
#> 11 3 E1 14
#> 4 4 C1 4
#> 8 4 D1 4
#> 12 4 E1 15
Created on 2022-08-01 by the reprex package (v2.0.1)
CodePudding user response:
Another way of doing it is removing the 1 from the names and then stack them with rbind
colnames(df) <- (colnames(df) |> stringr::str_extract("\\D "))
cbind(df[1], rbind(df[2:3],df[4:5],df[6:7]))
Name Paper Marks
1 1 C1 1
2 2 C1 2
3 3 C1 3
4 4 C1 4
5 1 D1 1
6 2 D1 2
7 3 D1 3
8 4 D1 4
9 1 E1 12
10 2 E1 13
11 3 E1 14
12 4 E1 15
Less hard coded
cbind(df[1],
do.call(rbind,
lapply(seq(0, ncol(df) - 2, 2 ), \(x) {
colnames(df) <- strsplit(colnames(df), "\\d")
df[x 2:3]
})))