How to convert a stacked long dataframe to wide format-CodePudding

I am trying to process a json file and halfway through I end up with this

data1

name	value
data_id1	8538u40952
data_id2	40942094i2
data_text1	la la la
data_text2	we love pie eating and pie making
data_metrics_likes1	0
data_metrics_likes2	5
data_users_id1	284u94u20942
data_users_id2	094200220030

How do I get the data to look like this

text id	text	likes	userid
8538u40952	la la la	0	284u94u20942
40942094i2	we love pie eating and pie making	5	094200220030

CodePudding user response：

library(tidyverse)

data <- tribble(
  ~name, ~value,
  "data_id1", "8538u40952",
  "data_id2", "40942094i2",
  "data_text1", "la la la",
  "data_text2", "we love pie eating and pie making",
  "data_metrics_likes1", "0",
  "data_metrics_likes2", "5",
  "data_users_id1", "284u94u20942",
  "data_users_id2", "094200220030"
)

data %>%
  mutate(
    id2 = name %>% str_extract("[0-9] $"), # ensure unique rows
    name = name %>% str_remove("[0-9] $") %>% str_remove("^data_")
  ) %>%
  pivot_wider(names_from = name, values_from = value) %>%
  select(`text id` = id, text, likes=metrics_likes, userid=users_id) %>%
  type_convert()
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   `text id` = col_character(),
#>   text = col_character(),
#>   likes = col_double(),
#>   userid = col_character()
#> )
#> # A tibble: 2 × 4
#>   `text id`  text                              likes userid      
#>   <chr>      <chr>                             <dbl> <chr>       
#> 1 8538u40952 la la la                              0 284u94u20942
#> 2 40942094i2 we love pie eating and pie making     5 094200220030

^{Created on 2022-05-05 by the reprex package (v2.0.0)}

CodePudding user response：

With tidyr, You can separate the name column by extract() and then transform the data to a wide form.

library(tidyr)

data %>%
  extract(name, c("name", "row"), "data_(. )(\\d )") %>%
  pivot_wider()

# # A tibble: 2 x 5
#   row   id         text                              metrics_likes users_id    
#   <chr> <chr>      <chr>                             <chr>         <chr>
# 1 1     8538u40952 la la la                          0             284u94u20942
# 2 2     40942094i2 we love pie eating and pie making 5             094200220030

CodePudding user response：

A base R option using reshape

reshape(
    transform(
        df,
        id = gsub(".*(\\d )$", "\\1", name),
        name = gsub(".*?_(.*)\\d $", "\\1", name)
    ),
    direction = "wide",
    idvar = "id",
    timevar = "name"
)

gives

  id   value.id                        value.text value.metrics_likes
1  1 8538u40952                          la la la                   0
2  2 40942094i2 we love pie eating and pie making                   5
  value.users_id
1   284u94u20942
2   094200220030