Home > Mobile >  Change row item to new column in R
Change row item to new column in R

Time:03-08

I know the wording is a bit complicated. However, the image below should make it clear. Basically wherever exp_stat = 1 we split those ids and allocate ids which had exp_stat = 0 (Grouped by subset).

Here is my data:

df <- structure(list(id = c("24GMHA78TGM7D", "6E1T23X96Y35R", "9N6GXF4DZYQR9", 
"0007HGS378G8W", "7DHCHR11GCZ6C", "BG81E8NJN8M3R", "0007HGS378G8Z", 
"7DHCHR11GCZ6D", "BG81E8NJN8M3Q"), exp_stat = c(1L, 1L, 0L, 0L, 
0L, 0L, 1L, 0L, 0L), subset = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L)), class = "data.frame", row.names = c(NA, -9L))

Here is the expected output:

structure(list(id = c("24GMHA78TGM7D", "24GMHA78TGM7D", "6E1T23X96Y35R", 
"6E1T23X96Y35R", "0007HGS378G8Z", "0007HGS378G8Z"), id_1 = c("9N6GXF4DZYQR9", 
"0007HGS378G8W", "7DHCHR11GCZ6C", "BG81E8NJN8M3R", "7DHCHR11GCZ6D", 
"BG81E8NJN8M3Q"), subset = c(1L, 1L, 1L, 1L, 2L, 2L)), row.names = c(NA, 
-6L), class = "data.frame")

I tried using pivot_wider but to no avail. My attempt:

library(tidyverse)
library(data.table)

    df %>% 
      group_by(subset) %>%
      mutate(no1= rleid(subset,exp_stat)) %>% 
      mutate(no=row_number()) %>% 
      pivot_wider(names_from="exp_stat",
                  values_from="no1")

Entire data:

structure(list(id = c("24GMHA78TGM7D", "6E1T23X96Y35R", "9N6GXF4DZYQR9", 
"0007HGS378G8W", "7DHCHR11GCZ6C", "BG81E8NJN8M3R", "03WFCSQP8BEJV", 
"1C3EZWWH9AKVH", "FMX97PFVXHG9F", "2D788PS159YQA", "FA9E1X97QDBET", 
"FGF0BD4F92CB6", "0011BE039X4TN", "50YEZB08NC0PX", "07RGHNR8YZTV0", 
"612HPTSCD97DK", "3D91MHAVJQTBH", "3KJF3YYVC1X2Y", "1ATYE2M0BRV29", 
"1FW25TMBNNMKZ", "F433HAXGJ0B1F", "DS5NPYY3W9HFA", "0GP2FRPGMBS0X", 
"76AKE060S7P7A", "3TPVX8DDTX2Y2", "ES3DMV2BEWMSJ", "17EYCTGD2B5TM", 
"49M2ATYWJKR0E", "ANASVHK91GZS2", "EJHGHVHD776QD", "F2RMZFK564M2N", 
"BJF6JY5VA4MCH", "BZ1V1FFYCGD45", "3XAKGTF09T65M", "ADZC3Q7JX48V1", 
"FWX6RN0RQB5CZ", "CZ72XDBVPZYRQ", "D96NC82778FDP", "5PYVE3R8XB8BV", 
"6TAJC1HEXNDQP", "5VEQP8DHNSQB8", "A21B8H4TQ497T", "D1W9FTQ614ECH", 
"0C5H4CRY9PTNV", "16Z61HABMQAGF", "5Y7Y2FHQKBVQT", "8KQQ2GH11RCM5", 
"B52DEW8X6BNAN", "E5G6F2DAS9VKA", "2CHCK9D83QP3E", "1S9TXXSVGVJBV", 
"AGSJ33ZCCPZEA", "7DHPJF99FP34W", "90P5BBEJZM45D", "249QNK0HM8DN2", 
"54PGWA5EDSMZH", "1EJF0P0EVR88X", "1GD5KYBVF4ZMV", "DNBJC9ZTR8F5E", 
"8BKV7YE188FBS", "FKRVE51TTRYDE", "603RR2KTXKVA6", "F2DXD2E64HEXE", 
"09WF6M77F8CRB", "4GYRAF4HM97T0", "ARXJSKRX5VN8A"), exp_stat = c(1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), subset = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA, 
-66L))

CodePudding user response:

Here is one solution:

I first create a dataframe only containing the subset exp_stat = 1. Then I join to the subset exp_stat = 0:

library(tidyverse)

df1 <- df %>% as_tibble() %>% filter(exp_stat == 1) %>%
  group_by(subset) %>%
  mutate(join_col = 1:n())


df %>% group_by(subset) %>%
  mutate(n = sum(exp_stat == 1)) %>%
  filter(exp_stat == 0) %>%
  rename(id_1 = id) %>% 
  mutate(join_col = rep(1:first(n), each = n()/ first(n))[1:n()]) %>%
  left_join(df1, by = c("subset", "join_col")) %>%
  select(id, id_1, subset)


# A tibble: 6 x 3
# Groups:   subset [2]
  id            id_1          subset
  <chr>         <chr>          <int>
1 24GMHA78TGM7D 9N6GXF4DZYQR9      1
2 24GMHA78TGM7D 0007HGS378G8W      1
3 6E1T23X96Y35R 7DHCHR11GCZ6C      1
4 6E1T23X96Y35R BG81E8NJN8M3R      1
5 0007HGS378G8Z 7DHCHR11GCZ6D      2
6 0007HGS378G8Z BG81E8NJN8M3Q      2
  • Related