Home > Mobile >  Converting and minimizing a data.frame in an efficient way
Converting and minimizing a data.frame in an efficient way

Time:12-08

I'm new to R, I was wondering what is the most efficient way to convert my data into my Desired_Output below?

m="
    n min max median    q1    q3 group        m       SD
1  30  30  55   44.5 43.25 49.75 treat 45.38524 5.593169
2  30  31  55   47.0 44.00 49.00 treat 46.11951 4.886821
3  30  40  55   48.0 45.00 51.00 treat 47.92676 4.173242
4  15  30  51   44.0 42.50 45.50 treat 43.21604 4.245150
5  15  31  54   46.0 42.50 48.50 treat 44.94723 5.759449
6  15  44  55   48.0 45.00 48.50 treat 47.66393 3.012334
7  15  39  55   49.0 44.00 52.00 treat 48.01439 5.571240
8  15  41  55   48.0 44.50 50.00 treat 47.59677 4.261415
9  15  40  55   47.0 45.00 50.00 treat 47.38081 4.200670

10 18  42  55   46.0 44.00 49.50  cont 46.91764 3.996259
11 18  40  55   44.0 43.00 47.00  cont 45.25704 3.667377
12 18  41  55   44.5 44.00 50.00  cont 46.58674 4.334604
13  9  42  49   46.0 43.00 48.00  cont 45.60879 3.357931
14  9  42  48   44.0 43.00 45.00  cont 44.29745 1.878592
15  9  41  55   44.0 44.00 45.00  cont 45.43229 2.779801
16  9  43  55   50.0 44.00 52.00  cont 48.73261 5.506545
17  9  43  55   46.0 44.00 51.00  cont 47.61981 5.069204
18  9  41  55   50.0 44.00 51.00  cont 48.19267 5.403842"

data <- read.table(text=m, h=T)

Desired_Output="
nT       mT      sdT  nC       mC      sdC
30 45.38524 5.593169  18 46.91764 3.996259
30 46.11951 4.886821  18 45.25704 3.667377
30 47.92676 4.173242  18 46.58674 4.334604
.  .        .         .  .        .
.  .        .         .  .        .
.  .        .         .  .        .
15 47.38081 4.200670  9  48.19267 5.403842"

CodePudding user response:

Here is a base R solution,

l2 <- split(data[c('n', 'm', 'SD')], data$group)
do.call(cbind, Map(function(x, y){names(x) <- paste0(names(x), '_', y); x}, l2, names(l2)))

#   cont.n_cont cont.m_cont cont.SD_cont treat.n_treat treat.m_treat treat.SD_treat
#10          18    46.91764     3.996259            30      45.38524       5.593169
#11          18    45.25704     3.667377            30      46.11951       4.886821
#12          18    46.58674     4.334604            30      47.92676       4.173242
#13           9    45.60879     3.357931            15      43.21604       4.245150
#14           9    44.29745     1.878592            15      44.94723       5.759449
#15           9    45.43229     2.779801            15      47.66393       3.012334
#16           9    48.73261     5.506545            15      48.01439       5.571240
#17           9    47.61981     5.069204            15      47.59677       4.261415
#18           9    48.19267     5.403842            15      47.38081       4.200670

CodePudding user response:

It's little messy, but you may try using dplyr

library(dplyr)
data <- data %>%
  group_by(group) %>%
  mutate(idx = 1:n()) %>%
  select(group, n, idx, m, SD) %>%
  ungroup
df1 <- data %>%
  filter(group == "treat") %>%
  select(-group)
df2 <- data %>%
  filter(group == "cont") %>%
  select(-group)
df <- df1 %>%
  left_join(df2, by = "idx") %>%
  select(-idx)
names(df) <- c("nT", "mT", "sdT", "nC", "mC", "sdC")
df

     nT    mT   sdT    nC    mC   sdC
  <int> <dbl> <dbl> <int> <dbl> <dbl>
1    30  45.4  5.59    18  46.9  4.00
2    30  46.1  4.89    18  45.3  3.67
3    30  47.9  4.17    18  46.6  4.33
4    15  43.2  4.25     9  45.6  3.36
5    15  44.9  5.76     9  44.3  1.88
6    15  47.7  3.01     9  45.4  2.78
7    15  48.0  5.57     9  48.7  5.51
8    15  47.6  4.26     9  47.6  5.07
9    15  47.4  4.20     9  48.2  5.40

Or using tidyr::pivot_wider,

library(tidyr)
data %>%
  group_by(group) %>%
  mutate(idx = 1:n()) %>%
  select(group, n, idx, m, SD) %>%
  ungroup %>%
  pivot_wider(id_cols = idx, values_from = c(n, m, SD), names_from = group) %>%
  select(-idx)

  n_treat n_cont m_treat m_cont SD_treat SD_cont
    <int>  <int>   <dbl>  <dbl>    <dbl>   <dbl>
1      30     18    45.4   46.9     5.59    4.00
2      30     18    46.1   45.3     4.89    3.67
3      30     18    47.9   46.6     4.17    4.33
4      15      9    43.2   45.6     4.25    3.36
5      15      9    44.9   44.3     5.76    1.88
6      15      9    47.7   45.4     3.01    2.78
7      15      9    48.0   48.7     5.57    5.51
8      15      9    47.6   47.6     4.26    5.07
9      15      9    47.4   48.2     4.20    5.40

CodePudding user response:

New answer, dplyr only solution:

library(dplyr)
df %>% 
  select(n, m, SD, group) %>% 
  slice(10:18) %>% 
  rename_with(~ paste0(.x, "C")) %>% 
  bind_cols(df[1:9, ]) %>% 
  rename_with( ~ paste0(.x, "T"), c(n, m, SD)) %>% 
  select(nT, mT, sdT=SDT, nC, mC, sdC=SDC)
   nT       mT      sdT nC       mC      sdC
10 30 45.38524 5.593169 18 46.91764 3.996259
11 30 46.11951 4.886821 18 45.25704 3.667377
12 30 47.92676 4.173242 18 46.58674 4.334604
13 15 43.21604 4.245150  9 45.60879 3.357931
14 15 44.94723 5.759449  9 44.29745 1.878592
15 15 47.66393 3.012334  9 45.43229 2.779801
16 15 48.01439 5.571240  9 48.73261 5.506545
17 15 47.59677 4.261415  9 47.61981 5.069204
18 15 47.38081 4.200670  9 48.19267 5.403842
  • Related