Home > Blockchain >  R: How do I group dataframe rows by binary category?
R: How do I group dataframe rows by binary category?

Time:03-20

I want to reorder the bipolar dataframe so that the rows whose Indication column (a binary categorical variable) is 1/2 appear first, followed by the rows where it is 0/1.

library(data.table)  # provides setDT(); it is not part of dplyr
library(dplyr)

# Convert both inputs to data.tables by reference.
setDT(dat)
setDT(ann)

# Build the lookup of FID -> Indication from the annotation table, relabel the
# two profiles as "0/1" / "1/2", then keep only the FIDs present in both
# tables (dat's columns come first because dat is the left-hand table).
bipolar <- ann %>%
  select(FID = Database_ID, Indication = Profile) %>%
  mutate(
    Indication = recode(
      Indication,
      "Unaffected control" = "0/1",
      "BP" = "1/2"
    )
  ) %>%
  inner_join(dat, ., by = "FID")

# Row count per Indication value; any NA in the result is replaced by 0.
bipolar %>%
  group_by(Indication) %>%
  tally() %>%
  replace(., is.na(.), 0)

Example of desired output:

FID IID SOL C1 C2 Indication
AC13 1 0 -0.02851720 0.00450319 1/2
AC14 1 0 -0.04220610 0.00394058 1/2
AC18 1 0 0.03357880 0.00310475 1/2
AC15 1 0 -0.01351050 -0.03165270 0/1
AC19 1 0 0.00453814 0.01607500 0/1
AC13 1 0 -0.02851720 0.00450319 1/2

bipolar

> dput(bipolar)

structure(list(FID = c("AC13", "AC14", "AC15", "AC18", "AC19",
"AC1", "AC20", "AC21", "AC23", "AC24", "AC27", "AC29", "AC2",
"AC30", "AC32", "AC33", "AC34", "AC35", "AC36", "AC38", "AC42",
"AC43", "AC46", "AC48", "AC49", "AC50", "AC51", "AC52", "AC53",
"AC54", "AC56", "AC57", "AC58", "AC5", "AC60", "AC61", "AC62",
"AC63", "AC64", "AC65", "AC67", "AC69", "AC6", "AC70", "AC71",
"AC72", "AC74", "AC76", "AC77", "AC79", "AC80", "AC83", "AC84",
"AC86", "AC89", "AC8", "AC90", "AC91", "AC102", "AC103", "AC104",
"AC105", "AC16", "AC95", "AC96", "AC99", "DE10", "DE12", "DE13",
"DE14", "DE16", "DE17", "DE22", "DE23", "DE27", "DE36", "DE37",
"DE38", "DE39", "DE3", "DE40", "DE45", "DE46", "DE47", "DE4",
"DE50", "DE51", "DE52", "DE55", "DE57", "DE59", "DE7", "DE32",
"DE43"), IID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), SOL = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), C1 = c(-0.0285172, -0.0422061, -0.0135105, 0.0335788,
0.00453814, -0.0196805, 0.00871406, -0.0359998, 0.00776039, -0.0152431,
-0.0157209, 0.0235421, -0.0292564, 0.000164687, -0.0104566, -0.0078923,
-0.011623, 0.0173727, 0.00327281, -0.0101589, -0.00564814, 0.0232102,
0.00209183, -0.0245178, 0.0295213, -0.0012923, 0.00775297, 0.019803,
0.00521905, 0.0143496, 0.0243968, 0.0403667, 0.0420627, -0.0234294,
-0.00923894, 0.00216892, -0.0174861, 0.000902948, 0.0250136,
0.131506, 0.036772, -0.0314508, 0.0014534, 0.0123288, 0.0152248,
0.030759, 0.029896, 0.0257204, -0.0132957, -0.00942661, 0.0286468,
0.0297961, -0.0225471, 0.0185018, -0.0103523, -0.00141674, -0.00553813,
0.0141762, 0.0254432, -0.0124213, -0.0220576, 0.0278842, 0.00114805,
-0.013759, 0.00493565, -0.0146888, -0.00463272, -0.0332577, -0.0200095,
0.00230828, -0.0232427, 0.00433733, -0.0244282, -0.0245909, 0.0155191,
0.00425367, 0.00868522, -0.0347233, 0.00191488, -0.0154211, 0.0154046,
-0.00126067, 0.00826277, -0.0124104, -0.0136281, -0.023651, 0.00853699,
-0.0373163, 0.0147421, -0.0425991, -0.0198727, -0.0197213, -0.0115728,
-0.0127898), C2 = c(0.00450319, 0.00394058, -0.0316527, 0.00310475,
0.016075, 0.0108606, 0.0195024, -0.0109932, 0.02644, 0.00177053,
-0.0180791, -0.0128455, -0.0353571, 0.00868615, 0.0278649, -0.0143205,
0.0198699, 0.00600335, 0.00636933, -0.0245385, 0.0247255, -0.0019047,
0.00418658, -0.0106317, 0.0329249, -0.0490193, -0.0322256, -0.00525161,
0.0127281, -0.0210357, 0.048556, -0.0157994, 0.0225328, 0.0420733,
-0.0722942, 0.0117474, -0.00108231, -0.053874, -0.0351118, 0.000781904,
-0.0188162, -0.0214653, 0.0150199, 0.0339645, -0.0335628, -0.0151206,
0.0163763, -0.015727, -0.0232298, 0.0172519, 0.0348876, -0.026288,
0.0383726, -0.018123, 0.0200251, -0.0246757, 0.0184051, 0.0249351,
-0.00324928, 0.0129067, -0.0143993, -0.00150337, -0.00089652,
-0.0477761, 0.00873251, -0.0184572, 0.00115896, 0.0252723, -0.0188119,
0.0403222, -0.00957213, -0.0280059, 0.0183744, -0.025548, 0.021987,
0.0268481, -0.0267149, 0.00712551, -0.0115199, 0.00559716, 0.00779719,
-0.0181187, -0.0182654, -0.0263084, 0.00126466, 0.0397802, 0.0457278,
0.0098323, -0.00865206, 0.0248558, -0.0163334, 0.00451314, 0.0337946,
0.0267819), Indication = c("1/2", "1/2", "0/1", "1/2", "0/1",
"1/2", "1/2", "1/2", "1/2", "0/1", "0/1", "0/1", "1/2", "0/1",
"1/2", "0/1", "1/2", "0/1", "1/2", "0/1", "1/2", "0/1", "1/2",
"1/2", "0/1", "1/2", "0/1", "0/1", "0/1", "0/1", "0/1", "1/2",
"1/2", "1/2", "0/1", "1/2", "1/2", "1/2", "1/2", "0/1", "0/1",
"1/2", "1/2", "0/1", "0/1", "1/2", "1/2", "1/2", "0/1", "0/1",
"0/1", "0/1", "0/1", "0/1", "1/2", "1/2", "0/1", "0/1", "1/2",
"0/1", "1/2", "0/1", "1/2", "0/1", "0/1", "0/1", "0/1", "0/1",
"1/2", "1/2", "0/1", "0/1", "0/1", "0/1", "1/2", "1/2", "0/1",
"0/1", "0/1", "1/2", "1/2", "1/2", "0/1", "0/1", "0/1", "0/1",
"1/2", "1/2", "1/2", "1/2", "0/1", "1/2", "1/2", "1/2")), class = c("data.table",
"data.frame"), row.names = c(NA, -94L))

CodePudding user response:

Ordering on a logical condition sorts FALSE before TRUE (logical values are ordered as numbers, with FALSE treated as 0 and TRUE as 1). Negating the test therefore makes the "1/2" rows FALSE, so they come first.

# "1/2" rows evaluate to FALSE here, and FALSE sorts ahead of TRUE.
bipolar[order(Indication != "1/2"), ]
#        FID   IID   SOL          C1          C2 Indication
#     <char> <int> <int>       <num>       <num>     <char>
#  1:   AC13     1     0 -0.02851720  0.00450319        1/2
#  2:   AC14     1     0 -0.04220610  0.00394058        1/2
#  3:   AC18     1     0  0.03357880  0.00310475        1/2
#  4:    AC1     1     0 -0.01968050  0.01086060        1/2
#  5:   AC20     1     0  0.00871406  0.01950240        1/2
#  6:   AC21     1     0 -0.03599980 -0.01099320        1/2
#  7:   AC23     1     0  0.00776039  0.02644000        1/2
#  8:    AC2     1     0 -0.02925640 -0.03535710        1/2
#  9:   AC32     1     0 -0.01045660  0.02786490        1/2
# 10:   AC34     1     0 -0.01162300  0.01986990        1/2
# ---                                                      
# 85:   DE22     1     0 -0.02442820  0.01837440        0/1
# 86:   DE23     1     0 -0.02459090 -0.02554800        0/1
# 87:   DE37     1     0  0.00868522 -0.02671490        0/1
# 88:   DE38     1     0 -0.03472330  0.00712551        0/1
# 89:   DE39     1     0  0.00191488 -0.01151990        0/1
# 90:   DE46     1     0  0.00826277 -0.01826540        0/1
# 91:   DE47     1     0 -0.01241040 -0.02630840        0/1
# 92:    DE4     1     0 -0.01362810  0.00126466        0/1
# 93:   DE50     1     0 -0.02365100  0.03978020        0/1
# 94:   DE59     1     0 -0.01987270 -0.01633340        0/1

CodePudding user response:

We can use arrange (the OP's expected output was not clear though)

library(dplyr)
# Sort on the logical condition: rows where it is FALSE (Indication is "1/2")
# are placed before rows where it is TRUE.
arrange(bipolar, Indication != "1/2")

-output

      FID   IID   SOL           C1           C2 Indication
    <char> <int> <int>        <num>        <num>     <char>
 1:   AC13     1     0 -0.028517200  0.004503190        1/2
 2:   AC14     1     0 -0.042206100  0.003940580        1/2
 3:   AC18     1     0  0.033578800  0.003104750        1/2
 4:    AC1     1     0 -0.019680500  0.010860600        1/2
 5:   AC20     1     0  0.008714060  0.019502400        1/2
 6:   AC21     1     0 -0.035999800 -0.010993200        1/2
 7:   AC23     1     0  0.007760390  0.026440000        1/2
 8:    AC2     1     0 -0.029256400 -0.035357100        1/2
 9:   AC32     1     0 -0.010456600  0.027864900        1/2
10:   AC34     1     0 -0.011623000  0.019869900        1/2
11:   AC36     1     0  0.003272810  0.006369330        1/2
12:   AC42     1     0 -0.005648140  0.024725500        1/2
13:   AC46     1     0  0.002091830  0.004186580        1/2
14:   AC48     1     0 -0.024517800 -0.010631700        1/2
15:   AC50     1     0 -0.001292300 -0.049019300        1/2
...

CodePudding user response:

From my understanding, you're trying to reverse-order your dataset by Indication. There are many ways of doing this; one of them is:

# Descending sort on the character column, so "1/2" comes before "0/1".
# desc() is namespace-qualified to match the qualified arrange() call.
dplyr::arrange(bipolar, dplyr::desc(Indication))
  • Related