Home > Blockchain >  How to create a new dataframe based on another dataframe?
How to create a new dataframe based on another dataframe?

Time:11-01

I want to create a data frame, `pathways`, with two columns:

  1. Genes: All the genes that are in the Genes column of the enriched data frame.
  2. Pathways: The row names of `enriched` for the rows that contain each gene (a gene may appear in more than one row).

I'm not sure where to begin.

How I generated the enriched data frame:

# Use the second column (the pathway names) as row names, then drop it.
rownames(enrich.top5) <- enrich.top5[, 2]
enrich.top5[, 2] <- NULL

# Split each ';'-separated gene string into its own character vector.
gene_lists <- strsplit(as.character(enrich.top5$Genes), ";", fixed = TRUE)

# rbind() recycles shorter vectors up to the longest one (with only a
# warning), which would fill the table with repeated genes whenever the
# pathways list different numbers of genes. Pad every vector with NA to a
# common length first; padded cells are NA in the result.
max_genes <- max(lengths(gene_lists), 0L)
gene_lists <- lapply(gene_lists, `length<-`, max_genes)

# One row per pathway, one gene per cell (columns X1, X2, ...).
enriched <- data.frame(do.call("rbind", gene_lists))
rownames(enriched) <- rownames(enrich.top5)

enriched

> dput(enriched[1:5,1:20])
structure(list(X1 = c("CALML6", "ATF2", "MYLK2", "ATF2", "PRDM4"
), X2 = c("CALML3", "ARAF", "ITGA2B", "PPP2R2A", "CALML6"), X3 = c("CALML4", 
"ELK1", "TNC", "TCL1B", "IRS1"), X4 = c("ACTB", "CRKL", "ELK1", 
"TCL1A", "CALML3"), X5 = c("CRKL", "ELK4", "ACTB", "PPP2R1B", 
"CALML4"), X6 = c("AKT2", "RPS6KA4", "MYLK3", "PPP2R1A", "CRKL"
), X7 = c("RASSF5", "RPS6KA3", "CRKL", "CREB3L4", "RPS6KA3"), 
    X8 = c("AKT3", "RPS6KA6", "MYLK", "CREB3L1", "RPS6KA6"), 
    X9 = c("KDR", "RPS6KA5", "ACTG1", "MYC", "RPS6KA5"), X10 = c("AKT1", 
    "MYC", "IGF1R", "AKT2", "AKT2"), X11 = c("PLCE1", "AKT2", 
    "MYLK4", "MYB", "ARHGDIA"), X12 = c("PRKCG", "RPS6KA2", "PPP1CB", 
    "CREB3L2", "RPS6KA2"), X13 = c("PRKCI", "AKT3", "COMP", "AKT3", 
    "AKT3"), X14 = c("PRKCB", "STMN1", "PPP1CC", "KDR", "RPS6KA1"
    ), X15 = c("PRKCA", "RPS6KA1", "CCND3", "AKT1", "ARHGDIB"
    ), X16 = c("TIAM1", "KDR", "CCND2", "FLT3LG", "AKT1"), X17 = c("ADCY9", 
    "AKT1", "CCND1", "PRKCA", "MAP3K5"), X18 = c("PRKD3", "PRKACA", 
    "IBSP", "EREG", "MAP2K1"), X19 = c("PARD3", "PRKACB", "TNN", 
    "CDC37", "MAP2K2"), X20 = c("PFN4", "PRKCG", "AKT2", "DDIT4", 
    "PRKCD")), row.names = c("Rap1 signaling pathway", "MAPK signaling pathway", 
"Focal adhesion", "PI3K-Akt signaling pathway", "Neurotrophin signaling pathway"
), class = "data.frame")

Desired output (example only):

# Example of the desired long layout: one row per gene, labelled with the
# pathway it belongs to. The genes are grouped by pathway here and the
# Pathway column is derived from the group sizes.
genes_by_pathway <- list(
  TSG = c("TP53", "WT1", "PHF6"),
  DNAm = c("DNMT3A", "DNMT3B", "TET1", "TET2", "IDH1", "IDH2"),
  Signalling = c("FLT3", "KIT", "KRAS", "NRAS"),
  TFs = c("RUNX1", "CEBPA"),
  ChromMod = c("ASXL1", "EZH2", "KDM6A")
)

pathways <- data.frame(
  Genes = unlist(genes_by_pathway, use.names = FALSE),
  Pathway = rep(names(genes_by_pathway), times = lengths(genes_by_pathway)),
  stringsAsFactors = FALSE
)

head(pathways)
#>    Genes Pathway
#> 1   TP53     TSG
#> 2    WT1     TSG
#> 3   PHF6     TSG
#> 4 DNMT3A    DNAm
#> 5 DNMT3B    DNAm
#> 6   TET1    DNAm

CodePudding user response:

Perhaps this helps:

# Reshape to long form with base R. as.table() keeps the row and column
# labels, so as.data.frame() emits one row per cell: Var1 (row name),
# Var2 (column name) and Freq (the cell value). [-2] drops Var2.
gene_matrix <- as.matrix(enriched)
long_format <- as.data.frame(as.table(gene_matrix))
long_format[-2]

-output

                      Var1    Freq
1           Rap1 signaling pathway  CALML6
2           MAPK signaling pathway    ATF2
3                   Focal adhesion   MYLK2
4       PI3K-Akt signaling pathway    ATF2
5   Neurotrophin signaling pathway   PRDM4
6           Rap1 signaling pathway  CALML3
7           MAPK signaling pathway    ARAF
8                   Focal adhesion  ITGA2B
9       PI3K-Akt signaling pathway PPP2R2A
10  Neurotrophin signaling pathway  CALML6
11          Rap1 signaling pathway  CALML4
12          MAPK signaling pathway    ELK1
13                  Focal adhesion     TNC
14      PI3K-Akt signaling pathway   TCL1B
15  Neurotrophin signaling pathway    IRS1
16          Rap1 signaling pathway    ACTB
17          MAPK signaling pathway    CRKL
...

CodePudding user response:

Do you mean something like this?

library(tidyverse)

# Start from `enriched`, the data frame defined in the question (pathway
# names as row names, one gene per cell). A bare `df` is not defined in
# this thread and would resolve to the stats::df() function.
enriched %>%
  # Move the pathway names out of the row names into a real column.
  rownames_to_column("pathway") %>%
  # One row per (pathway, original column) pair; the cell value is the gene.
  pivot_longer(-pathway, names_to = "name", values_to = "Genes") %>%
  # Sorts the original column labels as text, so X10-X19 come before X2.
  arrange(name) %>%
  select(-name) %>%
  data.frame()
  pathway   Genes
1           Rap1 signaling pathway  CALML6
2           MAPK signaling pathway    ATF2
3                   Focal adhesion   MYLK2
4       PI3K-Akt signaling pathway    ATF2
5   Neurotrophin signaling pathway   PRDM4
6           Rap1 signaling pathway    AKT1
7           MAPK signaling pathway     MYC
8                   Focal adhesion   IGF1R
9       PI3K-Akt signaling pathway    AKT2
10  Neurotrophin signaling pathway    AKT2
11          Rap1 signaling pathway   PLCE1
12          MAPK signaling pathway    AKT2
13                  Focal adhesion   MYLK4
14      PI3K-Akt signaling pathway     MYB
15  Neurotrophin signaling pathway ARHGDIA
16          Rap1 signaling pathway   PRKCG
17          MAPK signaling pathway RPS6KA2
18                  Focal adhesion  PPP1CB
19      PI3K-Akt signaling pathway CREB3L2
20  Neurotrophin signaling pathway RPS6KA2
21          Rap1 signaling pathway   PRKCI
22          MAPK signaling pathway    AKT3
23                  Focal adhesion    COMP
24      PI3K-Akt signaling pathway    AKT3
25  Neurotrophin signaling pathway    AKT3
26          Rap1 signaling pathway   PRKCB
27          MAPK signaling pathway   STMN1
28                  Focal adhesion  PPP1CC
29      PI3K-Akt signaling pathway     KDR
30  Neurotrophin signaling pathway RPS6KA1
31          Rap1 signaling pathway   PRKCA
32          MAPK signaling pathway RPS6KA1
33                  Focal adhesion   CCND3
34      PI3K-Akt signaling pathway    AKT1
35  Neurotrophin signaling pathway ARHGDIB
36          Rap1 signaling pathway   TIAM1
37          MAPK signaling pathway     KDR
38                  Focal adhesion   CCND2
39      PI3K-Akt signaling pathway  FLT3LG
40  Neurotrophin signaling pathway    AKT1
41          Rap1 signaling pathway   ADCY9
42          MAPK signaling pathway    AKT1
43                  Focal adhesion   CCND1
44      PI3K-Akt signaling pathway   PRKCA
45  Neurotrophin signaling pathway  MAP3K5
46          Rap1 signaling pathway   PRKD3
47          MAPK signaling pathway  PRKACA
48                  Focal adhesion    IBSP
49      PI3K-Akt signaling pathway    EREG
50  Neurotrophin signaling pathway  MAP2K1
51          Rap1 signaling pathway   PARD3
52          MAPK signaling pathway  PRKACB
53                  Focal adhesion     TNN
54      PI3K-Akt signaling pathway   CDC37
55  Neurotrophin signaling pathway  MAP2K2
56          Rap1 signaling pathway  CALML3
57          MAPK signaling pathway    ARAF
58                  Focal adhesion  ITGA2B
59      PI3K-Akt signaling pathway PPP2R2A
60  Neurotrophin signaling pathway  CALML6
61          Rap1 signaling pathway    PFN4
62          MAPK signaling pathway   PRKCG
63                  Focal adhesion    AKT2
64      PI3K-Akt signaling pathway   DDIT4
65  Neurotrophin signaling pathway   PRKCD
66          Rap1 signaling pathway  CALML4
67          MAPK signaling pathway    ELK1
68                  Focal adhesion     TNC
69      PI3K-Akt signaling pathway   TCL1B
70  Neurotrophin signaling pathway    IRS1
71          Rap1 signaling pathway    ACTB
72          MAPK signaling pathway    CRKL
73                  Focal adhesion    ELK1
74      PI3K-Akt signaling pathway   TCL1A
75  Neurotrophin signaling pathway  CALML3
76          Rap1 signaling pathway    CRKL
77          MAPK signaling pathway    ELK4
78                  Focal adhesion    ACTB
79      PI3K-Akt signaling pathway PPP2R1B
80  Neurotrophin signaling pathway  CALML4
81          Rap1 signaling pathway    AKT2
82          MAPK signaling pathway RPS6KA4
83                  Focal adhesion   MYLK3
84      PI3K-Akt signaling pathway PPP2R1A
85  Neurotrophin signaling pathway    CRKL
86          Rap1 signaling pathway  RASSF5
87          MAPK signaling pathway RPS6KA3
88                  Focal adhesion    CRKL
89      PI3K-Akt signaling pathway CREB3L4
90  Neurotrophin signaling pathway RPS6KA3
91          Rap1 signaling pathway    AKT3
92          MAPK signaling pathway RPS6KA6
93                  Focal adhesion    MYLK
94      PI3K-Akt signaling pathway CREB3L1
95  Neurotrophin signaling pathway RPS6KA6
96          Rap1 signaling pathway     KDR
97          MAPK signaling pathway RPS6KA5
98                  Focal adhesion   ACTG1
99      PI3K-Akt signaling pathway     MYC
100 Neurotrophin signaling pathway RPS6KA5
  • Related