I want to create a data frame `pathways` with two columns:
- `Genes`: all the genes that are in the `Genes` column of the `enriched` data frame.
- `Pathways`: the row names of `enriched` that correspond to each gene; a gene may appear in more than one row.
I'm not sure where to begin.
How I generated the `enriched` data frame:
# Build `enriched`: one row per entry of enrich.top5, one column per gene
# position (X1, X2, ...).
#
# Column 2 of enrich.top5 supplies the row labels (presumably the pathway
# name -- confirm against the input), so it is moved into the row names and
# then removed from the data.
rownames(enrich.top5) <- enrich.top5[, 2]
enrich.top5[, 2] <- NULL

# Each `Genes` entry is a ";"-separated string; split it into one character
# vector per row.
gene_lists <- strsplit(as.character(enrich.top5$Genes), ";", fixed = TRUE)

# Pad every row with NA up to the longest gene list before binding.
# A plain do.call("rbind", ...) recycles shorter vectors, which silently
# repeats genes in the trailing columns of shorter rows, and it drops rows
# whose split is empty, which then breaks the rownames<- assignment below.
n_genes <- max(lengths(gene_lists), 0L)
padded <- lapply(
  gene_lists,
  function(g) c(g, rep(NA_character_, n_genes - length(g)))
)
enriched <- data.frame(
  matrix(
    as.character(unlist(padded, use.names = FALSE)),
    nrow = length(padded),
    ncol = n_genes,
    byrow = TRUE
  )
)
rownames(enriched) <- rownames(enrich.top5)

# Padded cells are NA; drop them (e.g. with !is.na()) when reshaping to a
# long gene/pathway table.
enriched
> dput(enriched[1:5,1:20])
structure(list(X1 = c("CALML6", "ATF2", "MYLK2", "ATF2", "PRDM4"
), X2 = c("CALML3", "ARAF", "ITGA2B", "PPP2R2A", "CALML6"), X3 = c("CALML4",
"ELK1", "TNC", "TCL1B", "IRS1"), X4 = c("ACTB", "CRKL", "ELK1",
"TCL1A", "CALML3"), X5 = c("CRKL", "ELK4", "ACTB", "PPP2R1B",
"CALML4"), X6 = c("AKT2", "RPS6KA4", "MYLK3", "PPP2R1A", "CRKL"
), X7 = c("RASSF5", "RPS6KA3", "CRKL", "CREB3L4", "RPS6KA3"),
X8 = c("AKT3", "RPS6KA6", "MYLK", "CREB3L1", "RPS6KA6"),
X9 = c("KDR", "RPS6KA5", "ACTG1", "MYC", "RPS6KA5"), X10 = c("AKT1",
"MYC", "IGF1R", "AKT2", "AKT2"), X11 = c("PLCE1", "AKT2",
"MYLK4", "MYB", "ARHGDIA"), X12 = c("PRKCG", "RPS6KA2", "PPP1CB",
"CREB3L2", "RPS6KA2"), X13 = c("PRKCI", "AKT3", "COMP", "AKT3",
"AKT3"), X14 = c("PRKCB", "STMN1", "PPP1CC", "KDR", "RPS6KA1"
), X15 = c("PRKCA", "RPS6KA1", "CCND3", "AKT1", "ARHGDIB"
), X16 = c("TIAM1", "KDR", "CCND2", "FLT3LG", "AKT1"), X17 = c("ADCY9",
"AKT1", "CCND1", "PRKCA", "MAP3K5"), X18 = c("PRKD3", "PRKACA",
"IBSP", "EREG", "MAP2K1"), X19 = c("PARD3", "PRKACB", "TNN",
"CDC37", "MAP2K2"), X20 = c("PFN4", "PRKCG", "AKT2", "DDIT4",
"PRKCD")), row.names = c("Rap1 signaling pathway", "MAPK signaling pathway",
"Focal adhesion", "PI3K-Akt signaling pathway", "Neurotrophin signaling pathway"
), class = "data.frame")
Desired output (example only):
# Example of the desired long format: one row per gene, labelled with the
# pathway group it belongs to.
pathways <- local({
  # Genes listed per group, in the order they should appear in the result.
  genes_by_group <- list(
    TSG = c("TP53", "WT1", "PHF6"),
    DNAm = c("DNMT3A", "DNMT3B", "TET1", "TET2", "IDH1", "IDH2"),
    Signalling = c("FLT3", "KIT", "KRAS", "NRAS"),
    TFs = c("RUNX1", "CEBPA"),
    ChromMod = c("ASXL1", "EZH2", "KDM6A")
  )
  data.frame(
    Genes = unlist(genes_by_group, use.names = FALSE),
    # Repeat each group label once per gene in that group.
    Pathway = rep(names(genes_by_group), lengths(genes_by_group)),
    stringsAsFactors = FALSE
  )
})
head(pathways)
#> Genes Pathway
#> 1 TP53 TSG
#> 2 WT1 TSG
#> 3 PHF6 TSG
#> 4 DNMT3A DNAm
#> 5 DNMT3B DNAm
#> 6 TET1 DNAm
CodePudding user response:
Perhaps this helps:
# Melt `enriched` to long form using base R only. Converting the matrix to a
# table and back to a data frame yields one row per cell: Var1 = row name,
# Var2 = column name, Freq = cell value. Var2 is then dropped by position.
local({
  cells <- as.table(as.matrix(enriched))
  long <- as.data.frame(cells)
  long[-2]
})
Output:
Var1 Freq
1 Rap1 signaling pathway CALML6
2 MAPK signaling pathway ATF2
3 Focal adhesion MYLK2
4 PI3K-Akt signaling pathway ATF2
5 Neurotrophin signaling pathway PRDM4
6 Rap1 signaling pathway CALML3
7 MAPK signaling pathway ARAF
8 Focal adhesion ITGA2B
9 PI3K-Akt signaling pathway PPP2R2A
10 Neurotrophin signaling pathway CALML6
11 Rap1 signaling pathway CALML4
12 MAPK signaling pathway ELK1
13 Focal adhesion TNC
14 PI3K-Akt signaling pathway TCL1B
15 Neurotrophin signaling pathway IRS1
16 Rap1 signaling pathway ACTB
17 MAPK signaling pathway CRKL
...
CodePudding user response:
Do you mean something like this?
library(tidyverse)

# Reshape to long form: one row per (pathway, gene) pair.
# NOTE(review): `df` is presumably the question's `enriched` data frame --
# confirm before running.
df %>%
  # Move the row names into a regular column so they survive the pivot.
  rownames_to_column(var = "pathway") %>%
  pivot_longer(cols = -pathway, names_to = "name", values_to = "Genes") %>%
  # `name` holds the original column labels and is sorted as text, so "X10"
  # comes directly after "X1" rather than after "X9".
  arrange(name) %>%
  select(pathway, Genes) %>%
  as.data.frame()
pathway Genes
1 Rap1 signaling pathway CALML6
2 MAPK signaling pathway ATF2
3 Focal adhesion MYLK2
4 PI3K-Akt signaling pathway ATF2
5 Neurotrophin signaling pathway PRDM4
6 Rap1 signaling pathway AKT1
7 MAPK signaling pathway MYC
8 Focal adhesion IGF1R
9 PI3K-Akt signaling pathway AKT2
10 Neurotrophin signaling pathway AKT2
11 Rap1 signaling pathway PLCE1
12 MAPK signaling pathway AKT2
13 Focal adhesion MYLK4
14 PI3K-Akt signaling pathway MYB
15 Neurotrophin signaling pathway ARHGDIA
16 Rap1 signaling pathway PRKCG
17 MAPK signaling pathway RPS6KA2
18 Focal adhesion PPP1CB
19 PI3K-Akt signaling pathway CREB3L2
20 Neurotrophin signaling pathway RPS6KA2
21 Rap1 signaling pathway PRKCI
22 MAPK signaling pathway AKT3
23 Focal adhesion COMP
24 PI3K-Akt signaling pathway AKT3
25 Neurotrophin signaling pathway AKT3
26 Rap1 signaling pathway PRKCB
27 MAPK signaling pathway STMN1
28 Focal adhesion PPP1CC
29 PI3K-Akt signaling pathway KDR
30 Neurotrophin signaling pathway RPS6KA1
31 Rap1 signaling pathway PRKCA
32 MAPK signaling pathway RPS6KA1
33 Focal adhesion CCND3
34 PI3K-Akt signaling pathway AKT1
35 Neurotrophin signaling pathway ARHGDIB
36 Rap1 signaling pathway TIAM1
37 MAPK signaling pathway KDR
38 Focal adhesion CCND2
39 PI3K-Akt signaling pathway FLT3LG
40 Neurotrophin signaling pathway AKT1
41 Rap1 signaling pathway ADCY9
42 MAPK signaling pathway AKT1
43 Focal adhesion CCND1
44 PI3K-Akt signaling pathway PRKCA
45 Neurotrophin signaling pathway MAP3K5
46 Rap1 signaling pathway PRKD3
47 MAPK signaling pathway PRKACA
48 Focal adhesion IBSP
49 PI3K-Akt signaling pathway EREG
50 Neurotrophin signaling pathway MAP2K1
51 Rap1 signaling pathway PARD3
52 MAPK signaling pathway PRKACB
53 Focal adhesion TNN
54 PI3K-Akt signaling pathway CDC37
55 Neurotrophin signaling pathway MAP2K2
56 Rap1 signaling pathway CALML3
57 MAPK signaling pathway ARAF
58 Focal adhesion ITGA2B
59 PI3K-Akt signaling pathway PPP2R2A
60 Neurotrophin signaling pathway CALML6
61 Rap1 signaling pathway PFN4
62 MAPK signaling pathway PRKCG
63 Focal adhesion AKT2
64 PI3K-Akt signaling pathway DDIT4
65 Neurotrophin signaling pathway PRKCD
66 Rap1 signaling pathway CALML4
67 MAPK signaling pathway ELK1
68 Focal adhesion TNC
69 PI3K-Akt signaling pathway TCL1B
70 Neurotrophin signaling pathway IRS1
71 Rap1 signaling pathway ACTB
72 MAPK signaling pathway CRKL
73 Focal adhesion ELK1
74 PI3K-Akt signaling pathway TCL1A
75 Neurotrophin signaling pathway CALML3
76 Rap1 signaling pathway CRKL
77 MAPK signaling pathway ELK4
78 Focal adhesion ACTB
79 PI3K-Akt signaling pathway PPP2R1B
80 Neurotrophin signaling pathway CALML4
81 Rap1 signaling pathway AKT2
82 MAPK signaling pathway RPS6KA4
83 Focal adhesion MYLK3
84 PI3K-Akt signaling pathway PPP2R1A
85 Neurotrophin signaling pathway CRKL
86 Rap1 signaling pathway RASSF5
87 MAPK signaling pathway RPS6KA3
88 Focal adhesion CRKL
89 PI3K-Akt signaling pathway CREB3L4
90 Neurotrophin signaling pathway RPS6KA3
91 Rap1 signaling pathway AKT3
92 MAPK signaling pathway RPS6KA6
93 Focal adhesion MYLK
94 PI3K-Akt signaling pathway CREB3L1
95 Neurotrophin signaling pathway RPS6KA6
96 Rap1 signaling pathway KDR
97 MAPK signaling pathway RPS6KA5
98 Focal adhesion ACTG1
99 PI3K-Akt signaling pathway MYC
100 Neurotrophin signaling pathway RPS6KA5