Home > Enterprise >  using for loop or lapply to append to rows of data frame in r from the ttest
using for loop or lapply to append to rows of data frame in r from the ttest

Time:04-30

Alright I have checked the internet, and I am still creating a data frame of replicated lines. I have a for loop that is creating the welch t-test results, I have saved the values like such:

gene <- biomarkers$Symbol
pval <- ttest$p.value
tstat <- ttest$statistic

I tried to iterate with the for loop to add the results to a data frame which created at the start of the chunk

df2 <- data.frame(pathol=(character()),
                  genes=character(),
                 p_value=character(), 
                 t_stat=character(), 
                 stringsAsFactors=FALSE) 


for (gene in biomarkers$Symbol) {
  print(gene)
  ddat <- degs[degs$Symbol== gene & degs$pathol=="mitosis",]
  ttest <- t.test(logFC ~ value, data = ddat)
  print (ttest)
  df2[nrow(df2)   1,] #(this added the 10 genes only to the rows, not to the column) 

then I realised I need to use lapply...which I tried this:

prova <- lapply(biomarkers$Symbol, function(gene) {
  append =  (gene)
  #ttest <- t.test(logFC ~ value, data = ddat)

  })

do.call(rbind, prova)

this created a list of the ten genes, however when I uncomment the variable 'ttest', it just adds a list of:

 [1,] "logFC by value"
 [2,] "logFC by value"
 [3,] "logFC by value"
 [4,] "logFC by value"
 [5,] "logFC by value"
 [6,] "logFC by value"
 [7,] "logFC by value"
 [8,] "logFC by value"
 [9,] "logFC by value"
[10,] "logFC by value"

I would like to end up with a data frame that looks like this:

pathol genes p_value t_stat
mitosis PBK 0.05 000.4
mitosis PLK4 0.02 000.9

#the gene will be the same for all of the rows. any help would be great!

EDIT

dput output:

dput(head(ddat))
structure(list(experiment = c("FP001RO_15_HI", "FP001RO_15_HI", 
"FP001RO_15_HI", "FP001RO_15_LOW", "FP001RO_15_LOW", "FP001RO_15_LOW"
), Human.Gene.entrezID = c("57405", "57405", "57405", "57405", 
"57405", "57405"), Symbol = c("SPC25", "SPC25", "SPC25", "SPC25", 
"SPC25", "SPC25"), description = c("SPC25 component of NDC80 kinetochore complex", 
"SPC25 component of NDC80 kinetochore complex", "SPC25 component of NDC80 kinetochore complex", 
"SPC25 component of NDC80 kinetochore complex", "SPC25 component of NDC80 kinetochore complex", 
"SPC25 component of NDC80 kinetochore complex"), score = c(3.867, 
3.867, 3.867, 3.867, 3.867, 3.867), type = c("WGGNC", "WGGNC", 
"WGGNC", "WGGNC", "WGGNC", "WGGNC"), pathol = c("mitosis", "mitosis", 
"mitosis", "mitosis", "mitosis", "mitosis"), Probeid = c("295661_at", 
"295661_at", "295661_at", "295661_at", "295661_at", "295661_at"
), logFC = c(-0.0641349806730976, -0.0641349806730976, -0.0641349806730976, 
-0.0324566291924587, -0.0324566291924587, -0.0324566291924587
), AveExpr = c(4.1541958195567, 4.1541958195567, 4.1541958195567, 
4.17003499529702, 4.17003499529702, 4.17003499529702), t = c(-0.567682269120301, 
-0.567682269120301, -0.567682269120301, -0.214562465957216, -0.214562465957216, 
-0.214562465957216), P.Value = c(0.580865708246137, 0.580865708246137, 
0.580865708246137, 0.833648126277364, 0.833648126277364, 0.833648126277364
), adj.P.Val = c(0.828914361133465, 0.828914361133465, 0.828914361133465, 
0.999594589241814, 0.999594589241814, 0.999594589241814), B = c(-6.4683360535952, 
-6.4683360535952, -6.4683360535952, -5.45944975240508, -5.45944975240508, 
-5.45944975240508), cpd = c("FP001RO", "FP001RO", "FP001RO", 
"FP001RO", "FP001RO", "FP001RO"), time = c(15, 15, 15, 15, 15, 
15), dose = c("HI", "HI", "HI", "LOW", "LOW", "LOW"), entrezgene_rat = c(295661, 
295661, 295661, 295661, 295661, 295661), external_gene_name_rat = c("Spc25", 
"Spc25", "Spc25", "Spc25", "Spc25", "Spc25"), external_gene_name_human = c("SPC25", 
"SPC25", "SPC25", "SPC25", "SPC25", "SPC25"), entrezGene_probes_human = c("57405_at", 
"57405_at", "57405_at", "57405_at", "57405_at", "57405_at"), 
    inMap_human_withGrey = c(1, 1, 1, 1, 1, 1), inMap_rat_withGrey = c(1, 
    1, 1, 1, 1, 1), variable = structure(c(11L, 12L, 10L, 10L, 
    11L, 12L), .Label = c("Necrosis1", "Necrosis2", "Necrosis3", 
    "hyperpl1", "hyperpl2", "hyperpl3", "fibrosis", "hypertrophy1", 
    "hypertrophy2", "mitosis1", "mitosis2", "mitosis3", "vacuolation1", 
    "vacuolation2", "vacuolation3"), class = "factor"), value = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"), 
    pathology = c("mitosis", "mitosis", "mitosis", "mitosis", 
    "mitosis", "mitosis")), row.names = c(13L, 14L, 15L, 55L, 
56L, 57L), class = "data.frame")

dput of gene

dput(biomarkers$Symbol)
c("PBK", "PLK4", "CDKN3", "CDCA3", "PRC1", "CDK1", "TCF19", "SHCBP1", 
"CENPK", "SPC25")

CodePudding user response:

We may use

prova <- lapply(biomarkers$Symbol, function(gene) {
      ddat <- subset(degs, Symbol == gene & pathol =="mitosis")
      fmla <- reformulate('value', response = 'logFC')
      if(nlevels(droplevels(ddat$val)) >=2) {
          ttest <- t.test(fmla, data = ddat)
          data.frame(pathol = 'mitosis', genes = gene,
                  p_value = ttest$p.value, t_stat = ttest$statistic)
        } else NULL
     

  })
names(prova) <- biomarkers$Symbol
out <- do.call(rbind, prova)

CodePudding user response:

Addendum to your question, remember to perform multiple hypothesis testing correction when doing lots of tests at once.

Working off of akrun's answer, here's BH correction:

ord = order(prova$p.value)
prova$fdr = prova$p.value*nrow(prova)/ord

This will reduce the number of false positives.

  • Related