R lapply ifelse with multiple statements on list of dataframes-CodePudding

I have a list (fc.list) of dataframes ("t1.fc.df", "t2.fc.df") that looks like this:

fc.list$t1.fc.df
ID   log2.FC    qval
1    5.22161    0
2    4.34383    0
3    3.764772   0.86250849
4   -3.095648   0.9412494
5   -3.489743   0.904717
6   -3.648665   0.9412494

fc.list$t2.fc.df
ID   log2.FC    qval
1    6.287703   0.034547415 
2    5.751197   0.007923771
3    5.093789   0.352390406
4   -5.337459   0.007400576
5   -5.760159   0.000000000
6   -6.793630   0.000000000

I need to create a variable in my dataframes called $test which says if log2.FC is > 1 and qval is < 0.05 then write "positive", else, if log2.FC is < -1 and qval is < 0.05 write "negative", else write "NS".

I wrote these lines of code using lapply and ifelse:

# Add a `test` column to every data frame in fc.list and store the updated
# list back into fc.list.
fc.list <- lapply(fc.list, function(x){
  # x[1] and x[2] select columns by position (each a one-column data frame);
  # unlist() flattens them to plain vectors.
  # NOTE(review): per the tables shown, position 1 is ID and position 2 is
  # log2.FC, not log2.FC and qval - verify the intended columns.
  # NOTE(review): `&&` is the scalar operator, so the ifelse() test is not an
  # element-wise mask here - confirm whether `&` was intended.
  x$test <- ifelse(unlist(x[1]) >= 1 &&
                       unlist(x[2]) <= 0.05, "positive",
                      ifelse(unlist(x[1]) <= -1 &&
                               unlist(x[2]) <= 0.05, "negative", "NS"))
  return(x)
})

but I get only "NS". Can anyone figure out where the problem is? Thanks.

CodePudding user response：

There are a few issues with your code, but they are easily fixable.

You're using unlist(x[column]); it's far easier and more legible to use $ to access the data.frame columns by name.
You indexed the columns by position, and the positions are wrong: x[1] is ID and x[2] is log2.FC, whereas the test needs log2.FC and qval.
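You used && instead of the & operator. && is the scalar operator: it only looks at the first element (and since R 4.3 it throws an error for inputs longer than one), whereas & compares element by element, which is what ifelse() needs.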

If you make those changes, your code works like a charm.


# Label every row of each data frame in dflist as "positive", "negative" or
# "NS" in a new `test` column, based on log2.FC and qval.
lapply(dflist, function(df) {
  # Element-wise masks: `&` (not `&&`) keeps one TRUE/FALSE per row.
  significant <- df$qval <= 0.05
  is_up <- df$log2.FC >= 1 & significant
  is_down <- df$log2.FC <= -1 & significant

  df$test <- ifelse(is_up, "positive", ifelse(is_down, "negative", "NS"))
  df
})


[[1]]
  ID   log2.FC        qval     test
1  1  6.287703 0.034547415 positive
2  2  5.751197 0.007923771 positive
3  3  5.093789 0.352390406       NS
4  4 -5.337459 0.007400576 negative
5  5 -5.760159 0.000000000 negative
6  6 -6.793630 0.000000000 negative

[[2]]
  ID   log2.FC      qval     test
1  1  5.221610 0.0000000 positive
2  2  4.343830 0.0000000 positive
3  3  3.764772 0.8625085       NS
4  4 -3.095648 0.9412494       NS
5  5 -3.489743 0.9047170       NS
6  6 -3.648665 0.9412494       NS

CodePudding user response：

You can try this

# Append a `type` column to each data frame in fc.list: "positive" or
# "negative" for rows with qval < 0.05 and |log2.FC| beyond 1, NA otherwise.
lapply(fc.list, function(df) {
  significant <- df$qval < 0.05
  direction <- ifelse(
    significant & df$log2.FC > 1, "positive",
    ifelse(significant & df$log2.FC < -1, "negative", NA)
  )
  cbind(df, type = direction)
})
$t1.fc.df
  ID   log2.FC      qval     type
1  1  5.221610 0.0000000 positive
2  2  4.343830 0.0000000 positive
3  3  3.764772 0.8625085     <NA>
4  4 -3.095648 0.9412494     <NA>
5  5 -3.489743 0.9047170     <NA>
6  6 -3.648665 0.9412494     <NA>

$t2.fc.df
  ID   log2.FC        qval     type
1  1  6.287703 0.034547415 positive
2  2  5.751197 0.007923771 positive
3  3  5.093789 0.352390406     <NA>
4  4 -5.337459 0.007400576 negative
5  5 -5.760159 0.000000000 negative
6  6 -6.793630 0.000000000 negative

Or a tidyverse approach:

library(tidyverse)
# Add a `type` column to each data frame in fc.list; rows that match neither
# rule fall through to the final TRUE branch and get NA.
fc.list %>%
  map(function(df) {
    mutate(
      df,
      type = case_when(
        qval < 0.05 & log2.FC > 1 ~ "positive",
        qval < 0.05 & log2.FC < -1 ~ "negative",
        TRUE ~ NA_character_
      )
    )
  })

By adding %>% map(count, type) you get the number of deregulated features per data frame:

$t1.fc.df
      type n
1 positive 2
2     <NA> 4

$t2.fc.df
      type n
1 negative 3
2 positive 2
3     <NA> 1

CodePudding user response：

Another possible solution:

library(tidyverse)

# Example input for the first data frame, parsed from inline text; the first
# line of the text supplies the column names (ID, log2.FC, qval).
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, which would silently change how the text is parsed.
df1 <- read.table(text = "ID   log2.FC    qval
1    5.22161    0
2    4.34383    0
3    3.764772   0.86250849
4   -3.095648   0.9412494
5   -3.489743   0.904717
6   -3.648665   0.9412494", header = TRUE)

# Example input for the second data frame, parsed from inline text; the first
# line of the text supplies the column names (ID, log2.FC, qval).
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, which would silently change how the text is parsed.
df2 <- read.table(text = "ID   log2.FC    qval
1    6.287703   0.034547415 
2    5.751197   0.007923771
3    5.093789   0.352390406
4   -5.337459   0.007400576
5   -5.760159   0.000000000
6   -6.793630   0.000000000", header = TRUE)

# Collect the two example data frames into an (unnamed) list.
mylist <- list(df1, df2)

# Add a `test` column to each data frame: "positive" / "negative" for rows
# with qval < 0.05 and log2.FC above 1 / below -1, "NS" for everything else.
map(mylist, function(df) {
  mutate(
    df,
    test = ifelse(
      qval < 0.05 & log2.FC > 1, "positive",
      ifelse(qval < 0.05 & log2.FC < -1, "negative", "NS")
    )
  )
})

#> [[1]]
#>   ID   log2.FC      qval     test
#> 1  1  5.221610 0.0000000 positive
#> 2  2  4.343830 0.0000000 positive
#> 3  3  3.764772 0.8625085       NS
#> 4  4 -3.095648 0.9412494       NS
#> 5  5 -3.489743 0.9047170       NS
#> 6  6 -3.648665 0.9412494       NS
#> 
#> [[2]]
#>   ID   log2.FC        qval     test
#> 1  1  6.287703 0.034547415 positive
#> 2  2  5.751197 0.007923771 positive
#> 3  3  5.093789 0.352390406       NS
#> 4  4 -5.337459 0.007400576 negative
#> 5  5 -5.760159 0.000000000 negative
#> 6  6 -6.793630 0.000000000 negative