Home > OS >  Apply %in% on a list with lapply
Apply %in% on a list with lapply

Time:09-18

I would like to find which values from the vector "Spec" are absent from df$Species, using this expression:

# Keep the elements of Spec that have no match anywhere in df$Species.
Spec[!Spec %in% df$Species]

The expression works properly on the whole dataset. However, I would like to apply it to each group of the dataset obtained with df %>% group_split(`East-West`, Transect, Station) from the dplyr package. But I do not know how to apply the %in% expression to a list (I suppose it is possible with map() or lapply(), but I have not managed to implement it).

Here is the sample for df:

# Sample data (dput output): a tibble declared with 140 rows and five columns:
#   `East-West` (character), Transect (factor), Station (factor),
#   Species (character) and frequency (numeric).
df <- structure(list(`East-West` = c("E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E"), Transect = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2", "3", "4", "6", 
"7"), class = "factor"), Station = structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 
15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 
15L, 16L, 16L), .Label = c("1", "10", "11", "12", "14", "16", 
"17", "18", "2", "20", "22", "23", "24", "3", "4", "5", "7", 
"8", "9"), class = "factor"), Species = c("Calanus finmarchicus", 
"Calanus glacialis", "Calanus hyperboreus", "Calanus spp.", "Copepoda", 
"Metridia longa", "Microcalanus pusillus", "Microcalanus pygmaeus", 
"Microcalanus spp.", "Microsetella norvegica", "Oithona similis", 
"Oithona spp.", "Oncaea borealis", "Pareuchaeta glacialis", "Pseudocalanus spp.", 
"Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus", 
"Calanus spp.", "Copepoda", "Metridia longa", "Microcalanus pusillus", 
"Microcalanus spp.", "Microsetella norvegica", "Oithona similis", 
"Oithona spp.", "Oncaea borealis", "Pseudocalanus spp.", "Calanus finmarchicus", 
"Calanus glacialis", "Calanus hyperboreus", "Calanus spp.", "Copepoda", 
"Metridia longa", "Microcalanus pusillus", "Microcalanus spp.", 
"Microsetella norvegica", "Oithona similis", "Oithona spp.", 
"Oncaea borealis", "Pseudocalanus spp.", "Calanoida indet.", 
"Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus", 
"Calanus spp.", "Copepoda", "Metridia longa", "Microcalanus pusillus", 
"Microcalanus pygmaeus", "Microcalanus spp.", "Oithona similis", 
"Oithona spp.", "Oncaea borealis", "Pareuchaeta glacialis", "Pseudocalanus spp.", 
"Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus", 
"Calanus spp.", "Copepoda", "Metridia longa", "Microcalanus pusillus", 
"Microcalanus pygmaeus", "Microcalanus spp.", "Microsetella norvegica", 
"Oithona atlantica", "Oithona similis", "Oithona spp.", "Oncaea borealis", 
"Pareuchaeta glacialis", "Pareuchaeta spp.", "Pseudocalanus spp.", 
"Calanoida indet.", "Calanus finmarchicus", "Calanus glacialis", 
"Calanus hyperboreus", "Calanus spp.", "Copepoda", "Metridia longa", 
"Microcalanus pygmaeus", "Microcalanus spp.", "Microsetella norvegica", 
"Oithona atlantica", "Oithona similis", "Oithona spp.", "Oncaea borealis", 
"Pareuchaeta glacialis", "Pareuchaeta spp.", "Pseudocalanus minutus", 
"Pseudocalanus spp.", "Calanus finmarchicus", "Calanus glacialis", 
"Calanus hyperboreus", "Calanus spp.", "Copepoda", "Metridia longa", 
"Microcalanus pusillus", "Microcalanus spp.", "Microsetella norvegica", 
"Oithona atlantica", "Oithona similis", "Oithona spp.", "Oncaea borealis", 
"Pareuchaeta glacialis", "Pareuchaeta spp.", "Pseudocalanus spp.", 
"Calanoida indet.", "Calanus finmarchicus", "Calanus glacialis", 
"Calanus hyperboreus", "Calanus spp.", "Copepoda", "Metridia longa", 
"Microcalanus pusillus", "Microcalanus pygmaeus", "Microcalanus spp.", 
"Oithona atlantica", "Oithona similis", "Oithona spp.", "Oncaea borealis", 
"Pareuchaeta glacialis", "Pareuchaeta spp.", "Pseudocalanus spp.", 
"Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus", 
"Calanus spp.", "Copepoda", "Metridia longa", "Microcalanus pusillus", 
"Microcalanus pygmaeus", "Microcalanus spp.", "Oithona similis", 
"Oithona spp.", "Oncaea borealis", "Pseudocalanus minutus", "Pseudocalanus spp.", 
"Calanus finmarchicus", "Calanus glacialis"), frequency = c(0.00341821910784481, 
0.00170910955392241, 0.0153819859853017, 0.185865663989062, 0.0358913006323705, 
0.0109383011451034, 0.0141001538198599, 0.00273457528627585, 
0.0769099299265083, 0.00683643821568963, 0.421295505041873, 0.151256195522133, 
0.0598188343872842, 0.000170910955392241, 0.0136728764313793, 
0.000794791061657445, 0.00122275547947299, 0.00550239965762847, 
0.0516614190077339, 0.019197261027726, 0.00122275547947299, 0.00733653287683795, 
0.0482377036652095, 0.00122275547947299, 0.102711460275731, 0.096842233974261, 
0.0215204964387247, 0.0128389325344664, 0.00794791061657445, 
0.00122275547947299, 0.0103934215755204, 0.10897808210803, 0.00843701280836365, 
6.11377739736496e-05, 0.0168740256167273, 0.0753217375355363, 
0.00183413321920949, 0.0755968575184177, 0.0646226270901476, 
0.230642252315593, 0.0277565493840369, 0.0247582205029014, 0.0154738878143133, 
0.00125725338491296, 0.00967117988394584, 0.151257253384913, 
0.0431334622823984, 0.000967117988394584, 0.106769825918762, 
0.0201160541586074, 0.0889748549323017, 0.15348162475822, 0.179303675048356, 
0.186847195357834, 0.000193423597678917, 0.0177949709864603, 
0.00635862653666808, 0.0026494277236117, 0.00423908435777872, 
0.0258584145824502, 0.0491733785502332, 0.0105977108944468, 0.0292496820686732, 
0.0426557863501484, 0.233997456549385, 0.00715345485375159, 0.00105977108944468, 
0.0475307333615939, 0.0320050869012293, 0.47769181856719, 0.000105977108944468, 
0.00211954217888936, 0.0275540483255617, 0.000106820488169631, 
0.0144207659029002, 0.00117502536986594, 0.0027773326924104, 
0.0384553757410671, 0.0246755327671847, 0.00769107514821343, 
0.117288896010255, 0.103028360839609, 0.00320461464508893, 0.00106820488169631, 
0.0616888319179619, 0.0893019281098115, 0.511029215403514, 0.000907974149441863, 
0.00160230732254446, 0.00598194733749933, 0.0155957912727661, 
0.00940733772342427, 0.00564440263405456, 0.00752587017873942, 
0.026340545625588, 0.0914393226716839, 0.0244590780809031, 0.051928504233302, 
0.190968955785513, 0.0112888052681091, 0.00188146754468485, 0.0658513640639699, 
0.151458137347131, 0.335841956726246, 0.000752587017873942, 0.000752587017873942, 
0.0244590780809031, 0.0569502556950256, 0.00929800092980009, 
0.00232450023245002, 0.00348675034867503, 0.0232450023245002, 
0.0854253835425383, 0.0168526266852627, 0.0122036262203626, 0.049395629939563, 
0.337633658763366, 0.00116225011622501, 0.097629009762901, 0.134239888423989, 
0.162715016271502, 0.000464900046490005, 0.00116225011622501, 
0.00581125058112506, 0.0285089803288036, 0.00475149672146726, 
0.00950299344293453, 0.0883778390192911, 0.246602679844151, 0.00380119737717381, 
0.039912572460325, 0.0585384396084767, 0.244892141024423, 0.121258196331845, 
0.0244226931483417, 0.117076879216953, 0.00665209541005417, 0.00570179606576071, 
0.0053156146179402, 0.00132890365448505)), row.names = c(NA, 
-140L), class = c("tbl_df", "tbl", "data.frame"))

And Spec:

# Reference list of taxa; each group's Species values are checked against it.
Spec <- c(
  "Acartia spp.",
  "Acartia longiremis",
  "Calanus spp.",
  "Calanus finmarchicus",
  "Calanus glacialis",
  "Calanus hyperboreus",
  "Microcalanus spp.",
  "Microcalanus pygmaeus",
  "Microcalanus pusillus",
  "Metridia longa",
  "Oncaea borealis",
  "Oithona similis",
  "Oithona atlantica",
  "Oithona spp.",
  "Pseudocalanus spp.",
  "Pareuchaeta spp.",
  "Pareuchaeta glacialis",
  "Calanoida indet.",
  "Copepoda",
  "Microsetella norvegica",
  "Pseudocalanus minutus",
  "Pseudocalanus acuspes",
  "Bradyidius similis",
  "Centropages spp.",
  "Pareuchaeta norvegica"
)

Edit: I do not want to remove anything from df. I want to extract the values in the vector "Spec" that are not present in df$Species, separately for each group. That way I could generate a list of the missing values for each group.

CodePudding user response:

You may use setdiff in by. Notice that by is basically split + lapply.

# For every East-West/Transect/Station combination, list the entries of Spec
# that never occur among that group's Species values.
by(
  data = df$Species,
  INDICES = Reduce(paste, df[1:3]),  # one space-separated key per row
  FUN = setdiff,
  x = Spec  # forwarded to setdiff(), so each group's Species is passed as `y`
)

Or maybe this gives nicer group names:

# Same per-group lookup, but the grouping key is stored in a column `g`, so
# the printed group headers read "g: <key>".
# The key is built from the piped data's own columns (by name) rather than
# from the global `df` by position, so the pipeline keeps working when its
# input is filtered or its columns are reordered first.
df |>
  transform(g = paste(`East-West`, Transect, Station)) |>
  with(by(Species, g, setdiff, x = Spec))
# g: E 1 1
# [1] "Acartia spp."          "Acartia longiremis"    "Oithona atlantica"    
# [4] "Pareuchaeta spp."      "Calanoida indet."      "Pseudocalanus minutus"
# [7] "Pseudocalanus acuspes" "Bradyidius similis"    "Centropages spp."     
# [10] "Pareuchaeta norvegica"
# ---------------------------------------------------------- 
#   g: E 1 2
# [1] "Acartia spp."          "Acartia longiremis"    "Microcalanus pygmaeus"
# [4] "Oithona atlantica"     "Pareuchaeta spp."      "Pareuchaeta glacialis"
# [7] "Calanoida indet."      "Pseudocalanus minutus" "Pseudocalanus acuspes"
# [10] "Bradyidius similis"    "Centropages spp."      "Pareuchaeta norvegica"
# ---------------------------------------------------------- 
#   g: E 1 4
# [1] "Acartia spp."           "Acartia longiremis"    
# [3] "Oithona atlantica"      "Pareuchaeta spp."      
# [5] "Microsetella norvegica" "Pseudocalanus minutus" 
# [7] "Pseudocalanus acuspes"  "Bradyidius similis"    
# [9] "Centropages spp."       "Pareuchaeta norvegica" 
# ---------------------------------------------------------- 
#   g: E 1 5
# [1] "Acartia spp."          "Acartia longiremis"    "Calanoida indet."     
# [4] "Pseudocalanus minutus" "Pseudocalanus acuspes" "Bradyidius similis"   
# [7] "Centropages spp."      "Pareuchaeta norvegica"
# ---------------------------------------------------------- 
#   g: E 2 1
# [1] "Acartia spp."          "Acartia longiremis"    "Microcalanus pusillus"
# [4] "Pseudocalanus acuspes" "Bradyidius similis"    "Centropages spp."     
# [7] "Pareuchaeta norvegica"
# ---------------------------------------------------------- 
#   g: E 2 11
# [1] "Acartia spp."          "Acartia longiremis"    "Microcalanus pygmaeus"
# [4] "Calanoida indet."      "Pseudocalanus minutus" "Pseudocalanus acuspes"
# [7] "Bradyidius similis"    "Centropages spp."      "Pareuchaeta norvegica"
# ---------------------------------------------------------- 
#   g: E 2 12
# [1] "Acartia spp."           "Acartia longiremis"     "Microsetella norvegica"
# [4] "Pseudocalanus minutus"  "Pseudocalanus acuspes"  "Bradyidius similis"    
# [7] "Centropages spp."       "Pareuchaeta norvegica" 
# ---------------------------------------------------------- 
#   g: E 2 4
# [1] "Acartia spp."           "Acartia longiremis"    
# [3] "Oithona atlantica"      "Pareuchaeta spp."      
# [5] "Pareuchaeta glacialis"  "Calanoida indet."      
# [7] "Microsetella norvegica" "Pseudocalanus acuspes" 
# [9] "Bradyidius similis"     "Centropages spp."      
# [11] "Pareuchaeta norvegica" 
# ---------------------------------------------------------- 
#   g: E 2 5
# [1] "Acartia spp."           "Acartia longiremis"    
# [3] "Calanus spp."           "Calanus hyperboreus"   
# [5] "Microcalanus spp."      "Microcalanus pygmaeus" 
# [7] "Microcalanus pusillus"  "Metridia longa"        
# [9] "Oncaea borealis"        "Oithona similis"       
# [11] "Oithona atlantica"      "Oithona spp."          
# [13] "Pseudocalanus spp."     "Pareuchaeta spp."      
# [15] "Pareuchaeta glacialis"  "Calanoida indet."      
# [17] "Copepoda"               "Microsetella norvegica"
# [19] "Pseudocalanus minutus"  "Pseudocalanus acuspes" 
# [21] "Bradyidius similis"     "Centropages spp."      
# [23] "Pareuchaeta norvegica" 

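You may also try paste0 instead of paste, which avoids whitespace within the group names (but note that without a separator different groups can end up with the same name, e.g. Transect 1/Station 11 and Transect 11/Station 1). If you depend on %in% you could also do something like by(df, Reduce(paste, df[1:3]), \(x) Spec[!Spec %in% x$Species]), but setdiff is more concise.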

  • Related