Filtering by length of elements in list-CodePudding

I am trying to filter a list by the total length of each element. I have a list of fish that have different lengths based on their size (each fish has a unique ID, for example Ring20_4).

Example data

Is there a way to filter the list to only include fish at or above a certain length?

I have tried to filter by the distance in a dataset but not in a list.

CodePudding user response：

On a sample, you can use lapply with a filter function:

This assumes you need to filter on "Distance", but you can pick any other variable of your choice.

With toy data similar to yours:

# Two toy data frames, each with an ID column and a Distance column.
# as.numeric() keeps both columns double, as c(1, 2, ...) would.
fish1 <- data.frame(
  ID = as.numeric(1:4),
  Distance = as.numeric(4:7)
)
fish2 <- data.frame(
  ID = as.numeric(3:1),
  Distance = as.numeric(6:4)
)
# Collect the data frames into one unnamed list.
my.list <- list(fish1, fish2)
> my.list
[[1]]
  ID Distance
1  1        4
2  2        5
3  3        6
4  4        7

[[2]]
  ID Distance
1  3        6
2  2        5
3  1        4

Now use lapply with a function of your choice, or with dplyr to keep it simple:

# Keep only the rows with Distance > 4 in every data frame of the list.
# dplyr::filter() is namespace-qualified so the call does not depend on dplyr
# being attached; unqualified, `filter` would resolve to stats::filter() and
# `%>%` would be undefined.
lapply(my.list, function(x) dplyr::filter(x, Distance > 4))

gives:

[[1]]
  ID Distance
1  2        5
2  3        6
3  4        7

[[2]]
  ID Distance
1  3        6
2  2        5

CodePudding user response：

You may use subset without needing to define an anonymous function; just state your logical condition after a comma. You could use e.g. Distance >= 152.

Maybe you also want the data frames in the list to cover the same Distance values; for this you could find the intersection of all of them using Reduce.

# Distance values common to every data frame in `dat`: pull each Distance
# column, then fold the columns together with intersect().
ist <- Reduce(intersect, lapply(dat, function(d) d[["Distance"]]))
ist
# [1] 152 153 154 155 156 157 158

# Keep, in each data frame of `dat`, only the rows whose Distance is one of the
# values in `ist`. The condition is forwarded through lapply()'s `...` to
# subset(), so no anonymous function is needed.
lapply(dat, subset, Distance %in% ist)
# $Ring20
#        ID   Group        Ca        Mg        Pb Distance
# 7  Ring20 Cohort7 0.2623415 0.4691522 0.1024886      152
# 10 Ring20 Cohort7 0.2777731 0.2646508 0.3798007      153
# 13 Ring20 Cohort7 0.1010572 0.8905015 0.3315291      154
# 16 Ring20 Cohort7 0.6462756 0.7646237 0.1972682      155
# 19 Ring20 Cohort7 0.4654179 0.2533853 0.8723405      156
# 22 Ring20 Cohort7 0.7749328 0.1730146 0.8511648      157
# 25 Ring20 Cohort7 0.1738499 0.6967800 0.8402523      158
# 
# $Ring21
#        ID   Group        Ca        Mg        Pb Distance
# 8  Ring21 Cohort7 0.4473003 0.6473545 0.1637797      152
# 11 Ring21 Cohort7 0.3415376 0.2277453 0.4414869      153
# 14 Ring21 Cohort7 0.1747264 0.6489947 0.4150966      154
# 17 Ring21 Cohort7 0.4128844 0.5326486 0.3894496      155
# 20 Ring21 Cohort7 0.8993456 0.4658919 0.4995926      156
# 23 Ring21 Cohort7 0.8108586 0.1528877 0.4171203      157
# 26 Ring21 Cohort7 0.6055719 0.8429932 0.8930175      158
# 
# $Ring22
#        ID   Group        Ca        Mg        Pb Distance
# 9  Ring22 Cohort7 0.8632929 0.3859472 0.3867365      152
# 12 Ring22 Cohort7 0.5813189 0.8674091 0.3321643      153
# 15 Ring22 Cohort7 0.4401683 0.5943181 0.5883385      154
# 18 Ring22 Cohort7 0.3447552 0.3506669 0.5622521      155
# 21 Ring22 Cohort7 0.3554302 0.3992802 0.5260029      156
# 24 Ring22 Cohort7 0.4238290 0.8708108 0.3332444      157
# 27 Ring22 Cohort7 0.2309405 0.5164069 0.8617562      158

Data:

# Example data: a named list of three data frames (Ring20, Ring21, Ring22).
# Each data frame has nine rows and the columns ID (factor), Group, Ca, Mg, Pb
# and Distance. Distance runs over 150:158, 151:159 and 152:160 respectively,
# so the values 152 to 158 occur in all three.
dat <- list(Ring20 = structure(list(ID = structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L), .Label = c("Ring20", "Ring21", "Ring22"), class = "factor"), 
    Group = c("Cohort7", "Cohort7", "Cohort7", "Cohort7", "Cohort7", 
    "Cohort7", "Cohort7", "Cohort7", "Cohort7"), Ca = c(0.808933091722429, 
    0.253822685219347, 0.262341501004994, 0.277773097716272, 
    0.101057219691575, 0.646275559253991, 0.465417906455696, 
    0.774932761304081, 0.17384985871613), Mg = c(0.704813529551029, 
    0.150053730420768, 0.469152207486331, 0.264650804549456, 
    0.890501521900296, 0.764623709768057, 0.253385251387954, 
    0.173014574125409, 0.696779993548989), Pb = c(0.505049292556942, 
    0.510254093259573, 0.102488566935062, 0.379800706543028, 
    0.331529086641967, 0.197268167696893, 0.872340547665954, 
    0.851164837926626, 0.840252296812832), Distance = 150:158), row.names = c(1L, 
4L, 7L, 10L, 13L, 16L, 19L, 22L, 25L), class = "data.frame"), 
    Ring21 = structure(list(ID = structure(c(2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L), .Label = c("Ring20", "Ring21", "Ring22"
    ), class = "factor"), Group = c("Cohort7", "Cohort7", "Cohort7", 
    "Cohort7", "Cohort7", "Cohort7", "Cohort7", "Cohort7", "Cohort7"
    ), Ca = c(0.219783174432814, 0.44730027820915, 0.341537550836802, 
    0.174726424366236, 0.412884406372905, 0.899345614202321, 
    0.810858589597046, 0.605571932904422, 0.421316545456648), 
        Mg = c(0.74920008610934, 0.647354506701231, 0.227745342440903, 
        0.648994675837457, 0.532648558728397, 0.465891929157078, 
        0.15288767144084, 0.842993192560971, 0.553889319673181
        ), Pb = c(0.865333008766174, 0.163779740966856, 0.441486882418394, 
        0.415096608921885, 0.389449576288462, 0.499592613242567, 
        0.417120317555964, 0.893017530441284, 0.218490941636264
        ), Distance = 151:159), row.names = c(5L, 8L, 11L, 14L, 
    17L, 20L, 23L, 26L, 29L), class = "data.frame"), Ring22 = structure(list(
        ID = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Ring20", 
        "Ring21", "Ring22"), class = "factor"), Group = c("Cohort7", 
        "Cohort7", "Cohort7", "Cohort7", "Cohort7", "Cohort7", 
        "Cohort7", "Cohort7", "Cohort7"), Ca = c(0.863292878679931, 
        0.58131894133985, 0.440168274194002, 0.344755161739886, 
        0.355430244468153, 0.423828981257975, 0.230940494127572, 
        0.597983909398317, 0.211120963841677), Mg = c(0.385947158373892, 
        0.867409144155681, 0.594318131357431, 0.350666918046773, 
        0.399280233494937, 0.87081084959209, 0.516406905651093, 
        0.819099545665085, 0.554987411014736), Pb = c(0.386736532114446, 
        0.332164319977164, 0.58833845667541, 0.562252142839134, 
        0.526002921164036, 0.333244423009455, 0.861756228841841, 
        0.704294651001692, 0.812992637604475), Distance = 152:160), row.names = c(9L, 
    12L, 15L, 18L, 21L, 24L, 27L, 30L, 33L), class = "data.frame"))