Home > Mobile >  Filter for Row from Multiple Data Frame in R
Filter for Row from Multiple Data Frame in R

Time:04-10

I have five data frames with the same dimension (8 by 2) but with different column names as follows:

# NOTE: as posted, these five definitions are not valid R. Each right-hand
# side is a bare parenthesised, comma-separated list of `name = value` pairs,
# so parsing fails at the first comma; the `data.frame` call is missing
# after `<-`.
nbb <- (
nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563, 1.6514406, 0.5160516, 0.4964024, 0.2617795)
)

mbb <- (
mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368, 0.2814623, 0.1129459, 0.1233126, 0.4222578)
)

cbb <- (
cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943, 1.10850563, 0.40456698, 0.26027359, 0.02452239)
)

tmbb <- (
tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988, 1.23772208, 0.57628337, 0.56437185, 0.46460279)
)

tcbb <- (
tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357, 2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

I want to create a new data frame that contains, for each of the five data frames above, the row with the minimum RMSE. The row names of the new data frame should be the names of the original data frames (nbb, mbb, cbb, tmbb, tcbb):

rownames(df) <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")

I would like to have a result like the one below:

df lb RMSE
nbb 9 0.2617795
mbb 7 0.1129459
cbb 9 0.02452239
tmbb 4 0.08290467
tcbb 9 0.4148854

CodePudding user response:

First combine your 5 data frames into a named list, then use lapply() to go through all the data frames and extract the necessary information. Wrap the lapply() call in do.call() to row-bind (rbind) the results into a single data frame. Finally, convert the row names into a column.

You can skip rownames_to_column("df") if you want to have row names instead of a column storing the dataframe names.

library(tibble)

# Gather the five data frames in a named list; the names label the result rows.
df_list <- list(nbb = nbb, mbb = mbb, cbb = cbb, tmbb = tmbb, tcbb = tcbb)

# Per data frame, column 1 holds lb and column 2 holds RMSE. Keep the lb of
# the row with the smallest RMSE together with that RMSE, stack the one-row
# results (rbind takes the row names from the list names), then move the row
# names into a "df" column.
rownames_to_column(
  do.call(
    rbind,
    lapply(df_list, function(d) {
      data.frame(lb = d[which.min(d[, 2]), 1], RMSE = min(d[, 2]))
    })
  ),
  "df"
)

    df lb       RMSE
1  nbb  9 0.26177950
2  mbb  7 0.11294590
3  cbb  9 0.02452239
4 tmbb  4 0.08290467
5 tcbb  9 0.41488540

Input data

Note that the way you define the data frames in your post is not valid R (the data.frame() call is missing after the assignment arrow). I've included the correct definitions here for reference.

# Input data: five 8 x 2 data frames, each pairing the lb values 2 to 9 with
# an RMSE value.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(
    1.0152338, 0.7199394, 0.7990978, 0.9045563,
    1.6514406, 0.5160516, 0.4964024, 0.2617795
  )
)

mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(
    0.8324074, 0.9278236, 1.9817984, 0.9567368,
    0.2814623, 0.1129459, 0.1233126, 0.4222578
  )
)

cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(
    1.27782499, 1.96332220, 0.74704997, 0.46579943,
    1.10850563, 0.40456698, 0.26027359, 0.02452239
  )
)

tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(
    0.83240742, 1.05126826, 0.08290467, 0.76397988,
    1.23772208, 0.57628337, 0.56437185, 0.46460279
  )
)

tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(
    0.9328510, 0.8312332, 0.9402116, 1.6029357,
    2.0001519, 0.4387557, 0.5965582, 0.4148854
  )
)

CodePudding user response:

If you are open to a purrr approach, you could use

library(purrr)
library(dplyr)

# For every data frame in my_list: keep the row(s) whose *_RMSE value equals
# that column's minimum, strip the "<name>_" prefix from the column names, and
# let map_dfr() row-bind the pieces with the list names stored in a "df"
# column. map_dfr() already returns a single data frame, so no further
# bind_rows() call is needed.
map_dfr(
  my_list,
  # An explicit function for the outer step keeps its argument distinct from
  # the `.x` used by the inner dplyr lambdas.
  function(d) {
    d %>%
      filter(if_any(ends_with("_RMSE"), ~ .x == min(.x))) %>%
      rename_with(~ gsub(".*_", "", .x))
  },
  .id = "df"
)

this returns

    df lb       RMSE
1  cbb  9 0.02452239
2  mbb  7 0.11294590
3  nbb  9 0.26177950
4 tcbb  9 0.41488540
5 tmbb  4 0.08290467

where my_list is built as follows (borrowed from Rui Barradas):

my_list <- mget(ls(pattern = "bb$"))

CodePudding user response:

Here is a base R way.
First put the data frames in a list with mget, then lapply over the list an anonymous function that returns the minimum-RMSE row of each data frame. Bind the output rows and reorder them according to the wanted row names.

# Input data: five 8 x 2 data frames, each pairing the lb values 2 to 9 with
# an RMSE value.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563, 1.6514406, 0.5160516, 0.4964024, 0.2617795)
)

mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368, 0.2814623, 0.1129459, 0.1233126, 0.4222578)
)

cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943, 1.10850563, 0.40456698, 0.26027359, 0.02452239)
)

tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988, 1.23772208, 0.57628337, 0.56437185, 0.46460279)
)

tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357, 2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

# Collect every object in the current environment whose name ends in "bb".
# ls() returns the names sorted alphabetically, so the list is not in the
# wanted output order yet.
df_list <- mget(ls(pattern = "bb$"))

# For each data frame, return a one-row data frame with the lb (column 1) and
# RMSE (column 2) of the minimum-RMSE row. which.min() returns a zero-length
# result when there is no minimum to pick; such inputs yield NULL, which
# rbind() drops.
tmp <- lapply(df_list, \(x) {
  i <- which.min(x[[2]])
  if (length(i) > 0L) {
    data.frame(lb = x[i, 1], RMSE = x[i, 2])
  } else {
    NULL
  }
})
res <- do.call(rbind, tmp)
rm(tmp)

# rbind() names the rows after the list elements it actually bound, so take
# the labels from the row names; names(df_list) would have the wrong length
# if any element had been dropped.
res <- cbind.data.frame(df = rownames(res), res)

# Reorder by row name. order() applied to the wanted names yields the
# permutation that sorts them, which equals the wanted row positions only
# when that permutation happens to be its own inverse; indexing by name is
# correct for any set of names.
wanted <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")
res <- res[intersect(wanted, rownames(res)), ]
res
#>        df lb       RMSE
#> nbb   nbb  9 0.26177950
#> mbb   mbb  7 0.11294590
#> cbb   cbb  9 0.02452239
#> tmbb tmbb  4 0.08290467
#> tcbb tcbb  9 0.41488540

Created on 2022-04-10 by the reprex package (v2.0.1)

  • Related