I have five data frames with the same dimensions (8 by 2) but with different column names, as follows:
# Five tables, each with an `<name>_lb` column (values 2 to 9) and an
# `<name>_RMSE` column of eight values.
# NOTE(review): as written, `name <- ( a = ..., b = ... )` is not valid R;
# the constructor call (presumably `data.frame`) appears to be missing
# before each opening parenthesis -- confirm against the original post.
nbb <- (
nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563, 1.6514406, 0.5160516, 0.4964024, 0.2617795)
)
mbb <- (
mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368, 0.2814623, 0.1129459, 0.1233126, 0.4222578)
)
cbb <- (
cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943, 1.10850563, 0.40456698, 0.26027359, 0.02452239)
)
tmbb <- (
tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988, 1.23772208, 0.57628337, 0.56437185, 0.46460279)
)
tcbb <- (
tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357, 2.0001519, 0.4387557, 0.5965582, 0.4148854)
)
I want to create a new data frame that contains, for each of the five (5) data frames above, the row with the minimum RMSE. The row names should be the names of the data frames (nbb, mbb, cbb, tmbb, tcbb):
# Label each row of the result `df` with the name of a source data frame.
rownames(df) <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")
I would like to get a result like the one below:
df | lb | RMSE |
---|---|---|
nbb | 9 | 0.2617795 |
mbb | 7 | 0.1129459 |
cbb | 9 | 0.02452239 |
tmbb | 4 | 0.08290467 |
tcbb | 8 | 0.4387557 |
CodePudding user response:
First combine your 5 data frames into a list, then use `lapply` to go through all your data frames and output the necessary information. Also, wrap the `lapply()` with a `do.call` to row bind (`rbind`) the results. Finally, change the result into a data frame and convert the row names to a column.
You can skip `rownames_to_column("df")` if you want to have row names instead of a column storing the data frame names.
library(tibble)

# Name each list element after its data frame so that the names survive as
# row names of the combined result.
df_list <- list(nbb = nbb, mbb = mbb, cbb = cbb, tmbb = tmbb, tcbb = tcbb)

# For every data frame, locate the minimum-RMSE row once and read both lb
# (column 1) and RMSE (column 2) from that same row. A separate min() call
# returns NA when the column has missing values while which.min() skips
# them, so the reported lb and RMSE could otherwise disagree.
do.call(rbind, lapply(df_list, function(x) {
  best <- which.min(x[[2]])
  data.frame(lb = x[best, 1], RMSE = x[best, 2])
})) %>%
  rownames_to_column("df")
df lb RMSE
1 nbb 9 0.26177950
2 mbb 7 0.11294590
3 cbb 9 0.02452239
4 tmbb 4 0.08290467
5 tcbb 9 0.41488540
Input data
Moreover, note that the way you define the data frames in your post is not correct; I've included the "correct" way here for reference.
# Input data: five data frames, each with a `<name>_lb` column holding the
# values 2 to 9 (as doubles) and a `<name>_RMSE` column of eight values.
nbb <- data.frame(
  nbb_lb = as.numeric(2:9),
  nbb_RMSE = c(
    1.0152338, 0.7199394, 0.7990978, 0.9045563,
    1.6514406, 0.5160516, 0.4964024, 0.2617795
  )
)

mbb <- data.frame(
  mbb_lb = as.numeric(2:9),
  mbb_RMSE = c(
    0.8324074, 0.9278236, 1.9817984, 0.9567368,
    0.2814623, 0.1129459, 0.1233126, 0.4222578
  )
)

cbb <- data.frame(
  cbb_lb = as.numeric(2:9),
  cbb_RMSE = c(
    1.27782499, 1.96332220, 0.74704997, 0.46579943,
    1.10850563, 0.40456698, 0.26027359, 0.02452239
  )
)

tmbb <- data.frame(
  tmbb_lb = as.numeric(2:9),
  tmbb_RMSE = c(
    0.83240742, 1.05126826, 0.08290467, 0.76397988,
    1.23772208, 0.57628337, 0.56437185, 0.46460279
  )
)

tcbb <- data.frame(
  tcbb_lb = as.numeric(2:9),
  tcbb_RMSE = c(
    0.9328510, 0.8312332, 0.9402116, 1.6029357,
    2.0001519, 0.4387557, 0.5965582, 0.4148854
  )
)
CodePudding user response:
If you are open to a `purrr` approach, you could use
library(purrr)
library(dplyr)

# For each data frame in `my_list`: keep the row(s) whose *_RMSE value equals
# the column minimum, then strip everything up to the last underscore from the
# column names so every frame ends up with the columns `lb` and `RMSE`.
# map_dfr() row-binds the per-frame results itself and stores the list names
# in the `df` column, so no further bind_rows() call is needed.
my_list %>%
  map_dfr(
    function(d) {
      d %>%
        filter(if_any(ends_with("_RMSE"), function(col) col == min(col))) %>%
        rename_with(function(nm) gsub(".*_", "", nm))
    },
    .id = "df"
  )
this returns
df lb RMSE
1 cbb 9 0.02452239
2 mbb 7 0.11294590
3 nbb 9 0.26177950
4 tcbb 9 0.41488540
5 tmbb 4 0.08290467
where `my_list` is built as follows (borrowed from Rui Barradas):
# Gather every object in the current environment whose name ends in "bb"
# into a named list (ls() returns the names sorted alphabetically).
my_list <- mget(ls(pattern = "bb$"))
CodePudding user response:
Here is a base R way.
First put the data.frames in a list with `mget`, then `lapply` over the list with an anonymous function that outputs the minimum RMSE row. Bind the output rows and reorder according to the wanted row names.
# Base R: for each of the five data frames keep the row with the smallest
# RMSE, stack those rows, and present them in a fixed order.

# Input data: each data frame has a `<name>_lb` and a `<name>_RMSE` column.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563, 1.6514406, 0.5160516, 0.4964024, 0.2617795)
)
mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368, 0.2814623, 0.1129459, 0.1233126, 0.4222578)
)
cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943, 1.10850563, 0.40456698, 0.26027359, 0.02452239)
)
tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988, 1.23772208, 0.57628337, 0.56437185, 0.46460279)
)
tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357, 2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

# Collect every object whose name ends in "bb" into a named list. ls()
# returns names alphabetically, so the list order is cbb, mbb, nbb, tcbb, tmbb.
df_list <- mget(ls(pattern = "bb$"))

# One-row data frame (lb, RMSE) per input, taken at the minimum of column 2;
# NULL when which.min() finds no minimum (empty or all-NA column).
tmp <- lapply(df_list, \(x) {
  i <- which.min(x[[2]])
  if (length(i) > 0L) {
    data.frame(lb = x[i, 1], RMSE = x[i, 2])
  } else {
    NULL
  }
})
res <- do.call(rbind, tmp)
rm(tmp)

# rbind() drops NULL entries, so take the labels from the row names of the
# bound result; they then always line up with the rows that are present.
res <- cbind.data.frame(df = rownames(res), res)

# Reorder to the wanted sequence. match() gives, for each wanted name, its
# position in `res`; order() on the wanted names would give the inverse
# permutation, which is only correct when that permutation is self-inverse.
i <- match(c("nbb", "mbb", "cbb", "tmbb", "tcbb"), res$df)
res <- res[i, ]
res
#> df lb RMSE
#> nbb nbb 9 0.26177950
#> mbb mbb 7 0.11294590
#> cbb cbb 9 0.02452239
#> tmbb tmbb 4 0.08290467
#> tcbb tcbb 9 0.41488540
Created on 2022-04-10 by the reprex package (v2.0.1)