I have a wide format
dataframe
which is arranged based on the minimum value for each column
(high to low). The largest minimum value is column 1
and the smallest minimum value is the last column
What I want to achieve is that the minimum value of each column
coincides with the position of the corresponding value in the next column
and so on.
Here is an example dataframe
:
library(tidyverse)
library(data.table)
MA_vol <- c(0.2486667, 0.2463333, 0.2426667, 0.2423333, 0.2376667, 0.2323333, 0.2270000, 0.2246667, 0.2216667, 0.2203333, 0.2183333, 0.2126667, 0.2076667, 0.2060000)
R_id <- rep(15, length(MA_vol))
df1 <- data.frame(R_id, MA_vol)
MA_vol <- c(0.2073333, 0.2053333, 0.2013333, 0.1993333, 0.1973333, 0.1970000, 0.1966667, 0.1946667, 0.1920000, 0.1890000, 0.1883333, 0.1866667, 0.1843333, 0.1823333, 0.1810000)
R_id <- rep(13, length(MA_vol))
df2 <- data.frame(R_id, MA_vol)
MA_vol <- c(0.2016667, 0.1996667, 0.1980000, 0.1970000, 0.1963333, 0.1956667, 0.1930000, 0.1913333, 0.1900000, 0.1893333, 0.1890000, 0.1863333, 0.1853333, 0.1820000, 0.1800000, 0.1780000, 0.1763333)
R_id <- rep(4, length(MA_vol))
df3 <- data.frame(R_id, MA_vol)
MA_vol <- c(0.2180000, 0.2146667, 0.2126667, 0.2103333, 0.2070000, 0.2040000, 0.2010000, 0.1993333, 0.1956667, 0.1950000, 0.1926667, 0.1920000, 0.1896667, 0.1890000, 0.1856667, 0.1830000, 0.1786667, 0.1763333, 0.1733333, 0.1720000, 0.1700000, 0.1686667, 0.1670000)
R_id <- rep(8, length(MA_vol))
df4 <- data.frame(R_id, MA_vol)
MA_vol <- c(0.2096667, 0.2063333, 0.2030000, 0.1993333, 0.1953333, 0.1916667, 0.1880000, 0.1870000, 0.1850000, 0.1830000, 0.1783333, 0.1753333, 0.1726667, 0.1716667, 0.1673333, 0.1666667, 0.1656667)
R_id <- rep(2, length(MA_vol))
df5 <- data.frame(R_id, MA_vol)
df <- bind_rows(df1, df2, df3, df4, df5)
# Order based on each min value (high to low)
R_minvalues <- df %>%
group_by(R_id) %>% # group by recession id
slice(which.min(MA_vol)) %>% # extract min volume values for each recession
select(R_id, MA_vol)
x <- R_minvalues[with(R_minvalues, order(-MA_vol)), ] # order by MA-vol min value (high to low)
R_id_order <- as.numeric(x$R_id)
# Reorder dataframe based on R_minvalues (high to low)
MRC_DF <- df %>%
arrange(match(R_id, R_id_order)) %>% # match R_id rows with R_id_order
transform(t = 1:nrow(df)) %>% # create t (time) column the length of the df
select(t, R_id, MA_vol) # select columns
R_order_chr <- as.character(R_id_order) # convert R_id_order to character so can rearrange columns
MRC_DF_wide <- dcast(setDT(MRC_DF), t ~ R_id, value.var = "MA_vol") %>% # convert df to wide format
select(all_of(R_order_chr)) # rearrange column order
colnames(MRC_DF_wide)[1:ncol(MRC_DF_wide)] <-
paste("R", colnames(MRC_DF_wide)[1:ncol(MRC_DF_wide)], sep = "") # add "R_" to start of numbers so syntax is correct
The following code produces the desired outcome, but it only does one column at a time and requires manual input (specify column name):
# identify positional index of minimum value and corresponding closest value in next column
a <- which.min(MRC_DF_wide$R15) # position of min value in 1st column
b <-
which.min(abs(MRC_DF_wide$R13 - min(MRC_DF_wide$R15, na.rm = TRUE))) # position of closest value in 2nd column
# 2nd column # 1st column
c <- b - a # positional index difference
# shift column rows up
shift <- function(x, n){
c(x[-(seq(n))], rep(NA, n))
}
MRC_DF_wide$R13 <- shift(MRC_DF_wide$R13, c) # shift 2nd column up by positional index difference
I would like to create a function that iterates over columns 1 & 2, then 2 & 3 and so on for ncol
of the dataframe
. This is my attempt which highlights the column id but it is unsuccessful:
matching.strip.fn <- function(df) {
min_index <- which.min(df[[i]]) # positional index of min value in 1st column
match_index <- which.min(abs(df[[i 1]] - min(df[[i]], na.rm = TRUE))) # positional index of closest value in 2nd column
# 2nd column 1st column
index_diff <- match_index - min_index # positional index difference
df$i 1 <- c(df[-(seq(index_diff))], rep(NA, index_diff)) # shift values up by positional difference in 2nd column
# 2nd column
}
Thanks in advance!
CodePudding user response:
I think you can solve this with purrr::accumulate()
quite handily:
accumulate(MRC_DF_wide, \(.x, .y) {
.y <- .y[!is.na(.y)]
pos <- which.min(.x) - which.min(abs(min(.x, na.rm = T) - .y))
c(rep(NA, pos), .y, rep(NA, length(.x) - pos - length(.y)))
}) |>
set_names(names(MRC_DF_wide)) |>
as.data.frame() %>%
filter(apply(., 1, \(x) ! all(is.na(x))))
#> R15 R13 R4 R8 R2
#> 1 0.2486667 NA NA NA NA
#> 2 0.2463333 NA NA NA NA
#> 3 0.2426667 NA NA NA NA
#> 4 0.2423333 NA NA NA NA
#> 5 0.2376667 NA NA NA NA
#> 6 0.2323333 NA NA NA NA
#> 7 0.2270000 NA NA NA NA
#> 8 0.2246667 NA NA NA NA
#> 9 0.2216667 NA NA NA NA
#> 10 0.2203333 NA NA NA NA
#> 11 0.2183333 NA NA NA NA
#> 12 0.2126667 NA NA NA NA
#> 13 0.2076667 0.2073333 NA 0.2180000 NA
#> 14 0.2060000 0.2053333 0.2016667 0.2146667 NA
#> 15 NA 0.2013333 0.1996667 0.2126667 NA
#> 16 NA 0.1993333 0.1980000 0.2103333 NA
#> 17 NA 0.1973333 0.1970000 0.2070000 NA
#> 18 NA 0.1970000 0.1963333 0.2040000 NA
#> 19 NA 0.1966667 0.1956667 0.2010000 NA
#> 20 NA 0.1946667 0.1930000 0.1993333 NA
#> 21 NA 0.1920000 0.1913333 0.1956667 0.2096667
#> 22 NA 0.1890000 0.1900000 0.1950000 0.2063333
#> 23 NA 0.1883333 0.1893333 0.1926667 0.2030000
#> 24 NA 0.1866667 0.1890000 0.1920000 0.1993333
#> 25 NA 0.1843333 0.1863333 0.1896667 0.1953333
#> 26 NA 0.1823333 0.1853333 0.1890000 0.1916667
#> 27 NA 0.1810000 0.1820000 0.1856667 0.1880000
#> 28 NA NA 0.1800000 0.1830000 0.1870000
#> 29 NA NA 0.1780000 0.1786667 0.1850000
#> 30 NA NA 0.1763333 0.1763333 0.1830000
#> 31 NA NA NA 0.1733333 0.1783333
#> 32 NA NA NA 0.1720000 0.1753333
#> 33 NA NA NA 0.1700000 0.1726667
#> 34 NA NA NA 0.1686667 0.1716667
#> 35 NA NA NA 0.1670000 0.1673333
#> 36 NA NA NA NA 0.1666667
#> 37 NA NA NA NA 0.1656667
Created on 2022-03-21 by the reprex package (v2.0.1)