Home > Back-end >  Create function that iterates across dataframe columns
Create function that iterates across dataframe columns

Time:03-22

I have a wide format dataframe which is arranged based on the minimum value for each column (high to low). The largest minimum value is column 1 and the smallest minimum value is the last column What I want to achieve is that the minimum value of each column coincides with the position of the corresponding value in the next column and so on.

Here is an example dataframe:

library(tidyverse)
library(data.table)

MA_vol <- c(0.2486667, 0.2463333, 0.2426667, 0.2423333, 0.2376667, 0.2323333, 0.2270000, 0.2246667, 0.2216667, 0.2203333, 0.2183333, 0.2126667, 0.2076667, 0.2060000)
R_id <- rep(15, length(MA_vol))
df1 <- data.frame(R_id, MA_vol)

MA_vol <- c(0.2073333, 0.2053333, 0.2013333, 0.1993333, 0.1973333, 0.1970000, 0.1966667, 0.1946667, 0.1920000, 0.1890000, 0.1883333, 0.1866667, 0.1843333, 0.1823333, 0.1810000)
R_id <- rep(13, length(MA_vol))
df2 <- data.frame(R_id, MA_vol)

MA_vol <- c(0.2016667, 0.1996667, 0.1980000, 0.1970000, 0.1963333, 0.1956667, 0.1930000, 0.1913333, 0.1900000, 0.1893333, 0.1890000, 0.1863333, 0.1853333, 0.1820000, 0.1800000, 0.1780000, 0.1763333)
R_id <- rep(4, length(MA_vol))
df3 <- data.frame(R_id, MA_vol)

MA_vol <- c(0.2180000, 0.2146667, 0.2126667, 0.2103333, 0.2070000, 0.2040000, 0.2010000, 0.1993333, 0.1956667, 0.1950000, 0.1926667, 0.1920000, 0.1896667, 0.1890000, 0.1856667, 0.1830000, 0.1786667, 0.1763333, 0.1733333, 0.1720000, 0.1700000, 0.1686667, 0.1670000)
R_id <- rep(8, length(MA_vol))
df4 <- data.frame(R_id, MA_vol)

MA_vol <- c(0.2096667, 0.2063333, 0.2030000, 0.1993333, 0.1953333, 0.1916667, 0.1880000, 0.1870000, 0.1850000, 0.1830000, 0.1783333, 0.1753333, 0.1726667, 0.1716667, 0.1673333, 0.1666667, 0.1656667) 
R_id <- rep(2, length(MA_vol))
df5 <- data.frame(R_id, MA_vol)

df <- bind_rows(df1, df2, df3, df4, df5)

# Order based on each min value (high to low)
R_minvalues <- df %>%
  group_by(R_id) %>%                # group by recession id
  slice(which.min(MA_vol)) %>%      # extract min volume values for each recession
  select(R_id, MA_vol)

x <- R_minvalues[with(R_minvalues, order(-MA_vol)), ]     # order by MA-vol min value (high to low)
R_id_order <- as.numeric(x$R_id)

# Reorder dataframe based on R_minvalues (high to low)
MRC_DF <- df %>%
  arrange(match(R_id, R_id_order)) %>%       # match R_id rows with R_id_order
  transform(t = 1:nrow(df)) %>%     # create t (time) column the length of the df
  select(t, R_id, MA_vol)                    # select columns

R_order_chr <- as.character(R_id_order)     # convert R_id_order to character so can rearrange columns

MRC_DF_wide <- dcast(setDT(MRC_DF), t ~ R_id, value.var = "MA_vol") %>%     # convert df to wide format 
  select(all_of(R_order_chr))       # rearrange column order

colnames(MRC_DF_wide)[1:ncol(MRC_DF_wide)] <-
  paste("R", colnames(MRC_DF_wide)[1:ncol(MRC_DF_wide)], sep = "")     # add "R_" to start of numbers so syntax is correct

The following code produces the desired outcome, but it only does one column at a time and requires manual input (specify column name):

# identify positional index of minimum value and corresponding closest value in next column 
a <- which.min(MRC_DF_wide$R15)     # position of min value in 1st column 
b <-
  which.min(abs(MRC_DF_wide$R13 - min(MRC_DF_wide$R15, na.rm = TRUE)))     # position of closest value in 2nd column 
                # 2nd column           # 1st column

c <- b - a     # positional index difference 

# shift column rows up 
shift <- function(x, n){
  c(x[-(seq(n))], rep(NA, n))
}

MRC_DF_wide$R13 <- shift(MRC_DF_wide$R13, c)     # shift 2nd column up by positional index difference 

I would like to create a function that iterates over columns 1 & 2, then 2 & 3 and so on for ncol of the dataframe. This is my attempt which highlights the column id but it is unsuccessful:

matching.strip.fn <- function(df) {
  min_index <- which.min(df[[i]])     # positional index of min value in 1st column
  match_index <- which.min(abs(df[[i 1]] - min(df[[i]], na.rm = TRUE)))     # positional index of closest value in 2nd column
                            # 2nd column       1st column 
  index_diff <- match_index - min_index     # positional index difference 
  
  df$i   1 <- c(df[-(seq(index_diff))], rep(NA, index_diff))     # shift values up by positional difference in 2nd column 
# 2nd column
}

Thanks in advance!

CodePudding user response:

I think you can solve this with purrr::accumulate() quite handily:

accumulate(MRC_DF_wide, \(.x, .y) {
  .y <- .y[!is.na(.y)]
  pos <- which.min(.x) - which.min(abs(min(.x, na.rm = T) - .y))
  c(rep(NA, pos), .y, rep(NA, length(.x) - pos - length(.y)))
})  |>
  set_names(names(MRC_DF_wide)) |>
  as.data.frame() %>%
  filter(apply(., 1, \(x) ! all(is.na(x))))
#>          R15       R13        R4        R8        R2
#> 1  0.2486667        NA        NA        NA        NA
#> 2  0.2463333        NA        NA        NA        NA
#> 3  0.2426667        NA        NA        NA        NA
#> 4  0.2423333        NA        NA        NA        NA
#> 5  0.2376667        NA        NA        NA        NA
#> 6  0.2323333        NA        NA        NA        NA
#> 7  0.2270000        NA        NA        NA        NA
#> 8  0.2246667        NA        NA        NA        NA
#> 9  0.2216667        NA        NA        NA        NA
#> 10 0.2203333        NA        NA        NA        NA
#> 11 0.2183333        NA        NA        NA        NA
#> 12 0.2126667        NA        NA        NA        NA
#> 13 0.2076667 0.2073333        NA 0.2180000        NA
#> 14 0.2060000 0.2053333 0.2016667 0.2146667        NA
#> 15        NA 0.2013333 0.1996667 0.2126667        NA
#> 16        NA 0.1993333 0.1980000 0.2103333        NA
#> 17        NA 0.1973333 0.1970000 0.2070000        NA
#> 18        NA 0.1970000 0.1963333 0.2040000        NA
#> 19        NA 0.1966667 0.1956667 0.2010000        NA
#> 20        NA 0.1946667 0.1930000 0.1993333        NA
#> 21        NA 0.1920000 0.1913333 0.1956667 0.2096667
#> 22        NA 0.1890000 0.1900000 0.1950000 0.2063333
#> 23        NA 0.1883333 0.1893333 0.1926667 0.2030000
#> 24        NA 0.1866667 0.1890000 0.1920000 0.1993333
#> 25        NA 0.1843333 0.1863333 0.1896667 0.1953333
#> 26        NA 0.1823333 0.1853333 0.1890000 0.1916667
#> 27        NA 0.1810000 0.1820000 0.1856667 0.1880000
#> 28        NA        NA 0.1800000 0.1830000 0.1870000
#> 29        NA        NA 0.1780000 0.1786667 0.1850000
#> 30        NA        NA 0.1763333 0.1763333 0.1830000
#> 31        NA        NA        NA 0.1733333 0.1783333
#> 32        NA        NA        NA 0.1720000 0.1753333
#> 33        NA        NA        NA 0.1700000 0.1726667
#> 34        NA        NA        NA 0.1686667 0.1716667
#> 35        NA        NA        NA 0.1670000 0.1673333
#> 36        NA        NA        NA        NA 0.1666667
#> 37        NA        NA        NA        NA 0.1656667

Created on 2022-03-21 by the reprex package (v2.0.1)

  • Related