Home > Mobile >  How to Identify the First and Last Non Zero Value in the Column and return both the location of both
How to Identify the First and Last Non Zero Value in the Column and return both the location of both

Time:12-19

How can I check the first and last non zero values in the columns and return its location from the data frame.

Attached is the data Frame I am working with.

structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01", 
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01", 
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", 
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", 
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01", 
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01", 
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01", 
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01", 
"2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0, 
0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906, 
957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534, 
1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201, 
33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df", 
"tbl", "data.frame"))

Any suggestions for finding the location based on the check would be very helpful. Thank you.

CodePudding user response:

You can do this :

library(tidyverse)

some_data <- structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01", 
                                "2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
                                "2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01", 
                                "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", 
                                "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", 
                                "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01", 
                                "2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01", 
                                "2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01", 
                                "2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01", 
                                "2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0, 
                                                                                         0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906, 
                                                                                         957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534, 
                                                                                         1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201, 
                                                                                         33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df", 
                                                                                                                                                    "tbl", "data.frame"))

first_and_last_non_zero_of_column <- function(df, var,
                                              rowlabel_var = "Row Labels") {
  if (identical(var, rowlabel_var)) {
    return(NULL)
  }

  nonzero <- select(
    df,
    {{ rowlabel_var }},
    {{ var }}
  ) |> filter(!!sym(var) > 0)
  enframe(list(
    first = slice_head(nonzero, n = 1),
    last = slice_tail(nonzero, n = 1)
  ), name = "order") |>
    unnest_wider(col = value) |>
    pivot_longer(
      cols = var,
      names_to = "variable_name"
    )
}

first_and_last_non_zero_of_column(some_data, "XYZ|551")

# if you have more than one variable to check
map_dfr(
  names(some_data),
  ~ first_and_last_non_zero_of_column(
    df = some_data,
    var = .x
  )
)

CodePudding user response:

With base R perhaps something like this:

# first non-zero
# which.min() for booleans is the 1st FALSE

first_nz <- which.min(df$`XYZ|551` == 0)
df[first_nz,]
#>    Row Labels XYZ|551
#> 10 2019-10-01     206

# last non-zero
# first reverse dataframe rows
df_rev <- df[nrow(df):1,]
# then same as before, but extract just the matching `Row Labels`
(last_nz_rowlbl <- df_rev$`Row Labels`[which.min(df_rev$`XYZ|551` == 0)])
#> [1] "2022-04-01"

df[df$`Row Labels` == last_nz_rowlbl,]
#>    Row Labels XYZ|551
#> 40 2022-04-01      33

Input:

df <- structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01", 
                                "2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
                                "2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01", 
                                "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", 
                                "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", 
                                "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01", 
                                "2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01", 
                                "2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01", 
                                "2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01", 
                                "2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0, 
                                                                                         0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906, 
                                                                                         957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534, 
                                                                                         1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201, 
                                                                                         33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df", 
                                                                                                                                                    "tbl", "data.frame"))

Created on 2022-12-19 with reprex v2.0.2

  • Related