How to Identify the First and Last Non Zero Value in the Column and return both the location of both-CodePudding

How can I check the first and last non zero values in the columns and return its location from the data frame.

Attached is the data Frame I am working with.

structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01", 
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01", 
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", 
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", 
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01", 
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01", 
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01", 
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01", 
"2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0, 
0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906, 
957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534, 
1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201, 
33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df", 
"tbl", "data.frame"))

Any suggestions for finding the location based on the check would be very helpful. Thank you.

CodePudding user response：

You can do this :

library(tidyverse)

some_data <- structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01", 
                                "2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
                                "2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01", 
                                "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", 
                                "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", 
                                "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01", 
                                "2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01", 
                                "2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01", 
                                "2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01", 
                                "2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0, 
                                                                                         0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906, 
                                                                                         957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534, 
                                                                                         1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201, 
                                                                                         33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df", 
                                                                                                                                                    "tbl", "data.frame"))

first_and_last_non_zero_of_column <- function(df, var,
                                              rowlabel_var = "Row Labels") {
  if (identical(var, rowlabel_var)) {
    return(NULL)
  }

  nonzero <- select(
    df,
    {{ rowlabel_var }},
    {{ var }}
  ) |> filter(!!sym(var) > 0)
  enframe(list(
    first = slice_head(nonzero, n = 1),
    last = slice_tail(nonzero, n = 1)
  ), name = "order") |>
    unnest_wider(col = value) |>
    pivot_longer(
      cols = var,
      names_to = "variable_name"
    )
}

first_and_last_non_zero_of_column(some_data, "XYZ|551")

# if you have more than one variable to check
map_dfr(
  names(some_data),
  ~ first_and_last_non_zero_of_column(
    df = some_data,
    var = .x
  )
)

CodePudding user response：

With base R perhaps something like this:

# first non-zero
# which.min() for booleans is the 1st FALSE

first_nz <- which.min(df$`XYZ|551` == 0)
df[first_nz,]
#>    Row Labels XYZ|551
#> 10 2019-10-01     206

# last non-zero
# first reverse dataframe rows
df_rev <- df[nrow(df):1,]
# then same as before, but extract just the matching `Row Labels`
(last_nz_rowlbl <- df_rev$`Row Labels`[which.min(df_rev$`XYZ|551` == 0)])
#> [1] "2022-04-01"

df[df$`Row Labels` == last_nz_rowlbl,]
#>    Row Labels XYZ|551
#> 40 2022-04-01      33

Input:

df <- structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01", 
                                "2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
                                "2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01", 
                                "2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", 
                                "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", 
                                "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01", 
                                "2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01", 
                                "2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01", 
                                "2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01", 
                                "2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0, 
                                                                                         0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906, 
                                                                                         957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534, 
                                                                                         1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201, 
                                                                                         33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df", 
                                                                                                                                                    "tbl", "data.frame"))

^{Created on 2022-12-19 with reprex v2.0.2}