How can I check the first and last non zero values in the columns and return its location from the data frame.
Attached is the data Frame I am working with.
structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01",
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01",
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01",
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01",
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01",
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01",
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01",
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01",
"2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906,
957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534,
1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201,
33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df",
"tbl", "data.frame"))
Any suggestions for finding the location based on the check would be very helpful. Thank you.
CodePudding user response:
You can do this :
library(tidyverse)
some_data <- structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01",
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01",
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01",
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01",
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01",
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01",
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01",
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01",
"2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906,
957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534,
1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201,
33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df",
"tbl", "data.frame"))
first_and_last_non_zero_of_column <- function(df, var,
rowlabel_var = "Row Labels") {
if (identical(var, rowlabel_var)) {
return(NULL)
}
nonzero <- select(
df,
{{ rowlabel_var }},
{{ var }}
) |> filter(!!sym(var) > 0)
enframe(list(
first = slice_head(nonzero, n = 1),
last = slice_tail(nonzero, n = 1)
), name = "order") |>
unnest_wider(col = value) |>
pivot_longer(
cols = var,
names_to = "variable_name"
)
}
first_and_last_non_zero_of_column(some_data, "XYZ|551")
# if you have more than one variable to check
map_dfr(
names(some_data),
~ first_and_last_non_zero_of_column(
df = some_data,
var = .x
)
)
CodePudding user response:
With base R perhaps something like this:
# first non-zero
# which.min() for booleans is the 1st FALSE
first_nz <- which.min(df$`XYZ|551` == 0)
df[first_nz,]
#> Row Labels XYZ|551
#> 10 2019-10-01 206
# last non-zero
# first reverse dataframe rows
df_rev <- df[nrow(df):1,]
# then same as before, but extract just the matching `Row Labels`
(last_nz_rowlbl <- df_rev$`Row Labels`[which.min(df_rev$`XYZ|551` == 0)])
#> [1] "2022-04-01"
df[df$`Row Labels` == last_nz_rowlbl,]
#> Row Labels XYZ|551
#> 40 2022-04-01 33
Input:
df <- structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01",
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01",
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01",
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01",
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01",
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01",
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01",
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01",
"2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|551` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 206, 1814, 2324, 772, 1116, 1636, 1906,
957, 829, 911, 786, 938, 1313, 2384, 1554, 1777, 1635, 1534,
1015, 827, 982, 685, 767, 511, 239, 1850, 1301, 426, 261, 201,
33, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -46L), class = c("tbl_df",
"tbl", "data.frame"))
Created on 2022-12-19 with reprex v2.0.2