I am trying to use the pivot longer function, except for my date-time
column, and except for the columns that have "NA"
's in them. Previously I was using this code because instead of choosing not to pivot columns with NA
's I was choosing not to pivot with columns that had the name stationary
in it's title.
results <-
as_tibble(LaHave_WA2021) %>%
# Pivot non-stationary loggers
pivot_longer(
-c(Date_Time_GMT_3, ends_with("Stationary")), #this is the line I need to change
names_to = "response",
values_to = "y"
)
I was thinking of using !contains
, but I think that is looking if the column name (not the values within it) does not contain NA
, for e.g.
results <-
as_tibble(LaHave_WA2021) %>%
# Pivot non-stationary loggers
pivot_longer(
-c(Date_Time_GMT_3, !contains("NA")), #changed to contains here
names_to = "response",
values_to = "y"
)
Any ideas how I can do this?
here is a sample of my datafram
structure(list(Date_Time_GMT_3 = structure(c(1622552400, 1622553300,
1622554200, 1622555100, 1622556000, 1622556900), class = c("POSIXct",
"POSIXt"), tzone = "EST"), X20822244_27LH_Stationary = c(13.173,
13.269, 13.269, 13.269, 13.269, 13.269), X20874232_B2LH_Stationary = c(14.517,
14.517, 14.517, 14.517, 14.517, 14.517), X20874286_X1LH_Stationary = c(13.558,
13.558, 13.558, 13.654, 13.654, 13.75), X2322844_X1LH_AIR = c(12.11,
12.207, 12.304, 12.401, 12.401, 12.594), X20817728_X3LH_Stationary = c(14.709,
14.709, 14.709, 14.709, 14.804, 14.804), X20676884_X3LH_U_StationaryCompare = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819743_X6LH_Stationary = c(15.282,
15.091, 15.282, 15.091, 14.996, 15.187), X20676900_50LH_U = c(12.207,
12.207, 12.207, 12.207, 12.207, 12.207), X20822214_73LH_U = c(12.401,
12.304, 12.304, 12.304, 12.304, 12.401), X20676887_44LH_U = c(11.722,
11.722, 11.722, 11.722, 11.722, 11.722), X20822223_46LH_U = c(12.401,
12.401, 12.401, 12.401, 12.497, 12.497), X20874231_56LH_U = c(15.282,
15.187, 15.187, 15.187, 15.187, 15.187), X20874287_86LH_U = c(11.722,
11.722, 11.722, 11.722, 11.819, 11.819), X20874298_71LH_U = c(12.69,
12.594, 12.594, 12.594, 12.594, 12.594), X20874309_51LH_U = c(12.594,
12.594, 12.69, 12.69, 12.69, 12.787), X20676887_X2LH_S = c(16.9674670250765,
16.9674670250765, 16.9674670250765, 16.9674670250765, 17.0422755866248,
17.0422755866248), X20819831_11LH_S = c(12.8720832098541, 12.8720832098541,
12.8720832098541, 12.8720832098541, 12.8720832098541, 12.8720832098541
), X20822214_X4LH_S = c(17.5627317604855, 17.4148767005618, 17.5627317604855,
17.4148767005618, 17.3413362257306, 17.4891912856543), X20822223_B3LH_S = c(19.4491451568519,
19.4491451568519, 19.4491451568519, 19.4491451568519, 19.4491451568519,
19.4491451568519), X20874231_62LH_S = c(15.271566973638, 15.271566973638,
15.271566973638, 15.271566973638, 15.352583630419, 15.352583630419
), X20874298_B5LH_S = c(16.18926182318, 16.0019888653251, 16.18926182318,
16.0019888653251, 15.9088426297429, 16.0961155875977), X20874309_B4LH_S = c(16.4017335880702,
16.4017335880702, 16.4017335880702, 16.4017335880702, 16.4017335880702,
16.4017335880702), X20676900_X5LH_S = c(13.1517913863509, 13.1517913863509,
13.1517913863509, 13.2570884294554, 13.2570884294554, 13.3623854725599
), X20874287_58LH_U = c(12.913887422357, 12.913887422357, 12.913887422357,
13.0000515069191, 13.0000515069191, 13.0862155914811), X20676900_54LH_S = c(11.405378599334,
11.405378599334, 11.405378599334, 11.5019113471797, 11.5019113471797,
11.5984440950255), X20819831_B1LH_S = c(16.3133453742116, 16.3133453742116,
16.3133453742116, 16.3133453742116, 16.3133453742116, 16.3133453742116
), X20822214_76LH_S = c(12.704891245155, 12.704891245155, 12.704891245155,
12.7949447496304, 12.7949447496304, 12.8849982541058), X20874231_31LH_S = c(13.8728743143662,
13.8728743143662, 13.8728743143662, 13.8728743143662, 13.9537598661273,
13.9537598661273), X20874298_55LHA_S = c(15.4572659453372, 15.2879259022681,
15.4572659453372, 15.2879259022681, 15.2036991792756, 15.3730392223447
), X20822223_55LHB_S = c(15.4378290607062, 15.2700462580074,
15.4378290607062, 15.2700462580074, 15.1865940786546, 15.3543768813534
), X20874287_70LH_S = c(16.429020745951, 16.429020745951, 16.429020745951,
16.429020745951, 16.505152496343, 16.505152496343), X20874309_69LH_S = c(12.693578938679,
12.693578938679, 12.693578938679, 12.7722769090551, 12.7722769090551,
12.8509748794313), X10694645_20LH_S = c(14.1427870767824, 14.2416357374311,
14.2416357374311, 14.2416357374311, 14.2416357374311, 14.2416357374311
), X20676900_57LH_S = c(14.4625399465288, 14.5352849038226, 14.5352849038226,
14.5352849038226, 14.5352849038226, 14.5352849038226), X20819831_3LH_S = c(13.7114345882018,
13.8015037541561, 13.8015037541561, 13.8015037541561, 13.8015037541561,
13.8015037541561), X20822223_4LH_S = c(14.5467983081353, 14.628865937343,
14.628865937343, 14.628865937343, 14.628865937343, 14.628865937343
), X20874231_79LH_S = c(13.5990052211839, 13.5990052211839, 13.5990052211839,
13.5990052211839, 13.5990052211839, 13.5990052211839), X20874287_67LH_S = c(16.7919014799185,
16.7919014799185, 16.7919014799185, 16.7919014799185, 16.8571822586659,
16.8571822586659), X20874298_47LH_S = c(13.1257581381716, 13.2199456919256,
13.2199456919256, 13.2199456919256, 13.2199456919256, 13.2199456919256
), X20874309_74LH_S = c(14.2622498116035, 14.2622498116035, 14.2622498116035,
14.2622498116035, 14.3482543034687, 14.3482543034687), X20676887_49LH_S = c(12.3859934583537,
12.3859934583537, 12.3859934583537, 12.461253579944, 12.461253579944,
12.5365137015344)), row.names = c(NA, 6L), class = "data.frame")
CodePudding user response:
We may select
to remove the columns that have all NAs, and this will remove those columns as part of the output
library(dplyr)
library(tidyr)
LaHave_WA2021 %>%
select(where(~ !all(is.na(.x)))) %>%
pivot_longer(cols = -c(Date_Time_GMT_3), names_to = 'response',
values_to = 'y')
-output
# A tibble: 240 × 3
Date_Time_GMT_3 response y
<dttm> <chr> <dbl>
1 2021-06-01 08:00:00 X20822244_27LH_Stationary 13.2
2 2021-06-01 08:00:00 X20874232_B2LH_Stationary 14.5
3 2021-06-01 08:00:00 X20874286_X1LH_Stationary 13.6
4 2021-06-01 08:00:00 X2322844_X1LH_AIR 12.1
5 2021-06-01 08:00:00 X20817728_X3LH_Stationary 14.7
6 2021-06-01 08:00:00 X20819743_X6LH_Stationary 15.3
7 2021-06-01 08:00:00 X20676900_50LH_U 12.2
8 2021-06-01 08:00:00 X20822214_73LH_U 12.4
9 2021-06-01 08:00:00 X20676887_44LH_U 11.7
10 2021-06-01 08:00:00 X20822223_46LH_U 12.4
# … with 230 more rows
If we want to remove the NA
rows, it is also possible with values_drop_na = TRUE
LaHave_WA2021 %>%
pivot_longer(cols = -c(Date_Time_GMT_3), names_to = 'response',
values_to = 'y', values_drop_na = TRUE)
-output
# A tibble: 240 × 3
Date_Time_GMT_3 response y
<dttm> <chr> <dbl>
1 2021-06-01 08:00:00 X20822244_27LH_Stationary 13.2
2 2021-06-01 08:00:00 X20874232_B2LH_Stationary 14.5
3 2021-06-01 08:00:00 X20874286_X1LH_Stationary 13.6
4 2021-06-01 08:00:00 X2322844_X1LH_AIR 12.1
5 2021-06-01 08:00:00 X20817728_X3LH_Stationary 14.7
6 2021-06-01 08:00:00 X20819743_X6LH_Stationary 15.3
7 2021-06-01 08:00:00 X20676900_50LH_U 12.2
8 2021-06-01 08:00:00 X20822214_73LH_U 12.4
9 2021-06-01 08:00:00 X20676887_44LH_U 11.7
10 2021-06-01 08:00:00 X20822223_46LH_U 12.4
# … with 230 more rows
CodePudding user response:
Try:
results <-
as_tibble(LaHave_WA2021) %>%
pivot_longer(
-c(Date_Time_GMT_3, where(anyNA)), # or where(~ all(is.na(.)))
names_to = "response",
values_to = "y"
)
Output:
results[1:10, ]
# A tibble: 10 × 4
Date_Time_GMT_3 X20676884_X3LH_U_StationaryCompare response y
<dttm> <dbl> <chr> <dbl>
1 2021-06-01 08:00:00 NA X20822244_27LH_Stationary 13.2
2 2021-06-01 08:00:00 NA X20874232_B2LH_Stationary 14.5
3 2021-06-01 08:00:00 NA X20874286_X1LH_Stationary 13.6
4 2021-06-01 08:00:00 NA X2322844_X1LH_AIR 12.1
5 2021-06-01 08:00:00 NA X20817728_X3LH_Stationary 14.7
6 2021-06-01 08:00:00 NA X20819743_X6LH_Stationary 15.3
7 2021-06-01 08:00:00 NA X20676900_50LH_U 12.2
8 2021-06-01 08:00:00 NA X20822214_73LH_U 12.4
9 2021-06-01 08:00:00 NA X20676887_44LH_U 11.7
10 2021-06-01 08:00:00 NA X20822223_46LH_U 12.4
This is excluding columns from pivoting which have any missing values. If you're looking to exclude only columns that only have missing values, you could replace the where
part with e.g. where(~ all(is.na(.)))
.
CodePudding user response:
We could use the values_drop_na = TRUE
argument:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(
-Date_Time_GMT_3,
names_to = "response",
values_to = "y",
values_drop_na = TRUE
)
Date_Time_GMT_3 response y
<dttm> <chr> <dbl>
1 2021-06-01 08:00:00 X20822244_27LH_Stationary 13.2
2 2021-06-01 08:00:00 X20874232_B2LH_Stationary 14.5
3 2021-06-01 08:00:00 X20874286_X1LH_Stationary 13.6
4 2021-06-01 08:00:00 X2322844_X1LH_AIR 12.1
5 2021-06-01 08:00:00 X20817728_X3LH_Stationary 14.7
6 2021-06-01 08:00:00 X20819743_X6LH_Stationary 15.3
7 2021-06-01 08:00:00 X20676900_50LH_U 12.2
8 2021-06-01 08:00:00 X20822214_73LH_U 12.4
9 2021-06-01 08:00:00 X20676887_44LH_U 11.7
10 2021-06-01 08:00:00 X20822223_46LH_U 12.4
# … with 230 more rows
Another verbose solution is:
library(dplyr)
library(tidyr)
library(purrr)
with_na <- df %>%
purrr::map_df(~sum(is.na(.))) %>%
select(where(~sum(.) != 0)) %>%
colnames()
df %>%
pivot_longer(
-c(Date_Time_GMT_3, with_na),
names_to = "response",
values_to = "y"
) %>%
select(contains("Date"), response, y)
Date_Time_GMT_3 response y
<dttm> <chr> <dbl>
1 2021-06-01 08:00:00 X20822244_27LH_Stationary 13.2
2 2021-06-01 08:00:00 X20874232_B2LH_Stationary 14.5
3 2021-06-01 08:00:00 X20874286_X1LH_Stationary 13.6
4 2021-06-01 08:00:00 X2322844_X1LH_AIR 12.1
5 2021-06-01 08:00:00 X20817728_X3LH_Stationary 14.7
6 2021-06-01 08:00:00 X20819743_X6LH_Stationary 15.3
7 2021-06-01 08:00:00 X20676900_50LH_U 12.2
8 2021-06-01 08:00:00 X20822214_73LH_U 12.4
9 2021-06-01 08:00:00 X20676887_44LH_U 11.7
10 2021-06-01 08:00:00 X20822223_46LH_U 12.4
# … with 230 more rows
# ℹ Use `print(n = ...)` to see more rows