The following dataset contains 7 columns (AI_1 through AI_7), each with 1440 observations per ID (42 IDs in total). I want to create a binary variable for each AI column based on a threshold. For example, if AI_1 > 0.1, a new variable called ACTIVITY should get the value 1; otherwise it should get the value 0. I tried this with the code below, but when I compute the mean of the binary variable, the result is above 1, which is strange since the variable can only take the values 0 or 1. Does anyone know how to create all 7 binary variables in the same dataset so that each mean lies between 0 and 1?
structure(list(X = 1:30, x1.time = c("00:00:00", "00:01:00",
"00:02:00", "00:03:00", "00:04:00", "00:05:00", "00:06:00", "00:07:00",
"00:08:00", "00:09:00", "00:10:00", "00:11:00", "00:12:00", "00:13:00",
"00:14:00", "00:15:00", "00:16:00", "00:17:00", "00:18:00", "00:19:00",
"00:20:00", "00:21:00", "00:22:00", "00:23:00", "00:24:00", "00:25:00",
"00:26:00", "00:27:00", "00:28:00", "00:29:00"), AI_1 = c(0.17532896077581,
0.174249939439765, 0.174170544792533, 0.172877357886967, 0.173679017353614,
0.174216799443538, 0.174514454250882, 0.174656389074666, 0.173377175454716,
0.173044040397703, 0.172476572884875, 0.174738790856458, 0.173833445732856,
0.174229265722835, 0.174392878820111, 0.174715890976243, 0.174241614289181,
0.173229751013599, 0.173579164085914, 0.173829069216696, 0.173499039975341,
0.174387946222767, 0.173802854581089, 0.174107580137568, 0.174113709936873,
0.173172609295233, 0.174509255493075, 0.173383120975257, 0.173398927511582,
0.173466516952908), AI_2 = c(0.173549588758752, 0, 0.85729795236214,
0.513925586220723, 0.140789239632585, 0.0989981552300843, 0.321625480480368,
0.62540390366724, 0.00714855410741877, 0, 0, 0, 0.212943798631015,
0, 0, 0.023650258664654, 0.00159158576982517, 0.0172670511608436,
0, 0, 0, 0.25653572767355, 0.41158598021939, 0.433889173147664,
0.442200975044019, 0.471931171507954, 0.415009919603445, 0.43364443321512,
0.449930874231746, 0.48397633182816), AI_3 = c(0.026069149474549,
0.0417747330978121, 0.276687600798659, 0.258591321128928, 0.208790296683244,
0.0300099278967508, 0.15234594700642, 0.26519848659315, 0.34220566727692,
0.352310255219813, 0.297621781376737, 0.292800000618149, 0.481566536382664,
0.337770306519177, 0.743182296874282, 0.256202127993172, 0.201340506649845,
0.200155318345632, 0.237126429055375, 0.234974163009848, 0.235808994849961,
0.302168675921402, 0.377936665388589, 0.416123299239618, 0.389279883023212,
0.357972848973051, 0.305268847437493, 0.290040891577408, 0.197384083463156,
0.258282654013295), AI_4 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00841646877382803,
0), AI_5 = c(0, 0, 0.0015062890214412, 0.00154798776365785, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), AI_6 = c(0.190018331633492, 0.241159552783285, 0.231916111803065,
0.193196835220518, 0.240381778378367, 0.266125762332231, 0.339227319507121,
0.354841547583334, 0.277011867279295, 0.474462632995715, 0.516356521276347,
0.559477604383845, 0.374857636694405, 0.376675155204282, 0.516347133869462,
0.627633542885353, 0.565732682034457, 0.544148310829377, 0.545022418887296,
0.602327138107482, 0.529578366594453, 0.571672817412653, 0.51963881197827,
0.493590581088222, 0.487545798153711, 0.525272191616523, 0.586906227102549,
0.555446579214151, 0.578788883825157, 0.617822898150646), AI_7 = c(0.139608768263461,
0.165583663096789, 0.326959508587122, 0.221739297198209, 0.160657663051105,
0.107439748199699, 0.117594125364214, 0.133528520361788, 0.117950354159875,
0.131428192187155, 0.125355403562937, 0.119185646272255, 0.196285453922129,
0.167061057207379, 0.169855099745761, 0.141077126343563, 0.078433720675593,
0.0999303057993443, 0.0798045801131668, 0.0331137028671696, 0.0920945831761988,
0.0233052285173748, 0, 0, 0, 0.00876293044107867, 0, 0.109134564970416,
0.110323312017635, 0.117772975747077), ID = c("ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1"
), activity = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0), activity2 = c("0",
"1", "0", "0", "0", "1", "0", "0", "1", "1", "1", "1", "0", "1",
"1", "1", "1", "1", "1", "1", "1", "0", "0", "0", "0", "0", "0",
"0", "0", "0"), activity3 = c("1", "1", "0", "0", "0", "1", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0"), activity4 = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1"), activity5 = c("1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), activity6 = c("0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0"), activity7 = c("0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "0", "0", "0")), row.names = c(NA,
30L), class = "data.frame")
This is the code I used:
# Flag each activity-index column against a 0.1 threshold, one stage per
# column, keeping every intermediate data frame (Threshold .. Threshold7).
# Each flag is stored as an integer (1L when the index is <= 0.1, 0L when it
# is above, NA when the index is missing) instead of the strings "1"/"0",
# so mean() and sum() can be applied to the flags directly and the mean
# stays between 0 and 1.
# NOTE(review): the accompanying description asks for 1 when the index is
# > 0.1, which is the opposite of what this code computes. The original
# comparison direction is kept; flip `<=` to `>` if the description is the
# intended rule.
Threshold <- Activity_index_1 %>%
  mutate(activity = as.integer(AI_1 <= 0.1))
Threshold2 <- Threshold %>%
  mutate(activity2 = as.integer(AI_2 <= 0.1))
Threshold3 <- Threshold2 %>%
  mutate(activity3 = as.integer(AI_3 <= 0.1))
Threshold4 <- Threshold3 %>%
  mutate(activity4 = as.integer(AI_4 <= 0.1))
Threshold5 <- Threshold4 %>%
  mutate(activity5 = as.integer(AI_5 <= 0.1))
Threshold6 <- Threshold5 %>%
  mutate(activity6 = as.integer(AI_6 <= 0.1))
Threshold7 <- Threshold6 %>%
  mutate(activity7 = as.integer(AI_7 <= 0.1))
CodePudding user response:
Here is a solution with `mutate`/`across` and a logical condition returning `FALSE`/`TRUE`, which is then coerced to integers `0`/`1`.
The posted data already has `activity` columns, so I start by removing them from the data.
suppressPackageStartupMessages({
library(dplyr)
library(stringr)
})
# Recompute the activity flags from scratch: drop the activity* columns
# already present in the posted data, then add one integer flag per AI_*
# column (1L when the value is <= 0.1, 0L when it is above), named
# activity1, activity2, ...
Threshold <- Activity_index_1 %>%
  select(-starts_with("activity")) %>%
  mutate(across(
    starts_with("AI_"),
    ~ as.integer(.x <= 0.1),
    .names = "activity_{.col}"
  )) %>%
  # across() names the new columns activity_AI_1, ...; strip the "_AI_" part.
  # rename_with() replaces the superseded rename_at(vars(...)) form.
  rename_with(~ str_remove(.x, "_AI_"), starts_with("activity_AI"))
str(Threshold)
#> 'data.frame': 30 obs. of 17 variables:
#> $ X : int 1 2 3 4 5 6 7 8 9 10 ...
#> $ x1.time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
#> $ AI_1 : num 0.175 0.174 0.174 0.173 0.174 ...
#> $ AI_2 : num 0.174 0 0.857 0.514 0.141 ...
#> $ AI_3 : num 0.0261 0.0418 0.2767 0.2586 0.2088 ...
#> $ AI_4 : num 0 0 0 0 0 0 0 0 0 0 ...
#> $ AI_5 : num 0 0 0.00151 0.00155 0 ...
#> $ AI_6 : num 0.19 0.241 0.232 0.193 0.24 ...
#> $ AI_7 : num 0.14 0.166 0.327 0.222 0.161 ...
#> $ ID : chr "ID1" "ID1" "ID1" "ID1" ...
#> $ activity1: int 0 0 0 0 0 0 0 0 0 0 ...
#> $ activity2: int 0 1 0 0 0 1 0 0 1 1 ...
#> $ activity3: int 1 1 0 0 0 1 0 0 0 0 ...
#> $ activity4: int 1 1 1 1 1 1 1 1 1 1 ...
#> $ activity5: int 1 1 1 1 1 1 1 1 1 1 ...
#> $ activity6: int 0 0 0 0 0 0 0 0 0 0 ...
#> $ activity7: int 0 0 0 0 0 0 0 0 0 0 ...
Created on 2022-10-10 with reprex v2.0.2
CodePudding user response:
Compare just the `AI` variables with `.1`, convert the result to numeric, set the `colnames`, and `cbind` the new columns to the data.
# Build 0/1 activity flags for every column of `dat` whose name starts with
# "AI" (1 when the value is <= .1, 0 when it is above) and append them to
# `dat` as activity_* columns.
res <- local({
  ai_cols <- grep('^AI', names(dat))
  # Comparing the data-frame subset yields a logical matrix; multiplying by 1
  # turns it into a numeric 0/1 matrix while keeping the column names.
  flags <- (dat[ai_cols] <= .1) * 1
  colnames(flags) <- gsub('AI', 'activity', colnames(flags))
  cbind(dat, flags)
})
str(res)
# 'data.frame': 30 obs. of 16 variables:
# $ x1.time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
# $ AI_1 : num 0.175 0.174 0.174 0.173 0.174 ...
# $ AI_2 : num 0.174 0 0.857 0.514 0.141 ...
# $ AI_3 : num 0.0261 0.0418 0.2767 0.2586 0.2088 ...
# $ AI_4 : num 0 0 0 0 0 0 0 0 0 0 ...
# $ AI_5 : num 0 0 0.00151 0.00155 0 ...
# $ AI_6 : num 0.19 0.241 0.232 0.193 0.24 ...
# $ AI_7 : num 0.14 0.166 0.327 0.222 0.161 ...
# $ ID : chr "ID1" "ID1" "ID1" "ID1" ...
# $ activity_1: num 0 0 0 0 0 0 0 0 0 0 ...
# $ activity_2: num 0 1 0 0 0 1 0 0 1 1 ...
# $ activity_3: num 1 1 0 0 0 1 0 0 0 0 ...
# $ activity_4: num 1 1 1 1 1 1 1 1 1 1 ...
# $ activity_5: num 1 1 1 1 1 1 1 1 1 1 ...
# $ activity_6: num 0 0 0 0 0 0 0 0 0 0 ...
# $ activity_7: num 0 0 0 0 0 0 0 0 0 0 ...
dat <- structure(list(x1.time = c("00:00:00", "00:01:00", "00:02:00",
"00:03:00", "00:04:00", "00:05:00", "00:06:00", "00:07:00", "00:08:00",
"00:09:00", "00:10:00", "00:11:00", "00:12:00", "00:13:00", "00:14:00",
"00:15:00", "00:16:00", "00:17:00", "00:18:00", "00:19:00", "00:20:00",
"00:21:00", "00:22:00", "00:23:00", "00:24:00", "00:25:00", "00:26:00",
"00:27:00", "00:28:00", "00:29:00"), AI_1 = c(0.17532896077581,
0.174249939439765, 0.174170544792533, 0.172877357886967, 0.173679017353614,
0.174216799443538, 0.174514454250882, 0.174656389074666, 0.173377175454716,
0.173044040397703, 0.172476572884875, 0.174738790856458, 0.173833445732856,
0.174229265722835, 0.174392878820111, 0.174715890976243, 0.174241614289181,
0.173229751013599, 0.173579164085914, 0.173829069216696, 0.173499039975341,
0.174387946222767, 0.173802854581089, 0.174107580137568, 0.174113709936873,
0.173172609295233, 0.174509255493075, 0.173383120975257, 0.173398927511582,
0.173466516952908), AI_2 = c(0.173549588758752, 0, 0.85729795236214,
0.513925586220723, 0.140789239632585, 0.0989981552300843, 0.321625480480368,
0.62540390366724, 0.00714855410741877, 0, 0, 0, 0.212943798631015,
0, 0, 0.023650258664654, 0.00159158576982517, 0.0172670511608436,
0, 0, 0, 0.25653572767355, 0.41158598021939, 0.433889173147664,
0.442200975044019, 0.471931171507954, 0.415009919603445, 0.43364443321512,
0.449930874231746, 0.48397633182816), AI_3 = c(0.026069149474549,
0.0417747330978121, 0.276687600798659, 0.258591321128928, 0.208790296683244,
0.0300099278967508, 0.15234594700642, 0.26519848659315, 0.34220566727692,
0.352310255219813, 0.297621781376737, 0.292800000618149, 0.481566536382664,
0.337770306519177, 0.743182296874282, 0.256202127993172, 0.201340506649845,
0.200155318345632, 0.237126429055375, 0.234974163009848, 0.235808994849961,
0.302168675921402, 0.377936665388589, 0.416123299239618, 0.389279883023212,
0.357972848973051, 0.305268847437493, 0.290040891577408, 0.197384083463156,
0.258282654013295), AI_4 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00841646877382803,
0), AI_5 = c(0, 0, 0.0015062890214412, 0.00154798776365785, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), AI_6 = c(0.190018331633492, 0.241159552783285, 0.231916111803065,
0.193196835220518, 0.240381778378367, 0.266125762332231, 0.339227319507121,
0.354841547583334, 0.277011867279295, 0.474462632995715, 0.516356521276347,
0.559477604383845, 0.374857636694405, 0.376675155204282, 0.516347133869462,
0.627633542885353, 0.565732682034457, 0.544148310829377, 0.545022418887296,
0.602327138107482, 0.529578366594453, 0.571672817412653, 0.51963881197827,
0.493590581088222, 0.487545798153711, 0.525272191616523, 0.586906227102549,
0.555446579214151, 0.578788883825157, 0.617822898150646), AI_7 = c(0.139608768263461,
0.165583663096789, 0.326959508587122, 0.221739297198209, 0.160657663051105,
0.107439748199699, 0.117594125364214, 0.133528520361788, 0.117950354159875,
0.131428192187155, 0.125355403562937, 0.119185646272255, 0.196285453922129,
0.167061057207379, 0.169855099745761, 0.141077126343563, 0.078433720675593,
0.0999303057993443, 0.0798045801131668, 0.0331137028671696, 0.0920945831761988,
0.0233052285173748, 0, 0, 0, 0.00876293044107867, 0, 0.109134564970416,
0.110323312017635, 0.117772975747077), ID = c("ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1"
)), row.names = c(NA, 30L), class = "data.frame")