Replace row value in a data frame group by the smallest value in that group-CodePudding

I have the following data set:

time <- c(0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5)
value <- c(10,8,6,5,3,2,12,10,6,5,4,2,20,15,16,9,2,2)
group <- c(1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3)

data <- data.frame(time, value, group)

I want to create a new column called data$diff that is equal to data$value minus the value of data$value when data$time == 0 within each group.

I am beginning with the following code

for(i in 1:nrow(data)){
  for(n in 1:max(data$group)){
    if(data$group[i] == n) {
      data$diff[i] <- ???????
    }
  }
}

But cannot figure out what to put in place of the question marks. The desired output would be this table: https://i.stack.imgur.com/1bAKj.png

Any thoughts are appreciated.

CodePudding user response：

Since in your example data$time == 0 is always the first element of the group, you can use this data.table approach.

library(data.table)

setDT(data)
data[, diff := value[1] - value, by = group]

In case that data$time == 0 is not the first element in each group you can use this:

data[, diff := value[time==0] - value, by = group]

Output:

> data
    time value group diff
 1:    0    10     1    0
 2:    1     8     1    2
 3:    2     6     1    4
 4:    3     5     1    5
 5:    4     3     1    7
 6:    5     2     1    8
 7:    0    12     2    0
 8:    1    10     2    2
 9:    2     6     2    6
10:    3     5     2    7
11:    4     4     2    8
12:    5     2     2   10
13:    0    20     3    0
14:    1    15     3    5
15:    2    16     3    4
16:    3     9     3   11
17:    4     2     3   18
18:    5     2     3   18

CodePudding user response：

Here is a short way to do it with dplyr.

library(dplyr)

data %>% 
  group_by(group) %>% 
  mutate(diff = value[which(time == 0)] - value)

Which gives

# Groups:   group [3]
    time value group  diff
   <dbl> <dbl> <dbl> <dbl>
 1     0    10     1     0
 2     1     8     1     2
 3     2     6     1     4
 4     3     5     1     5
 5     4     3     1     7
 6     5     2     1     8
 7     0    12     2     0
 8     1    10     2     2
 9     2     6     2     6
10     3     5     2     7
11     4     4     2     8
12     5     2     2    10
13     0    20     3     0
14     1    15     3     5
15     2    16     3     4
16     3     9     3    11
17     4     2     3    18
18     5     2     3    18

CodePudding user response：

library(dplyr)

vals2use <- data %>% 
  group_by(group) %>%
  filter(time==0) %>% 
  select(c(2,3)) %>%
  rename(value4diff=value)


dataNew <- merge(data, vals2use, all=T)
dataNew$diff <- dataNew$value4diff-dataNew$value
dataNew <- dataNew[,c(1,2,3,5)]
dataNew 
group time value diff
1      1    0    10    0
2      1    1     8    2
3      1    2     6    4
4      1    3     5    5
5      1    4     3    7
6      1    5     2    8
7      2    0    12    0
8      2    1    10    2
9      2    2     6    6
10     2    3     5    7
11     2    4     4    8
12     2    5     2   10
13     3    0    20    0
14     3    1    15    5
15     3    2    16    4
16     3    3     9   11
17     3    4     2   18
18     3    5     2   18

CodePudding user response：

Here is a base R approach.

within(data, diff <- ave(
  seq_along(value), group,
  FUN = \(i) value[i][time[i] == 0] - value[i]
))

Output

   time value group diff
1     0    10     1    0
2     1     8     1    2
3     2     6     1    4
4     3     5     1    5
5     4     3     1    7
6     5     2     1    8
7     0    12     2    0
8     1    10     2    2
9     2     6     2    6
10    3     5     2    7
11    4     4     2    8
12    5     2     2   10
13    0    20     3    0
14    1    15     3    5
15    2    16     3    4
16    3     9     3   11
17    4     2     3   18
18    5     2     3   18