Home > OS >  Adding together rows and creating new column of values in R
Adding together rows and creating new column of values in R

Time:12-29

I have a unique dataset that shows cycling data over time. The test's in this dataset reset their cycle count as the test ends. The test then resumes I want to try to add together TotlCycle column after the first reset and create a new column that shows the overall Cycle Count.

This is an example of the dataset.

cycle <- data.frame(TotlCycle = c (5, 6, 25, 45, 5, 6, 25, 49, 5, 6, 25, 47),
                    Test_Name = c ("Test", "Test", "Test", "Test", "Test-2", "Test-2", "Test-2", "Test-2", "Test-3", "Test-3", "Test-3", "Test-3"),
                    Test_Location = "Location_1")
cycle
  TotlCycle Test_Name Test_Location
1          5      Test    Location_1
2          6      Test    Location_1
3         25      Test    Location_1
4         45      Test    Location_1
5          5    Test-2    Location_1
6          6    Test-2    Location_1
7         25    Test-2    Location_1
8         49    Test-2    Location_1
9          5    Test-3    Location_1
10         6    Test-3    Location_1
11        25    Test-3    Location_1
12        47    Test-3    Location_1

I would like to add together the TotlCycle column and create a new column that would look like this.

cycle2 <- data.frame(TotlCycle = c (5, 6, 25, 46, 5, 6, 25, 49, 5, 6, 25, 47),
                     Test_Name = c ("Test", "Test", "Test", "Test", "Test-2", "Test-2", "Test-2", "Test-2", "Test-3", "Test-3", "Test-3", "Test-3"),
                     New_Total_Cycle = c (5, 6, 25, 45, 50, 51, 70, 94, 99, 100, 119, 141),
                     Test_Location = "Location_1")
cycle2
   TotlCycle Test_Name New_Total_Cycle Test_Location
1          5      Test               5    Location_1
2          6      Test               6    Location_1
3         25      Test              25    Location_1
4         46      Test              45    Location_1
5          5    Test-2              50    Location_1
6          6    Test-2              51    Location_1
7         25    Test-2              70    Location_1
8         49    Test-2              94    Location_1
9          5    Test-3              99    Location_1
10         6    Test-3             100    Location_1
11        25    Test-3             119    Location_1
12        47    Test-3             141    Location_1

The issue with this dataset is there are many different fields for column Test_Name that are run together as sets. The Test_Location column groups these sets together.

An example of this would be Test, Test-2, Test-3 have a Test_Location of Locaiton_1. Sample, Sample-2, Sample-3 would have a Test_Location of Location_2.

CodePudding user response:

Here a solution with for loop controlling every location. For simplicity I use the same data in "Location 2" that I have created for testing:

library(dplyr)

cycle <- data.frame(TotlCycle = c (5, 6, 25, 45, 5, 6, 25, 49, 5, 6, 25, 47),
                    Test_Name = c ("Test", "Test", "Test", "Test", "Test-2", "Test-2", "Test-2", "Test-2", "Test-3", "Test-3", "Test-3", "Test-3"),
                    Test_Location = "Location_1")

cycle <- cycle %>% bind_rows(data.frame(TotlCycle = c (5, 6, 25, 45, 5, 6, 25, 49, 5, 6, 25, 47),
                    Test_Name = c ("Test", "Test", "Test", "Test", "Test-2", "Test-2", "Test-2", "Test-2", "Test-3", "Test-3", "Test-3", "Test-3"),
                    Test_Location = "Location_2"))

locations <- unique(cycle$Test_Location)

cycle2 <- data.frame()

for (i in locations){
  
  t_prov <- cycle %>% filter(Test_Location == i)
  
  t_prov$TotlCycle_lag <- c(0, head(t_prov$TotlCycle, -1))
  
  t_prov$TotlCycle_dif <- t_prov$TotlCycle - t_prov$TotlCycle_lag
  
  pos_TestName <- as.data.frame(table(t_prov$Test_Name)) %>% select(Test_Name = Var1, Freq) %>% 
    mutate(endPos = cumsum(Freq)) %>% mutate(firstPos = endPos - Freq   1)
  
  new_totl_v <- c(t_prov$TotlCycle[1])
  
  for(j in 2:nrow(t_prov)){
    
    if(j %in% pos_TestName$firstPos) {
      
      new_totl_v <- append(new_totl_v, new_totl_v[j-1]   t_prov$TotlCycle[j])
    } else {
      
      new_totl_v <- append(new_totl_v, new_totl_v[j-1]   t_prov$TotlCycle_dif[j])
    }
  }
  
  t_prov$New_Total_Cycle <- new_totl_v
  
  if(nrow(cycle2) == 0){
    cycle2 <- t_prov
  }else{
    cycle2 <- cycle2 %>% bind_rows(t_prov)
  }
}

CodePudding user response:

If Test_name are contiguous, you can do this:

cycle <- data.frame(TotlCycle = c (5, 6, 25, 45, 5, 6, 25, 49, 5, 6, 25, 47),
                    Test_Name = c ("Test", "Test", "Test", "Test", "Test-2", "Test-2", "Test-2", "Test-2", "Test-3", "Test-3", "Test-3", "Test-3"))


cycle <- transform(cycle, New_Total_Cycle = c(0,head(TotlCycle ,-1)))

cycle <- transform(cycle, New_Total_Cycle = cumsum(
  c(head(TotlCycle,1),ifelse(tail(Test_Name,-1) != head(Test_Name, -1), 
       tail(TotlCycle,-1),  tail(TotlCycle - New_Total_Cycle,-1)))))

cycle
#>    TotlCycle Test_Name New_Total_Cycle
#> 1          5      Test               5
#> 2          6      Test               6
#> 3         25      Test              25
#> 4         45      Test              45
#> 5          5    Test-2              50
#> 6          6    Test-2              51
#> 7         25    Test-2              70
#> 8         49    Test-2              94
#> 9          5    Test-3              99
#> 10         6    Test-3             100
#> 11        25    Test-3             119
#> 12        47    Test-3             141

CodePudding user response:

Here is another approach. You can create a temporary column containing the previous test's final TotlCycle value. This can be used for a cumulative sum. Note that rleid from data.table is included in case the Test_Name is repeated.

Edit: With Test_Location, you will need additional group_by so that tests are maintained between each location. See below for modified answer.

library(tidyverse)
library(data.table)

cycle %>%
  group_by(Test_Location) %>%
  mutate(last = lag(TotlCycle, default = 0)) %>%
  group_by(group = rleid(Test_Name), .add = T) %>%
  mutate(last = ifelse(row_number() == 1, first(last), 0)) %>%
  group_by(Test_Location) %>%
  mutate(New_Total_Cycle = cumsum(last)   TotlCycle) %>%
  select(-last, -group)

Output

   TotlCycle Test_Name Test_Location New_Total_Cycle
       <dbl> <chr>     <chr>                   <dbl>
 1         5 Test      Location_1                  5
 2         6 Test      Location_1                  6
 3        25 Test      Location_1                 25
 4        45 Test      Location_1                 45
 5         5 Test-2    Location_1                 50
 6         6 Test-2    Location_1                 51
 7        25 Test-2    Location_1                 70
 8        49 Test-2    Location_1                 94
 9         5 Test-3    Location_1                 99
10         6 Test-3    Location_1                100
11        25 Test-4    Location_1                125
12        47 Test-4    Location_1                147
  • Related