Home > Software engineering >  In R data.frame, how to add new variables according to the current existing variables
In R data.frame, how to add new variables according to the current existing variables

Time:11-09

Given the data frame 'ori_data' (the grey area in the image), how can I add two new variables, 'final_price' and 'final_price_from'?

1. 'final_price' is the first non-zero value found when scanning 'price1' through 'price4' from left to right.

2. 'final_price_from' is the name of the column ('price1' to 'price4') that 'final_price' was taken from.

# Example input: one row per category, with up to four candidate prices.
# A price of 0 means "no price in this column".
ori_data <- data.frame(
  category = letters[1:9],
  price1 = c(1, 9, 0, 0, 0, 0, 6, 9, 0),
  price2 = c(3, 0, 2, 0, 6, 0, 0, 9, 0),
  price3 = c(0, 2, 0, 3, 0, 5, 4, 0, 0),
  price4 = c(0, 0, 3, 0, 5, 7, 0, 0, 0)
)

enter image description here

CodePudding user response:

library(tidyverse)

# Reshape the price columns to long form, pick the first positive price per
# category, then attach the result back onto the original wide data.
ori_data %>%
  left_join(
    pivot_longer(., -category, names_to = "name", values_to = "value") %>%
      group_by(category) %>%
      summarise(
        # Position of the first price > 0 within this category. which() skips
        # NA comparisons, and [1] yields NA when no price is positive.
        s = which(value > 0)[1],
        # Indexing with NA gives NA, which is replaced by the fallback values.
        final_price_from = replace_na(name[s], "none"),
        final_price = replace_na(value[s], 0)
      ),
    # Explicit key: avoids the "Joining with `by = ...`" message and guards
    # against accidentally joining on any other shared column.
    by = "category"
  ) %>%
  select(-s)

  category price1 price2 price3 price4 final_price_from final_price
1        a      1      3      0      0           price1           1
2        b      9      0      2      0           price1           9
3        c      0      2      0      3           price2           2
4        d      0      0      3      0           price3           3
5        e      0      6      0      5           price2           6
6        f      0      0      5      7           price3           5
7        g      6      0      4      0           price1           6
8        h      9      9      0      0           price1           9
9        i      0      0      0      0             none           0

CodePudding user response:

You may try

library(dplyr)
library(reshape2)

# melt() stacks price1..price4 in column order, so within each category the
# first row with a value > 0 is the first price that appears.
final_price <- ori_data %>%
  melt(
    id.vars = "category",
    variable.name = "final_price_from",
    value.name = "final_price"
  ) %>%
  group_by(category) %>%
  slice(which.max(final_price > 0)) %>%
  ungroup() %>%
  # which.max() returns 1 when no price is positive, so such a category still
  # points at price1 with a value of 0. Relabel its source as "none" and keep
  # final_price numeric rather than overwriting it with a string.
  mutate(
    final_price_from = ifelse(
      final_price > 0, as.character(final_price_from), "none"
    )
  )

ori_data %>%
  left_join(final_price, by = "category")

#>   category price1 price2 price3 price4 final_price_from final_price
#> 1        a      1      3      0      0           price1           1
#> 2        b      9      0      2      0           price1           9
#> 3        c      0      2      0      3           price2           2
#> 4        d      0      0      3      0           price3           3
#> 5        e      0      6      0      5           price2           6
#> 6        f      0      0      5      7           price3           5
#> 7        g      6      0      4      0           price1           6
#> 8        h      9      9      0      0           price1           9
#> 9        i      0      0      0      0             none           0
  • Related