I have data like this:
example_df <- data.frame(
col1type1 =c(110:106),
col2type2 = c(-108:-104),
col3type1 = c(-109:-105),
col4type2 =c(110:106),
col5type1 =c(107:103),
col6type2 = c(-110:-106),
col7type1 =c(109:113),
col8type2 = c(-120:-116),
col9type1 = c(-105:-101),
col10type2 =c(105:101),
col11type1 = c(-125:-121),
col12type2 = c(-105:-101)
)
I want to return only combinations where type1 type2>=0 on the same row and return to a new df the combination where it was >=0, the row, and the two numbers: (I know I could use for/foreach to calculate each cell individually and output to a data.frame, but there has to be a more efficient way)
Desired output like this (incomplete):
#for all possible combinations, like the example rows below
example_first <- data.frame(column_combination="col1type1_col2type2", row=1, sum=2,col1number=110,col2number=-108)
example_mid<- data.frame(column_combination="col1type1_col12type2",row=3, sum=5,col1number=108,col2number=-103)
example_last <- data.frame(column_combination="col9type1_col10type2",row=5,sum=0,col1number=-101,col2number=101)
#would want like this for all possible combinations
desired_incomplete_output <- rbind(example_first,example_mid,example_last)
What is an efficient way to calculate this en masse rather than a brutal for/foreach loop? Thanks!
CodePudding user response:
If the desired complete output consists of 79 results, for the given example, you may do something like this.
Explanation of steps-
- Through first 2 lines i.e.
mutate
andsplit
we have split the data into separate row each of its own dataframe, i.e. into a list. - To work with this list I have used
purrr::imap_dfr
which basically takes a list as input and outputs adata.frame
after row binding all results. In each of its sub-step, I have done-- First deselecting row column
- the pivoting everything
- then separating
name
column which has all your column names of input data into two separate columns usingtidyr::separate
- then creating a cross product of
num1
andnum2
combinations usingpurrr::cross2
- then using map_dfr again to convert that cross product into a data frame
- then using separator to separate column names and values. I used a seapartor
@
which I assumed that it is nowhere used in column names - thereafter filtering the rows
- other basic data wrangling/transformation using
dplyr
verbs
library(tidyverse)
example_df %>%
mutate(row = row_number()) %>%
split(.$row) %>%
imap_dfr(\(.a, .b) .a %>%
select(-row) %>%
pivot_longer(everything()) %>%
separate(name, into = c('col', 'type'), sep = '(?:type)') %>%
{cross2(paste(.$col[.$type == '1'], .$value[.$type == '1'], sep = "@"),
paste(.$col[.$type == '2'], .$value[.$type == '2'], sep = "@"))} %>%
map_dfr(~ set_names(.x, c('x', 'y'))) %>%
separate(x, into = c('col1', 'type1'), convert = TRUE, sep = '@') %>%
separate(y, into = c('col2', 'type2'), convert = TRUE, sep = "@") %>%
filter(type1 type2 >= 0) %>%
mutate(col_comb = paste0(col1, 'type1_', col2, "type2"),
sum= type1 type2) %>%
rename(col1number = type1,
col2number = type2) %>%
select(-col1, -col2) %>%
mutate(row = .b))
#> # A tibble: 79 × 5
#> col1number col2number col_comb sum row
#> <int> <int> <chr> <int> <chr>
#> 1 110 -108 col1type1_col2type2 2 1
#> 2 109 -108 col7type1_col2type2 1 1
#> 3 110 110 col1type1_col4type2 220 1
#> 4 -109 110 col3type1_col4type2 1 1
#> 5 107 110 col5type1_col4type2 217 1
#> 6 109 110 col7type1_col4type2 219 1
#> 7 -105 110 col9type1_col4type2 5 1
#> 8 110 -110 col1type1_col6type2 0 1
#> 9 110 105 col1type1_col10type2 215 1
#> 10 107 105 col5type1_col10type2 212 1
#> # … with 69 more rows
In case your columns are named as anum1
, anum2
, bnum1
..., we can modify this a bit (3 steps actually, all marked as comments)
example_df %>%
mutate(row = row_number()) %>%
split(.$row) %>%
imap_dfr(\(.a, .b) .a %>%
select(-row) %>%
pivot_longer(everything()) %>%
separate(name, into = c('col', 'type'), sep = '(?:num)') %>% # change sep
{cross2(paste(.$col[.$type == '1'], .$value[.$type == '1'], sep = "@"),
paste(.$col[.$type == '2'], .$value[.$type == '2'], sep = "@"))} %>%
map_dfr(~ set_names(.x, c('x', 'y'))) %>%
separate(x, into = c('col1', 'type1'), convert = TRUE, sep = '@') %>%
separate(y, into = c('col2', 'type2'), convert = TRUE, sep = "@") %>%
filter(type1 type2 >= 0) %>%
mutate(col_comb = paste0(col1, 'type1_', col2, "type2"),
sum= type1 type2) %>%
rename(col1number = num1, # change prefix
col2number = num2) %>% # change prefix
select(-col1, -col2) %>%
mutate(row = .b))
CodePudding user response:
You can pivot_longer, excepting each column in iteration. Then mutate to create sum, column names, rows, and filter for non-negatives
library(dplyr)
library(tidyr)
library(purrr)
map_dfr(1:length(example_df), function(i){
example_df %>%
# Get row number
tibble::rownames_to_column(., var = "row") %>%
# Excepting the rowname column and iterating through each column (except row)
pivot_longer(-c(row, (i 1))) %>%
# Get col1number value
mutate(col1number = .[[2]]) %>%
rowwise() %>%
# Get the column names and paste together for combination
mutate(column1 = colnames(example_df)[i],
column_combination = paste(column1, name, sep = "_"),
# These are the value columns
sum = sum(across(c(2, 4))),
# col2 name
col2number = value) %>%
filter(sum >= 0) %>%
select(column_combination, row, sum, col1number, col2number)
}) %>%
bind_rows %
ungroup()
Gives:
# A tibble: 288 × 5
column_combination row sum col1number col2number
<chr> <chr> <int> <int> <int>
1 col1type1_col2type2 1 2 110 -108
2 col1type1_col3type1 1 1 110 -109
3 col1type1_col4type2 1 220 110 110
4 col1type1_col5type1 1 217 110 107
5 col1type1_col6type2 1 0 110 -110
6 col1type1_col7type1 1 219 110 109
7 col1type1_col9type1 1 5 110 -105
8 col1type1_col10type2 1 215 110 105
9 col1type1_col12type2 1 5 110 -105
10 col1type1_col2type2 2 2 109 -107
# … with 278 more rows
CodePudding user response:
A matrix approach:
m <- as.matrix(example_df)
type1 <- seq(1,ncol(m),by=2)
type2 <- seq(2,ncol(m),by=2)
cbn <- expand.grid(type1=type1,type2=type2)
res.selection <- as.vector((m[,cbn$type1] m[,cbn$type2])>0)
res.row <- rep(1:nrow(m),nrow(cbn))
res.type1number <- as.vector(m[,cbn$type1])
res.type2number <- as.vector(m[,cbn$type2])
res.sum <- as.vector(m[,cbn$type1] m[,cbn$type2])
res.type1 <- rep(cbn$type1,each=nrow(m))
res.type2 <- rep(cbn$type2,each=nrow(m))
data.frame( combination = paste0('col',res.type1[res.selection],'type1-col',res.type2[res.selection],'type2'),
row = res.row[res.selection],
type1number = res.type1number[res.selection],
type2number = res.type2number[res.selection],
sum = res.sum[res.selection])
#> combination row type1number type2number sum
#> 1 col1type1-col2type2 1 110 -108 2
#> 2 col1type1-col2type2 2 109 -107 2
#> 3 col1type1-col2type2 3 108 -106 2
#> 4 col1type1-col2type2 4 107 -105 2
#> 5 col1type1-col2type2 5 106 -104 2
#> 6 col7type1-col2type2 1 109 -108 1
#> 7 col7type1-col2type2 2 110 -107 3
#> 8 col7type1-col2type2 3 111 -106 5
#> 9 col7type1-col2type2 4 112 -105 7
#> 10 col7type1-col2type2 5 113 -104 9
#> 11 col1type1-col4type2 1 110 110 220
#> 12 col1type1-col4type2 2 109 109 218
#> 13 col1type1-col4type2 3 108 108 216
#> 14 col1type1-col4type2 4 107 107 214
#> 15 col1type1-col4type2 5 106 106 212
#> 16 col3type1-col4type2 1 -109 110 1
#> 17 col3type1-col4type2 2 -108 109 1
#> 18 col3type1-col4type2 3 -107 108 1
#> 19 col3type1-col4type2 4 -106 107 1
#> 20 col3type1-col4type2 5 -105 106 1
#> 21 col5type1-col4type2 1 107 110 217
#> 22 col5type1-col4type2 2 106 109 215
#> 23 col5type1-col4type2 3 105 108 213
#> 24 col5type1-col4type2 4 104 107 211
#> 25 col5type1-col4type2 5 103 106 209
#> 26 col7type1-col4type2 1 109 110 219
#> 27 col7type1-col4type2 2 110 109 219
#> 28 col7type1-col4type2 3 111 108 219
#> 29 col7type1-col4type2 4 112 107 219
#> 30 col7type1-col4type2 5 113 106 219
#> 31 col9type1-col4type2 1 -105 110 5
#> 32 col9type1-col4type2 2 -104 109 5
#> 33 col9type1-col4type2 3 -103 108 5
#> 34 col9type1-col4type2 4 -102 107 5
#> 35 col9type1-col4type2 5 -101 106 5
#> 36 col7type1-col6type2 2 110 -109 1
#> 37 col7type1-col6type2 3 111 -108 3
#> 38 col7type1-col6type2 4 112 -107 5
#> 39 col7type1-col6type2 5 113 -106 7
#> 40 col1type1-col10type2 1 110 105 215
#> 41 col1type1-col10type2 2 109 104 213
#> 42 col1type1-col10type2 3 108 103 211
#> 43 col1type1-col10type2 4 107 102 209
#> 44 col1type1-col10type2 5 106 101 207
#> 45 col5type1-col10type2 1 107 105 212
#> 46 col5type1-col10type2 2 106 104 210
#> 47 col5type1-col10type2 3 105 103 208
#> 48 col5type1-col10type2 4 104 102 206
#> 49 col5type1-col10type2 5 103 101 204
#> 50 col7type1-col10type2 1 109 105 214
#> 51 col7type1-col10type2 2 110 104 214
#> 52 col7type1-col10type2 3 111 103 214
#> 53 col7type1-col10type2 4 112 102 214
#> 54 col7type1-col10type2 5 113 101 214
#> 55 col1type1-col12type2 1 110 -105 5
#> 56 col1type1-col12type2 2 109 -104 5
#> 57 col1type1-col12type2 3 108 -103 5
#> 58 col1type1-col12type2 4 107 -102 5
#> 59 col1type1-col12type2 5 106 -101 5
#> 60 col5type1-col12type2 1 107 -105 2
#> 61 col5type1-col12type2 2 106 -104 2
#> 62 col5type1-col12type2 3 105 -103 2
#> 63 col5type1-col12type2 4 104 -102 2
#> 64 col5type1-col12type2 5 103 -101 2
#> 65 col7type1-col12type2 1 109 -105 4
#> 66 col7type1-col12type2 2 110 -104 6
#> 67 col7type1-col12type2 3 111 -103 8
#> 68 col7type1-col12type2 4 112 -102 10
#> 69 col7type1-col12type2 5 113 -101 12