Home > Net >  Counting the Position of the First 0 in Each Row
Counting the Position of the First 0 in Each Row

Time:06-14

I have a dataset that looks like this:

set.seed(999)
col1 = sample.int(10, 10)
col2 = sample.int(10, 10)
col3 = sample.int(10, 10)
col4 = sample.int(10, 10)
col5 = sample.int(10, 10)

col_data = data.frame(col1, col2, col3, col4, col5)

  col1 col2 col3 col4 col5
1     4    8    3    9    8
2     7    5    9    7   10
3     1    7    7    8    2
4     6    6    5    5    4
5     8   10    8    3    7
6     2    3    1    2    6
7     5    9    2    1    1
8    10    2    4    4    3
9     9    1   10    6    9
10    3    4    6   10    5

I would like to create new columns in this dataset that :

  • Find out the position (i.e. column number) for the first "9" in each row
  • Find out the position (i.e. column number) for the first "7" in each row
  • Find out the position (i.e. column number) for the first "1" in each row
  • Find out the position (i.e. column number) for the first "10" in each row
  • Find out the position (i.e. column number) for the first "4" in each row

I thought this might be easier to do if the data was a matrix, and then convert it back to a data frame:

col_d = as.matrix(col_data)


first_4 = apply(col_d == 9, 1, which.max)
first_7 = apply(col_d == 7, 1, which.max)
first_1 = apply(col_d == 1, 1, which.max)
first_10 = apply(col_d == 10, 1, which.max)
first_4 = apply(col_d == 4, 1, which.max)

final = cbind(col_data, first_4, first_7, first_1, first_10, first_4)

But this does not appear to be working:

   col1 col2 col3 col4 col5 first_4 first_7 first_1 first_10 first_4
1     4    8    3    9    8       1       1       1        1       1
2     7    5    9    7   10       1       1       1        5       1
3     1    7    7    8    2       1       2       1        1       1
4     6    6    5    5    4       5       1       1        1       5
5     8   10    8    3    7       1       5       1        2       1
6     2    3    1    2    6       1       1       3        1       1
7     5    9    2    1    1       1       1       4        1       1
8    10    2    4    4    3       3       1       1        1       3
9     9    1   10    6    9       1       1       2        3       1
10    3    4    6   10    5       2       1       1        4       2

For example: In the first row, there is no 10 - but the value of "first_10" is 1

  • Is there a way to resolve this error?

Thank you!

CodePudding user response:

How about

apply(col_data == 7, 1, function(x) {ifelse(sum(x)==0, NA, which.max(x))})
[1] NA  1  2 NA  5 NA NA NA NA NA

apply(col_data == 10, 1, function(x) {ifelse(sum(x)==0, NA, which.max(x))})
[1] NA  5 NA NA  2 NA NA  1  3  4

You may change NA whatever you want, that it means there is no that number(i.e 7 or 10)

get second one

apply(col_data == 7, 1, function(x) {ifelse(sum(x)==0, NA, which(x)[2])})

get last one

apply(col_data == 7, 1, function(x) {ifelse(sum(x)==0, NA, dplyr::last(which(x)))})

CodePudding user response:

Use max.col:

nr <- c(9, 7, 1, 10, 4)
nr <- setNames(nr, paste0("first_", nr))
cbind(col_data, sapply(nr, function(x) {
  . <- col_data == x
  tt <- max.col(., "first")
  is.na(tt) <- tt == 1 & !.[,1]
  tt
}))
#   col1 col2 col3 col4 col5 first_9 first_7 first_1 first_10 first_4
#1     4    8    3    9    8       4      NA      NA       NA       1
#2     7    5    9    7   10       3       1      NA        5      NA
#3     1    7    7    8    2      NA       2       1       NA      NA
#4     6    6    5    5    4      NA      NA      NA       NA       5
#5     8   10    8    3    7      NA       5      NA        2      NA
#6     2    3    1    2    6      NA      NA       3       NA      NA
#7     5    9    2    1    1       2      NA       4       NA      NA
#8    10    2    4    4    3      NA      NA      NA        1       3
#9     9    1   10    6    9       1      NA       2        3      NA
#10    3    4    6   10    5      NA      NA      NA        4       2

For the last:

nr <- c(9, 7, 1, 10, 4)
nr <- setNames(nr, paste0("last_", nr))
cbind(col_data, sapply(nr, function(x) {
  . <- col_data == x
  tt <- max.col(., "last")
  is.na(tt) <- rowSums(.) == 0
  tt
}))
#   col1 col2 col3 col4 col5 last_9 last_7 last_1 last_10 last_4
#1     4    8    3    9    8      4     NA     NA      NA      1
#2     7    5    9    7   10      3      4     NA       5     NA
#3     1    7    7    8    2     NA      3      1      NA     NA
#4     6    6    5    5    4     NA     NA     NA      NA      5
#5     8   10    8    3    7     NA      5     NA       2     NA
#6     2    3    1    2    6     NA     NA      3      NA     NA
#7     5    9    2    1    1      2     NA      5      NA     NA
#8    10    2    4    4    3     NA     NA     NA       1      4
#9     9    1   10    6    9      5     NA      2       3     NA
#10    3    4    6   10    5     NA     NA     NA       4      2

And for the second match:

nr <- c(9, 7, 1, 10, 4)
nr <- setNames(nr, paste0("2nd_", nr))
cbind(col_data, sapply(nr, function(x) {
  . <- which(col_data == x, TRUE)
  . <- tapply(.[,2], .[,1], `[`, 2)
  replace(rep(NA_integer_, nrow(col_data)), as.integer(names(.)), .)
}))
#   col1 col2 col3 col4 col5 2nd_9 2nd_7 2nd_1 2nd_10 2nd_4
#1     4    8    3    9    8    NA    NA    NA     NA    NA
#2     7    5    9    7   10    NA     4    NA     NA    NA
#3     1    7    7    8    2    NA     3    NA     NA    NA
#4     6    6    5    5    4    NA    NA    NA     NA    NA
#5     8   10    8    3    7    NA    NA    NA     NA    NA
#6     2    3    1    2    6    NA    NA    NA     NA    NA
#7     5    9    2    1    1    NA    NA     5     NA    NA
#8    10    2    4    4    3    NA    NA    NA     NA     4
#9     9    1   10    6    9     5    NA    NA     NA    NA
#10    3    4    6   10    5    NA    NA    NA     NA    NA

Or using apply on one column.

#First
apply(col_data == 9, 1, function(x) if(any(x)) which.max(x) else NA)
# [1]  4  3 NA NA NA NA  2 NA  1 NA

#Last
apply(col_data == 9, 1, function(x) if(any(x)) tail(which(x), 1) else NA)
# [1]  4  3 NA NA NA NA  2 NA  5 NA

#Second
apply(col_data == 9, 1, function(x) if(any(x)) which(x)[2] else NA)
# [1] NA NA NA NA NA NA NA NA  5 NA
  •  Tags:  
  • r
  • Related