These are the types of data frames that I have, with two examples of how they can differ. First data frame:
- - - - - - - - - - - - - - - - - - - - - - - - Type1 992.0 4461.0 1.2 38476.0 :1..4473
Second data frame:
- - - - - - - - - - - - - - - - - - - - - - - - Type2 1.0 5131.0 0.4 44433.0 -:1998..7151
- - - - - - - - - - - - - - - - - - - - - - - - Type2 5331.0 845.0 1.3 6672.0 -:1164..2016
Type3 1945.0 91.0 18.7 426.0 -:501..597 Type3 1912.0 91.0 18.7 426.0 -:501..597 - - - - - - - - - - - - - - - - - -
Type3 2071.0 196.0 18.9 468.0 -:10..236 Type3 2038.0 196.0 18.9 468.0 -:10..236 Type3 2049.0 141.0 16.3 441.0 -:10..196 Type3 2049.0 141.0 16.3 441.0 -:10..196 Type3 8294.0 151.0 17.2 580.0 -:10..196
- - - - - - - - - - - - - - - - - - - - - - - - Type4 8604.0 1473.0 0.5 13042.0 :1..1471
- - - - - - - - - - - - - - - - - - - - - - - - Type5 9795.0 2114.0 32.0 1971.0 :1296..3439
- - - - - - - - - - - - - - - - - - - - - - - - Type6 10131.0 5684.0 0.3 49063.0 :1455..7113
What I am looking for is code that will extract, for each row, the first value that isn't '-', without relying on that value being 'Type*' (just anything that is not '-'). So the output would look like this:
Type1
or
Type2
Type3
Type4
Type5
Type6
I can obviously subset for anything that doesn't equal '-', but I can't figure out how to get only the first occurrence in each row, because I want the output to have the same dimensions (one value per row). I see a lot of solutions for finding the first occurrence of a word in an entire data frame, but this needs to be done per row, and I just can't seem to get it working.
dput for the first data.frame:
dput(x)
structure(list(V1 = structure(1L, .Label = "-", class = "factor"),
V2 = structure(1L, .Label = "-", class = "factor"), V3 = structure(1L, .Label = "-", class = "factor"),
V4 = structure(1L, .Label = "-", class = "factor"), V5 = structure(1L, .Label = "-", class = "factor"),
V6 = structure(1L, .Label = "-", class = "factor"), V7 = structure(1L, .Label = "-", class = "factor"),
V8 = structure(1L, .Label = "-", class = "factor"), V9 = structure(1L, .Label = "-", class = "factor"),
V10 = structure(1L, .Label = "-", class = "factor"), V11 = structure(1L, .Label = "-", class = "factor"),
V12 = structure(1L, .Label = "-", class = "factor"), V13 = structure(1L, .Label = "-", class = "factor"),
V14 = structure(1L, .Label = "-", class = "factor"), V15 = structure(1L, .Label = "-", class = "factor"),
V16 = structure(1L, .Label = "-", class = "factor"), V17 = structure(1L, .Label = "-", class = "factor"),
V18 = structure(1L, .Label = "-", class = "factor"), V19 = structure(1L, .Label = "-", class = "factor"),
V20 = structure(1L, .Label = "-", class = "factor"), V21 = structure(1L, .Label = "-", class = "factor"),
V22 = structure(1L, .Label = "-", class = "factor"), V23 = structure(1L, .Label = "-", class = "factor"),
V24 = structure(1L, .Label = "-", class = "factor"), V25 = structure(1L, .Label = "Type1", class = "factor"),
V26 = 992, V27 = 4461, V28 = 1.2, V29 = 38476, V30 = structure(1L, .Label = ":1..4473", class = "factor")), class = "data.frame", row.names = c(NA, -1L))
The second data frame example:
structure(list(V1 = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 1L), .Label = c("-",
"Type2"), class = "factor"), V2 = structure(c(1L, 1L, 2L, 3L,
1L, 1L, 1L), .Label = c("-", "1945.0", "2071.0"), class = "factor"),
V3 = structure(c(1L, 1L, 3L, 2L, 1L, 1L, 1L), .Label = c("-",
"196.0", "91.0"), class = "factor"), V4 = structure(c(1L,
1L, 2L, 3L, 1L, 1L, 1L), .Label = c("-", "18.7", "18.9"), class = "factor"),
V5 = structure(c(1L, 1L, 2L, 3L, 1L, 1L, 1L), .Label = c("-",
"426.0", "468.0"), class = "factor"), V6 = structure(c(1L,
1L, 3L, 2L, 1L, 1L, 1L), .Label = c("-", "-:10..236", "-:501..597"
), class = "factor"), V7 = structure(c(1L, 1L, 2L, 2L, 1L,
1L, 1L), .Label = c("-", "Type2"), class = "factor"), V8 = structure(c(1L,
1L, 2L, 3L, 1L, 1L, 1L), .Label = c("-", "1912.0", "2038.0"
), class = "factor"), V9 = structure(c(1L, 1L, 3L, 2L, 1L,
1L, 1L), .Label = c("-", "196.0", "91.0"), class = "factor"),
V10 = structure(c(1L, 1L, 2L, 3L, 1L, 1L, 1L), .Label = c("-",
"18.7", "18.9"), class = "factor"), V11 = structure(c(1L,
1L, 2L, 3L, 1L, 1L, 1L), .Label = c("-", "426.0", "468.0"
), class = "factor"), V12 = structure(c(1L, 1L, 3L, 2L, 1L,
1L, 1L), .Label = c("-", "-:10..236", "-:501..597"), class = "factor"),
V13 = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("-",
"Type2"), class = "factor"), V14 = structure(c(1L, 1L, 1L,
2L, 1L, 1L, 1L), .Label = c("-", "2049.0"), class = "factor"),
V15 = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("-",
"141.0"), class = "factor"), V16 = structure(c(1L, 1L, 1L,
2L, 1L, 1L, 1L), .Label = c("-", "16.3"), class = "factor"),
V17 = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("-",
"441.0"), class = "factor"), V18 = structure(c(1L, 1L, 1L,
2L, 1L, 1L, 1L), .Label = c("-", "-:10..196"), class = "factor"),
V19 = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("-",
"Type2"), class = "factor"), V20 = structure(c(1L, 1L, 1L,
2L, 1L, 1L, 1L), .Label = c("-", "2049.0"), class = "factor"),
V21 = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("-",
"141.0"), class = "factor"), V22 = structure(c(1L, 1L, 1L,
2L, 1L, 1L, 1L), .Label = c("-", "16.3"), class = "factor"),
V23 = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("-",
"441.0"), class = "factor"), V24 = structure(c(1L, 1L, 1L,
2L, 1L, 1L, 1L), .Label = c("-", "-:10..196"), class = "factor"),
V25 = structure(c(4L, 4L, 1L, 3L, 5L, 2L, 5L), .Label = c("-",
"Type3", "Type2", "Type4", "Type5"), class = "factor"),
V26 = structure(c(2L, 4L, 1L, 5L, 6L, 7L, 3L), .Label = c("-",
"1.0", "10131.0", "5331.0", "8294.0", "8604.0", "9795.0"), class = "factor"),
V27 = structure(c(5L, 7L, 1L, 3L, 2L, 4L, 6L), .Label = c("-",
"1473.0", "151.0", "2114.0", "5131.0", "5684.0", "845.0"), class = "factor"),
V28 = structure(c(3L, 5L, 1L, 6L, 4L, 7L, 2L), .Label = c("-",
"0.3", "0.4", "0.5", "1.3", "17.2", "32.0"), class = "factor"),
V29 = structure(c(4L, 7L, 1L, 6L, 2L, 3L, 5L), .Label = c("-",
"13042.0", "1971.0", "44433.0", "49063.0", "580.0", "6672.0"
), class = "factor"), V30 = structure(c(4L, 3L, 1L, 2L, 5L,
6L, 7L), .Label = c("-", "-:10..196", "-:1164..2016", "-:1998..7151",
":1..1471", ":1296..3439", ":1455..7113"), class = "factor")), class = "data.frame", row.names = c(NA,
-7L))
CodePudding user response:
The following works:
# For every row (MARGIN = 1), drop the cells that are exactly the "-"
# placeholder and keep the first remaining value; a row holding nothing but
# "-" gives NA. The pattern is anchored ("^-$") so that values which merely
# contain a hyphen (e.g. "-:501..597" or a negative number) are not discarded.
apply(df, MARGIN = 1, FUN = function(row) row[!grepl("^-$", row)][1])
[1] "Type2" "Type2" "Type3" "Type3" "Type4" "Type5" "Type6"
`apply` with `MARGIN = 1` acts on rows. The function in `FUN` uses `grepl` to find the elements of the row that match `-`, negates the result to keep everything else, and returns the first remaining element with `[1]`.
CodePudding user response:
We can use a vectorized option: `max.col` finds, for each row, the position of the first column whose value is not `-`; `cbind` pairs those positions with the sequence of row numbers, and the resulting row/column index matrix is used to extract the values.
# Vectorised lookup: for each row, locate the first column whose value is not
# "-" and pull that cell out through a (row, column) index matrix.
# NOTE: a row consisting only of "-" resolves to column 1, i.e. returns "-".
df1[cbind(
  seq_len(nrow(df1)),
  max.col(df1 != "-", ties.method = "first")
)]
[1] "Type4" "Type4" "Type2" "Type2" "Type5" "Type3" "Type5"
# Same lookup on the one-row example: pair each row number with the position
# of its first non-"-" column and index the data frame with that matrix.
x[cbind(
  seq_len(nrow(x)),
  max.col(x != "-", ties.method = "first")
)]
[1] "Type1"