Home > front end >  Remove all rows before row meeting condition in R
Remove all rows before row meeting condition in R

Time:10-19

I am importing multiple excel files. The files have a non-standard structure, but all have the required data after a row of headers, midway down the rows of the data frame.

Here is a MWE:

df= structure(list(...1 = c("CPET Results", NA, "Operator", NA, NA, 
"Patient data", NA, "Administrative Data", "ID", "Title", "Last Name", 
"First Name", "Name Addition", "Sex", "Date of Birth", NA, NA, 
"Biological and Medical Baseline Data", "Height", "Weight", "Mask", 
"Race", "Body Fat", "Hip/Waist Ratio", "BMI", "Estimated Fitness Level", 
"BSA", "Hct", "Hb", "Medication that changes the Heart Rate", 
"Medication", "Existing Medical Conditions", NA, NA, NA, "Test data", 
"Start Time", "Duration", "CPET device", "Serial number", "Firmware version", 
"Flow Sensor", "Temperature", "Barometric Pressure", "Humidity", 
NA, NA, NA, "Variable", "V'O2", "V'CO2", "V'O2/kg", "V'O2/HR", 
"HR", "V'E/V'O2", "V'E/V'CO2", "V'E", "BF", "RER", "WR", NA, 
NA, "t", "h:mm:ss.ms", "0:00:25.000", "0:00:26.000", "0:00:27.000", 
"0:00:28.000", "0:00:29.000", "0:00:30.000"), ...2 = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Unit", 
"L/min", "L/min", "ml/min/kg", "ml", "/min", NA, NA, "L/min", 
"/min", NA, "W", NA, NA, "Phase", NA, "Rest", "Rest", "Rest", 
"Rest", "Rest", "Rest"), ...3 = c(NA, NA, NA, NA, NA, NA, NA, 
NA, "343", NA, "GFRex", "343", NA, "female", "21/05/1924", NA, 
NA, NA, "178 cm", "88.2 kg", "Blue, medium", NA, NA, "0.96", 
"28", NA, "2.06 m2", NA, NA, NA, NA, NA, NA, NA, NA, NA, "12/04/2021 11:27 AM", 
"0:15:12", "MetaLyzer 3B-R3", "231821624", "1.3.10", NA, "21.5°C", 
"1030mBar", "36%", NA, NA, NA, "Rest", "0.36", "0.31", "4", "0", 
"-", "35.7", "40.0", "14.9", "14", "0.88", "0", NA, NA, "Marker", 
NA, NA, NA, NA, NA, NA, NA), ...4 = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Unloaded Pedalling", 
"-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", NA, NA, 
"V'O2", "L/min", "0.61123179277253403", "0.61123179277253403", 
"0.61123179277253403", "0.61123179277253403", "0.51731964113453299", 
"0.51731964113453299"), ...5 = c(NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, "Warm Up", "0.61", "0.47", 
"7", "0", "-", "26.2", "33.9", "18.5", "16", "0.77", "0", NA, 
NA, "V'O2/kg", "ml/min/kg", "6.9339965147196203", "6.9339965147196203", 
"6.9339965147196203", "6.9339965147196203", "5.8686289408341796", 
"5.8686289408341796"), ...6 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, "VT1", "1.22", "0.98", "14", 
"-", "-", "28.6", "32.3", "35.4", "22", "0.88", "71", NA, NA, 
"V'O2/HR", "ml", "0", "0", "0", "0", "0", "0"), ...7 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "VT1 % Norm", 
"145", "-", "145", "-", "-", "-", "-", "-", "102", "-", "131", 
NA, NA, "HR", "/min", NA, NA, NA, NA, NA, NA), ...8 = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "VT1 % Max", 
"69", "55", "69", "-", "-", "83", "96", "54", "76", "87", "55", 
NA, NA, "WR", "W", "0", "0", "0", "0", "0", "0"), ...9 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "VT2", 
"1.71", "1.66", "19", "-", "-", "31.9", "32.7", "60.0", "32", 
"0.97", "122", NA, NA, "V'E/V'O2", NA, "30.6521809263484", "30.6521809263484", 
"30.6521809263484", "30.6521809263484", "34.760039405568897", 
"34.760039405568897"), ...10 = c(NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, "VT2 % Norm", "203", "-", 
"203", "-", "-", "-", "-", "-", "147", "-", "226", NA, NA, "V'E/V'CO2", 
NA, "35.697970640705897", "35.697970640705897", "35.697970640705897", 
"35.697970640705897", "39.618822090063901", "39.618822090063901"
), ...11 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, "VT2 % Max", "97", "93", "97", "-", "-", "93", 
"97", "92", "110", "96", "96", NA, NA, "RER", NA, "0.858653317715381", 
"0.858653317715381", "0.858653317715381", "0.858653317715381", 
"0.87736175817015105", "0.87736175817015105"), ...12 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "V'O2peak", 
"1.77", "1.79", "20", "0", "-", "34.3", "33.8", "65.4", "29", 
"1.01", "128", NA, NA, "V'E", "L/min", "23.334937499999999", 
"23.334937499999999", "23.334937499999999", "23.334937499999999", 
"21.762284444444401", "21.762284444444401"), ...13 = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "V'O2peak % Norm", 
"210", "-", "210", "0", "-", "-", "-", "-", "135", "-", "237", 
NA, NA, "VT", "L", "0.86250000000000004", "0.86250000000000004", 
"0.86250000000000004", "0.86250000000000004", "0.97866666666666702", 
"0.97866666666666702"), ...14 = c(NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, "Normal", "0.84", "-", "10", 
"8", "104", "-", "-", "-", "22", "-", "54", NA, NA, "BF", "/min", 
"27.055", "27.055", "27.055", "27.055", "22.2366666666667", "22.2366666666667"
), ...15 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, "Absolute Maximum Values", "2.06", "1.96", "23", 
"0", "-", "44.5", "37.2", "73.5", "34", "1.19", "128", NA, NA, 
"V'CO2", "L/min", "0.52483620675725695", "0.52483620675725695", 
"0.52483620675725695", "0.52483620675725695", "0.45387646988174501", 
"0.45387646988174501"), ...16 = c(NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, "WR", "W", "0", "0", "0", "0", "0", 
"0")), row.names = c(NA, -70L), class = c("tbl_df", "tbl", "data.frame"
))

I want to remove all rows before ...1 == "t". I'm importing multiple files and want to do this to all of them at the sametime, and the header "t" appears at a different row number in each file.

I have tried

df1 = df[-c(1:row_number(df$...1 =="t")),]
df1 = df[-c(rownames(df[df$...1 =="t",])),]

I'd like a base R or dplyr solution. Thanks

CodePudding user response:

In dplyr, the slice function can be used to select rows by index, and the base-R which() can tell you which row index to start at.

df %>%
  slice(min(which(...1 == 't')):n())

This code will check for any rows on which ...1 == 't', then which() tells you the row index. min() is in case you get a file with two rows of 't'. Then, the slice picks all rows from that row you just found to the end (n()).

CodePudding user response:

in base R you could do:

df[-seq(which(df[, '...1'] == 't') - 1),]
  •  Tags:  
  • r
  • Related