Hi I have this original data set. I need the order of IDs and aliquot number for each sample type to remain the same, but need to order based on the sample type.
In the original data, the order doesn't remain the same I need the order to be "S,B,E,N" like the intended data.
Hope this makes sense. Your help would be really appreciated
Original Data
structure(list(ID = c("0226", "0226", "0226", "0226", "0226",
"0226", "0226", "0226", "0226", "0226", "0226", "0226", "0226",
"0226", "0226", "0226", "0226", "0226", "0053", "0053", "0053",
"0053", "0053", "0053", "0053", "0053", "0053", "0053", "0053",
"0053", "0053", "0053", "0053", "0053", "0053", "0053", "0053",
"0053"), `Unique Aliquot ID` = c("C0226BCV1B 01", "C0226BCV1B 02",
"C0226BCV1S 01", "C0226BCV1S 02", "C0226BCV1S 03", "C0226BCV1S 04",
"C0226BCV1S 05", "C0226BCV1S 06", "C0226BCV1S 07", "C0226BCV1E 01",
"C0226BCV1E 02", "C0226BCV1E 03", "C0226BCV1E 04", "C0226BCV1E 05",
"C0226BCV1E 06", "C0226BCV1N 01", "C0226BCV1N 02", "C0226BCV1N 03",
"C0053BCV1B 01", "C0053BCV1B 02", "C0053BCV1S 01", "C0053BCV1S 02",
"C0053BCV1S 03", "C0053BCV1S 04", "C0053BCV1S 05", "C0053BCV1S 06",
"C0053BCV1S 07", "C0053BCV1S 08", "C0053BCV1N 01", "C0053BCV1N 02",
"C0053BCV1N 03", "C0053BCV1E 01", "C0053BCV1E 02", "C0053BCV1E 03",
"C0053BCV1E 04", "C0053BCV1E 05", "C0053BCV1E 06", "C0053BCV1E 07"
), `sample type` = c("B", "B", "S", "S", "S", "S", "S", "S",
"S", "E", "E", "E", "E", "E", "E", "N", "N", "N", "B", "B", "S",
"S", "S", "S", "S", "S", "S", "S", "N", "N", "N", "E", "E", "E",
"E", "E", "E", "E"), `Aliquot Number` = c(1, 2, 1, 2, 3, 4, 5,
6, 7, 1, 2, 3, 4, 5, 6, 1, 2, 3, 1, 2, 1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 1, 2, 3, 4, 5, 6, 7)), row.names = c(NA, -38L), class = c("tbl_df",
"tbl", "data.frame"))
Intended data
structure(list(ID = c("0226", "0226", "0226", "0226", "0226",
"0226", "0226", "0226", "0226", "0226", "0226", "0226", "0226",
"0226", "0226", "0226", "0226", "0226", "0053", "0053", "0053",
"0053", "0053", "0053", "0053", "0053", "0053", "0053", "0053",
"0053", "0053", "0053", "0053", "0053", "0053", "0053", "0053",
"0053"), `Unique Aliquot ID` = c("C0226BCV1S 01", "C0226BCV1S 02",
"C0226BCV1S 03", "C0226BCV1S 04", "C0226BCV1S 05", "C0226BCV1S 06",
"C0226BCV1S 07", "C0226BCV1B 01", "C0226BCV1B 02", "C0226BCV1E 01",
"C0226BCV1E 02", "C0226BCV1E 03", "C0226BCV1E 04", "C0226BCV1E 05",
"C0226BCV1E 06", "C0226BCV1N 01", "C0226BCV1N 02", "C0226BCV1N 03",
"C0053BCV1S 01", "C0053BCV1S 02", "C0053BCV1S 03", "C0053BCV1S 04",
"C0053BCV1S 05", "C0053BCV1S 06", "C0053BCV1S 07", "C0053BCV1S 08",
"C0053BCV1B 01", "C0053BCV1B 02", "C0053BCV1E 01", "C0053BCV1E 02",
"C0053BCV1E 03", "C0053BCV1E 04", "C0053BCV1E 05", "C0053BCV1E 06",
"C0053BCV1E 07", "C0053BCV1N 01", "C0053BCV1N 02", "C0053BCV1N 03"
), `sample type` = c("S", "S", "S", "S", "S", "S", "S", "B",
"B", "E", "E", "E", "E", "E", "E", "N", "N", "N", "S", "S", "S",
"S", "S", "S", "S", "S", "B", "B", "E", "E", "E", "E", "E", "E",
"E", "N", "N", "N"), `Aliquot Number` = c(1, 2, 3, 4, 5, 6, 7,
1, 2, 1, 2, 3, 4, 5, 6, 1, 2, 3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2,
1, 2, 3, 4, 5, 6, 7, 1, 2, 3)), row.names = c(NA, -38L), class = c("tbl_df",
"tbl", "data.frame"))
CodePudding user response:
You can order your data by turning your sample type
column into a factor
and then use order
with ID
like this:
df$`sample type` <- factor(df$`sample type`, levels=c("S", "B", "E", "N"))
df[order(df$ID, df$`sample type`), ]
Output:
# A tibble: 38 × 4
ID `Unique Aliquot ID` `sample type` `Aliquot Number`
<chr> <chr> <fct> <dbl>
1 0053 C0053BCV1S 01 S 1
2 0053 C0053BCV1S 02 S 2
3 0053 C0053BCV1S 03 S 3
4 0053 C0053BCV1S 04 S 4
5 0053 C0053BCV1S 05 S 5
6 0053 C0053BCV1S 06 S 6
7 0053 C0053BCV1S 07 S 7
8 0053 C0053BCV1S 08 S 8
9 0053 C0053BCV1B 01 B 1
10 0053 C0053BCV1B 02 B 2
# … with 28 more rows
CodePudding user response:
You can make sample type
into a factor and define the levels in the way that you want it to be ordered. Since you want to keep the ID
ordering the same as the original, you can maintain that order by making that a factor as well. Here's an example:
library(dplyr)
df_out <- df_orig %>%
mutate(
ID = factor(ID, levels = unique(ID)),
`sample type` = factor(`sample type`, levels = c("S", "B", "E", "N"))) %>%
arrange(ID, `sample type`)
Here's the output:
# A tibble: 38 × 4
ID `Unique Aliquot ID` `sample type` `Aliquot Number`
<fct> <chr> <fct> <dbl>
1 0226 C0226BCV1S 01 S 1
2 0226 C0226BCV1S 02 S 2
3 0226 C0226BCV1S 03 S 3
4 0226 C0226BCV1S 04 S 4
5 0226 C0226BCV1S 05 S 5
6 0226 C0226BCV1S 06 S 6
7 0226 C0226BCV1S 07 S 7
8 0226 C0226BCV1B 01 B 1
9 0226 C0226BCV1B 02 B 2
10 0226 C0226BCV1E 01 E 1
# … with 28 more rows