In R: shorten list from behind till a certain value-CodePudding

Suppose i have many and larger lists than in this example.

ID <- c(2,2,2,2,5,5,5,5,9,9,9,9)
Rounds <- c(0,1,2,3,0,1,2,3,0,1,2,3)
solution <- c(as.character('[[-1,1,1,-1],[1,-1,1,-1]]'),as.character('[[-1,1,-1,-1],[1,-1,1,-1]]'),as.character('[[-1,1,1,-1],[1,-1,-1,-1]]') )
solution_resp <- c(as.character('[[-1,1,1,1],[1,1,-1,1]]'),as.character('[[1,-1,1,-1],[1,-1,1,1]]'),as.character('[[-1,1,1,1],[1,-1,1,-1]]'))

dt <- data.frame(ID,Rounds,solution,solution_resp)

dt$solution <- gsub('],[',',', dt$solution, fixed=TRUE)
dt$solution <- gsub('[[',''  , dt$solution, fixed=TRUE)
dt$solution <- gsub(']]',''  , dt$solution, fixed=TRUE)

dt$solution <- as.list(strsplit(dt$solution, ","))

dt$solution_resp <- gsub('],[',',', dt$solution_resp, fixed=TRUE)
dt$solution_resp <- gsub('[[',''  , dt$solution_resp, fixed=TRUE)
dt$solution_resp <- gsub(']]',''  , dt$solution_resp, fixed=TRUE)

dt$solution_resp <- as.list(strsplit(dt$solution_resp, ","))

Looks like this:

   ID Rounds                    solution             solution_resp
1   2      0  -1, 1, 1, -1, 1, -1, 1, -1  -1, 1, 1, 1, 1, 1, -1, 1
2   2      1 -1, 1, -1, -1, 1, -1, 1, -1 1, -1, 1, -1, 1, -1, 1, 1
3   2      2 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
4   2      3  -1, 1, 1, -1, 1, -1, 1, -1  -1, 1, 1, 1, 1, 1, -1, 1
5   5      0 -1, 1, -1, -1, 1, -1, 1, -1 1, -1, 1, -1, 1, -1, 1, 1
6   5      1 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
7   5      2  -1, 1, 1, -1, 1, -1, 1, -1  -1, 1, 1, 1, 1, 1, -1, 1
8   5      3 -1, 1, -1, -1, 1, -1, 1, -1 1, -1, 1, -1, 1, -1, 1, 1
9   9      0 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
10  9      1  -1, 1, 1, -1, 1, -1, 1, -1  -1, 1, 1, 1, 1, 1, -1, 1
11  9      2 -1, 1, -1, -1, 1, -1, 1, -1 1, -1, 1, -1, 1, -1, 1, 1
12  9      3 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1

In the lists dt$solution_resp I would like to drop all 1 from behind until the first -1 appears (counting from right to left).

In the example this would mean shortening the lists by taking off the last element in first row (and every third row). In the second row (and every third) the list shall be shorted by two elements. The third row remains as it is because there is a -1 at the last position already.

Then, I want the dt$solution lists to have the same length.

Solution:

   ID Rounds                    solution             solution_resp
1   2      0  -1, 1, 1, -1, 1, -1, 1     -1, 1, 1, 1, 1, 1, -1
2   2      1 -1, 1, -1, -1, 1, -1         1, -1, 1, -1, 1, -1
3   2      2 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
4   2      3  -1, 1, 1, -1, 1, -1, 1     -1, 1, 1, 1, 1, 1, -1
5   5      0 -1, 1, -1, -1, 1, -1         1, -1, 1, -1, 1, -1
6   5      1 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
7   5      2  -1, 1, 1, -1, 1, -1, 1     -1, 1, 1, 1, 1, 1, -1
8   5      3 -1, 1, -1, -1, 1, -1,        1, -1, 1, -1, 1, -1
9   9      0 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
10  9      1  -1, 1, 1, -1, 1, -1, 1,    -1, 1, 1, 1, 1, 1, -1
11  9      2 -1, 1, -1, -1, 1, -1,        1, -1, 1, -1, 1, -1
12  9      3 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1

CodePudding user response：

Find the Position of the last -1 starting at the right= and only keep up until this point:

sapply(dt$solution_resp, \(x) head(x, Position(identity, x=="-1", right=TRUE)) )

#[[1]]
#[1] "-1" "1"  "1"  "1"  "1"  "1"  "-1"
#
#[[2]]
#[1] "1"  "-1" "1"  "-1" "1"  "-1"
#
#[[3]]
#[1] "-1" "1"  "1"  "1"  "1"  "-1" "1"  "-1"
#...

CodePudding user response：

Just a tad longer than @thelatemail's one-liner.

dt |> 
  dplyr::select(
    ID,
    Rounds,
    s            = solution_resp,
  ) |> 
  dplyr::mutate(
    s = gsub("\\[|\\]", "", s),               # Remove the brackets
  ) |> 
  tibble::rowid_to_column("row_id") |> 
  tidyr::separate_rows(s, sep = ",") |> 
  dplyr::mutate(
    s = dplyr::recode(s, "-1" = T, "1" = F)   # Make the math easier later
  ) |> 
  dplyr::group_by(row_id, ID, Rounds) |> 
  dplyr::mutate(
    index       = seq_len(dplyr::n()),        # Remember the ordering
  ) |> 
  dplyr::arrange(dplyr::desc(index)) |>       # Reverse
  dplyr::mutate(
    cumulative = dplyr::cumany(s),            # Find the first (last) 1/F.
    s = dplyr::recode(as.character(s), "TRUE" = "-1", "FALSE" = "1")
  ) |> 
  dplyr::filter(cumulative) |>                # Drop everything afterwards
  dplyr::ungroup() |> 
  dplyr::arrange(ID, Rounds, row_id, index) |>  # Reverse again
  dplyr::group_by(row_id, ID, Rounds) |> 
  dplyr::summarize(
    solution_resp = paste(s, collapse = ", "),  # Smush back in a row
    element_count = dplyr::n(),                 # Quick count for inspection
  ) |> 
  dplyr::ungroup()

Output:

# A tibble: 12 x 5
   row_id    ID Rounds solution_resp             element_count
    <int> <dbl>  <dbl> <chr>                             <int>
 1      1     2      0 -1, 1, 1, 1, 1, 1, -1                 7
 2      2     2      1 1, -1, 1, -1, 1, -1                   6
 3      3     2      2 -1, 1, 1, 1, 1, -1, 1, -1             8
 4      4     2      3 -1, 1, 1, 1, 1, 1, -1                 7
 5      5     5      0 1, -1, 1, -1, 1, -1                   6
 6      6     5      1 -1, 1, 1, 1, 1, -1, 1, -1             8
 7      7     5      2 -1, 1, 1, 1, 1, 1, -1                 7
 8      8     5      3 1, -1, 1, -1, 1, -1                   6
 9      9     9      0 -1, 1, 1, 1, 1, -1, 1, -1             8
10     10     9      1 -1, 1, 1, 1, 1, 1, -1                 7
11     11     9      2 1, -1, 1, -1, 1, -1                   6
12     12     9      3 -1, 1, 1, 1, 1, -1, 1, -1             8

(I'm using the initial dt <- data.frame(ID,Rounds,solution,solution_resp) definition of dt).

CodePudding user response：

Using the original dt <- data.frame(ID,Rounds,solution,solution_resp) Do the following:

dt %>%
      mutate(across(contains('solution'),
            ~str_split(str_remove_all(.x,'\\[|\\]|(, ?1)\\1*\\] $'), ',')))
  ID Rounds                    solution             solution_resp
1   2      0  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
2   2      1 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
3   2      2 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
4   2      3  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
5   5      0 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
6   5      1 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
7   5      2  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
8   5      3 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
9   9      0 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
10  9      1  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
11  9      2 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
12  9      3 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1

in Base R:

dt[3:4] <- gsub('\\[|\\]|(, ?1)\\1*\\] $', '', as.matrix(dt[3:4]))
dt[3:4]<-lapply(dt[3:4], strsplit, ',')
dt
   ID Rounds                    solution             solution_resp
1   2      0  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
2   2      1 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
3   2      2 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
4   2      3  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
5   5      0 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
6   5      1 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
7   5      2  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
8   5      3 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
9   9      0 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
10  9      1  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
11  9      2 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
12  9      3 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1