Tidying arrays of numbers in R data frame-CodePudding

How can I tidy up the following data frame?

# Input: each `values` entry is a string holding a bracketed, comma-separated list of numbers.
data.frame(a  = c(1,2), values = c("[1.1, 1.2, 1.3]", "[2.1, 2.2]"))

 a          values
1 [1.1, 1.2, 1.3]
2      [2.1, 2.2]

The result should be

# Desired long format: one row per number, with `a` repeated for each of its values.
data.frame(a  = c(1,1,1,2,2), values = c(1.1, 1.2, 1.3, 2.1, 2.2))
  a values
 1    1.1
 1    1.2
 1    1.3
 2    2.1
 2    2.2

CodePudding user response：

We may extract the numbers with `str_extract_all` into a list column and then `unnest` it

library(dplyr)
library(stringr)
library(tidyr)
# Pull every run of digits / decimal points out of each string (giving one
# character vector per row), expand that list column to one row per number,
# then let type.convert() turn the extracted strings into numeric columns.
# The `+` quantifier is essential: without it the pattern cannot match a
# multi-character number such as "1.1".
df1 %>%
    mutate(values = str_extract_all(values, "[0-9.]+")) %>%
    unnest(values) %>%
    type.convert(as.is = TRUE)

-output

# A tibble: 5 × 2
      a values
  <int>  <dbl>
1     1    1.1
2     1    1.2
3     1    1.3
4     2    2.1
5     2    2.2

Or another option is to evaluate the Python list literal with reticulate::py_eval and then unnest the resulting list column

library(reticulate)
# Evaluate each string as a Python expression, one row at a time, wrapping
# the result in list() so it is stored as a list column; then expand that
# column to one row per element.
# NOTE(review): py_eval() executes whatever text is in `values` -- only use
# this on trusted input.
df1 %>%
  rowwise() %>%
  mutate(values = list(py_eval(values))) %>%
  unnest(cols = values)

-output

# A tibble: 5 × 2
      a values
  <dbl>  <dbl>
1     1    1.1
2     1    1.2
3     1    1.3
4     2    2.1
5     2    2.2

data

# Example input: `values` stores bracketed, comma-separated numbers as text.
df1 <- data.frame(
  a = c(1, 2),
  values = c("[1.1, 1.2, 1.3]", "[2.1, 2.2]")
)

CodePudding user response：

Another possible solution:

library(tidyverse)

df <- data.frame(a = c(1, 2), values = c("[1.1, 1.2, 1.3]", "[2.1, 2.2]"))

# "[", "," and "]" are all treated as split points, so a string containing k
# of them splits into k + 1 pieces. Size the target columns from the longest
# row instead of hard-coding five, so longer arrays are not truncated.
n_pieces <- max(str_count(df$values, "\\[|,|\\]")) + 1

# Split into wide columns (shorter rows are padded on the right), convert the
# pieces to numbers, then go back to long format, dropping the pieces that
# became NA after conversion. The temporary column names are discarded at the end.
df %>%
  separate(
    values,
    into = paste0("v", seq_len(n_pieces)),
    sep = "\\[|,|\\]",
    fill = "right",
    convert = TRUE
  ) %>%
  pivot_longer(-a, values_drop_na = TRUE) %>%
  select(-name)

#> # A tibble: 5 × 2
#>       a value
#>   <dbl> <dbl>
#> 1     1   1.1
#> 2     1   1.2
#> 3     1   1.3
#> 4     2   2.1
#> 5     2   2.2

CodePudding user response：

Another base R option using eval + str2expression + gsub

# Base R approach: rewrite each "[x, y, ...]" string as the R call
# "c(x, y, ...)", evaluate it to get a numeric vector, and stack the named
# list of vectors into a long data frame.
# NOTE(review): eval() runs whatever text is in `values` -- only use this on
# trusted input.
with(
  df,
  type.convert(
    setNames(
      # stack() returns (values, ind); reverse so the id column comes first
      rev(
        stack(
          # lapply() rather than sapply(): sapply() would simplify to a
          # matrix when every row has the same number of values, which
          # stack() cannot turn into the expected long format.
          lapply(
            setNames(values, a),
            function(x) eval(str2expression(gsub("\\[(.*)\\]", "c(\\1)", x)))
          )
        )
      ), names(df)
    ),
    # the id column comes back from stack() as a factor; convert it to a number
    as.is = TRUE
  )
)

gives