Home > database >  Constructing a table from a given dataset using R
Constructing a table from a given dataset using R

Time:09-30

I have two character vectors:

a <- c("ham", "pizza", "ham", "ham", "apple", "Orange")

b <- c("ham", "guava", "bread")

What I am trying to achieve is a table like below:

        ham   guava  bread  pizza  apple  orange

a        3      0      0      1      1       1

b        1      1      1      0      0       0

I tried the table() function, which gives the frequency of occurrences, but I don't know how to combine the results into a single table. Any help is much appreciated.

CodePudding user response:

library(tidyverse)

a <- c("ham", "pizza", "ham", "ham", "apple", "Orange")
b <- c("ham", "guava", "bread")
# `c` is only used by the foods() example further down.
c <- c("ham", "guava", "pizza")

# Hand code.
# Build one single-row, wide count table per vector, stack the rows, then
# turn the NAs (foods that a vector does not contain) into zero counts.
bind_rows(
  tibble(food = a) %>%
    count(food) %>%
    pivot_wider(names_from = food, values_from = n),
  tibble(food = b) %>%
    count(food) %>%
    pivot_wider(names_from = food, values_from = n)
) %>%
  # across() replaces the superseded mutate_all(); 0L keeps counts integer.
  mutate(across(everything(), ~ replace_na(.x, 0L)))

# Or we can make a function that can take a list of food character vectors.
#
# foods: a list of character vectors, one per dataset.
# Returns a tibble with one row per list element (in list order) and one
# integer column per distinct food; a food missing from a vector counts as 0.
foods <- function(foods) {
  foods %>%
    map(function(x) {
      tibble(food = x) %>%
        count(food) %>%
        pivot_wider(names_from = food, values_from = n)
    }) %>%
    # bind_rows() takes the whole list at once, so an empty list gives an
    # empty tibble rather than the error reduce() raises without `.init`.
    bind_rows() %>%
    mutate(across(everything(), ~ replace_na(.x, 0L)))
}

foods(list(a, b, c))

CodePudding user response:

Another method:

library(dplyr)
library(tidyr)
library(tibble)
library(purrr)

a <- c("ham", "pizza", "ham", "ham", "apple", "Orange")
b <- c("ham", "guava", "bread")

# One single-row wide count table per named vector, row-bound together with
# the list name stored in the `data` column.
list(a = a, b = b) |>
  map_dfr(
    \(x) {
      enframe(x) |>
        count(value) |>
        pivot_wider(names_from = value, values_from = n)
    },
    .id = "data"
  ) |>
  # Fill only the count columns: `data` is character, and a type-strict
  # replace_na() will not accept a numeric replacement for it. The lambda
  # also avoids passing extra arguments through across(), which is deprecated.
  mutate(across(-data, \(x) replace_na(x, 0L)))
#> # A tibble: 2 x 7
#>   data  apple   ham Orange pizza bread guava
#>   <chr> <int> <int>  <int> <int> <int> <int>
#> 1 a         1     3      1     1     0     0
#> 2 b         0     1      0     0     1     1

CodePudding user response:

I think that, with some alteration, using stack and reshape2::dcast may be a simpler solution:

library(reshape2)
a <- c("ham", "pizza", "ham", "ham", "apple", "Orange")
b <- c("ham", "guava", "bread")

# Name each vector so that stack() records its source in the `ind` column.
vars <- list(a = a, b = b)

# stack() gives a long data frame (`values`, `ind`); dcast() then counts the
# rows for every (ind, values) pair. `fun.aggregate` is spelled out in full
# instead of relying on partial argument matching.
df <- dcast(
  stack(vars),
  ind ~ values,
  value.var = "ind",
  fun.aggregate = length
)
print(df)

And when we create a function for it:

# Build a count table from any number of character vectors.
#
# ...: the vectors to tabulate; the expression text of each argument
#      (e.g. the variable name) becomes that vector's row label.
# Returns a data frame with an `ind` column holding the labels and one count
# column per distinct value found across all vectors.
vecs_to_df <- function(...) {
  # deparse1() always yields exactly one string per argument, so the labels
  # stay aligned with the vectors even when an argument is a long expression
  # (plain deparse() may split such an expression into several strings).
  varnames <- vapply(
    as.list(substitute(list(...)))[-1],
    deparse1,
    character(1)
  )
  vars <- list(...)
  names(vars) <- varnames
  dcast(
    stack(vars),
    ind ~ values,
    value.var = "ind",
    fun.aggregate = length
  )
}

# Example inputs. The variable names matter here: vecs_to_df() labels each
# row with the expression text of the corresponding argument.
a <- c("ham", "pizza", "ham", "ham", "apple", "Orange")
b <- c("ham", "guava", "bread")
x <- c("ham", "Orange", "pizza")
y <- c("guava", "apple", "bread")

# Tabulate all four vectors in one call and show the resulting data frame.
df <- vecs_to_df(a, b, x, y)
print(df)

CodePudding user response:

A simpler method using just data frames in base R:

# Tabulate each vector and convert the counts to a data frame: one row per
# distinct value, with its frequency in the `Freq` column.
df1 <- data.frame(table(a))
df2 <- data.frame(table(b))

The dataframes look like

> df1
       a Freq
1  apple    1
2    ham    3
3 Orange    1
4  pizza    1
> df2
      b Freq
1 bread    1
2 guava    1
3   ham    1

We can then merge the two dataframes as

# Full outer join on the value column, so foods that occur in only one vector
# are kept; the count from the other vector comes back as NA.
result <- merge(df1, df2, by.x = "a", by.y = "b", all = TRUE)
names(result) <- c("fruits", "a", "b")

Output:

> result
  fruits  a  b
1  apple  1 NA
2    ham  3  1
3 Orange  1 NA
4  pizza  1 NA
5  bread NA  1
6  guava NA  1

The NAs can be replaced with zeros as follows:

# A missing count means the food does not occur in that vector, so use 0.
result[is.na(result)] <- 0

CodePudding user response:

You can stack the vectors and use table.

a <- c("ham", "pizza", "ham", "ham", "apple", "Orange")
b <- c("ham", "guava", "bread")

# stack() turns the named list into a long data frame (`values`, `ind`).
# Putting `ind` first makes table() cross-tabulate with the source vector as
# rows and the distinct values as columns.
table(stack(list(a = a, b = b))[c("ind", "values")])
#   values
#ind apple bread guava ham Orange pizza
#  a     1     0     0   3      1     1
#  b     0     1     1   1      0     0
  •  Tags:  
  • r
  • Related