Create a new dataframe that will include the class() of every column of another dataframe-CodePudding

I have the example dataframe below and I want to create a new dataframe using R with 2 columns. The first column will be named "Name" and will include all the column names of the dataset. The dataset will be different every time, so the number of columns may vary. The 2nd column will be named "Class" and will include the class() of every column.

# Example data for the question: a 2-row, 17-column object with classes
# "eventlog", "log", "tbl_df", "tbl" and "data.frame" (this looks like dput()
# output). It is not assigned to a name here; the snippets further down refer
# to it as `df`.
# NOTE(review): the "<e5>" inside activity_description looks like a
# mis-encoded non-ASCII character from the original export -- confirm.
structure(list(case_id = c("3397364", "3397364"), action = c("3397364-RAAMELK", 
"3397364-RAAMELK"), resource = c("RAAMELK", "RAAMELK"), lifecycle = c(1, 
1), registration_type = structure(1:2, .Label = c("start", "complete"
), class = "factor"), timestamp = structure(c(1667523600, 1667531220
), tzone = "UTC", class = c("POSIXct", "POSIXt")), activity = c("RAAMELK", 
"RAAMELK"), activity_description = c("Forbrukt r<e5>melk", "Forbrukt r<e5>melk"
), ...9 = c(NA, NA), product = c("K101152", "K101152"), product_type_text = c("200100 - Milk", 
"200100 - Milk"), qty = c(NA, 31), in_out = c("in", "out"), qty_scrap = c(NA_real_, 
NA_real_), `FP ordre` = c(NA_character_, NA_character_), Artikkeltype = c("SF", 
"SF"), .order = 1:2), row.names = c(NA, -2L), class = c("eventlog", 
"log", "tbl_df", "tbl", "data.frame"), case_id = "case_id", activity_id = "activity", activity_instance_id = "action", lifecycle_id = "registration_type", resource_id = "resource", timestamp = "timestamp")

CodePudding user response：

Using a tibble (because data is in tbl_df) with sapply. The paste is needed because some class definitions return more than one string.

library(tibble)

# One row per column of df: the column name and its class vector collapsed
# into a single string (e.g. "POSIXct, POSIXt"), because class() can return
# more than one value.
# vapply() is used as the type-stable counterpart of sapply(): it always
# returns a character vector, even when df has zero columns (sapply() would
# return an empty list there, turning Class into a list column).
# Names are dropped from the result because the Name column already holds them.
tibble(
  Name = colnames(df),
  Class = vapply(
    df,
    function(x) paste(class(x), collapse = ", "),
    character(1),
    USE.NAMES = FALSE
  )
)
# A tibble: 17 × 2
   Name                 Class          
   <chr>                <chr>          
 1 case_id              character      
 2 action               character      
 3 resource             character      
 4 lifecycle            numeric        
 5 registration_type    factor         
 6 timestamp            POSIXct, POSIXt
 7 activity             character      
 8 activity_description character      
 9 ...9                 logical        
10 product              character      
11 product_type_text    character      
12 qty                  numeric        
13 in_out               character      
14 qty_scrap            numeric        
15 FP ordre             character      
16 Artikkeltype         character      
17 .order               integer

CodePudding user response：

# Long format: one row per (column, class) pair, so a column with several
# classes (e.g. POSIXct/POSIXt) appears on several rows.
col_classes <- lapply(df, class)
# stack() returns the columns `values` and `ind`; put the column name first.
dat <- stack(col_classes)[c("ind", "values")]
names(dat) <- c("Name", "Class")

> dat
#                    Name     Class
# 1               case_id character
# 2                action character
# 3              resource character
# 4             lifecycle   numeric
# 5     registration_type    factor
# 6             timestamp   POSIXct
# 7             timestamp    POSIXt
# 8              activity character
# 9  activity_description character
# 10                 ...9   logical
# 11              product character
# 12    product_type_text character
# 13                  qty   numeric
# 14               in_out character
# 15            qty_scrap   numeric
# 16             FP ordre character
# 17         Artikkeltype character
# 18               .order   integer

For a quick overview, you can also call summary.default() directly:

# Call the default summary method directly on df. As the output below shows,
# it prints Length, Class and Mode for every column; Class is "-none-" for the
# plain character/numeric/logical columns and shows only "POSIXct" for
# timestamp.
summary.default(df)
#                      Length Class   Mode     
# case_id              2      -none-  character
# action               2      -none-  character
# resource             2      -none-  character
# lifecycle            2      -none-  numeric  
# registration_type    2      factor  numeric  
# timestamp            2      POSIXct numeric  
# activity             2      -none-  character
# activity_description 2      -none-  character
# ...9                 2      -none-  logical  
# product              2      -none-  character
# product_type_text    2      -none-  character
# qty                  2      -none-  numeric  
# in_out               2      -none-  character
# qty_scrap            2      -none-  numeric  
# FP ordre             2      -none-  character
# Artikkeltype         2      -none-  character
# .order               2      -none-  numeric