Home > OS >  Merge Function Issues
Merge Function Issues

Time:03-25

I ran a merge function to combine some data sets and when I look at them the second row is showing "No data available in the table". Here is my code I'm working on this for a class project so I'm not the most experienced in R. This results in EX1 having data and EX2 and EX3 saying "No data available in the table". The other slim functions I created do have data in them and when i run the sapply(EX3, is.numeric) on EX1 EX2 and EX3 it says all of them are true except geoid which the teacher said is how its supposed to be. Anyone got any idea about this? Let me know if you need anymore information i will do my best to provide it. Thanks

Here are the files I am using https://easyupload.io/lkcd8u

install.packages("dplyr")
library(dplyr)

library(readxl)
S0801 <- read_excel("C:/Users/Ethan/Desktop/School/_Spring 2022/301/D EX 3/Final Sheets/S0801.xlsx")
View(S0801)
library(readxl)
S1501 <- read_excel("C:/Users/Ethan/Desktop/School/_Spring 2022/301/D EX 3/Final Sheets/S1501.xlsx")
View(S1501)
library(readxl)
S1502 <- read_excel("C:/Users/Ethan/Desktop/School/_Spring 2022/301/D EX 3/Final Sheets/S1502.xlsx")
View(S1502)
library(readxl)
S2404 <- read_excel("C:/Users/Ethan/Desktop/School/_Spring 2022/301/D EX 3/Final Sheets/S2404.xlsx")
View(S2404)

S2404_slim <- select(S2404,  GEO_ID, S2404_C01_001E, S2404_C01_002E, S2404_C01_005E, S2404_C01_006E, S2404_C01_007E, S2404_C01_008E, S2404_C01_009E, S2404_C01_012E, S2404_C01_013E, S2404_C01_016E, S2404_C01_020E, S2404_C01_023E, S2404_C01_027E)

S1501_slim <- select(S1501, GEO_ID, S1501_C02_009E, S1501_C02_010E, S1501_C02_011E, S1501_C02_012E, S1501_C02_013E, S1501_C02_015E, S1501_C02_018E, S1501_C02_021E, S1501_C02_024E, S1501_C02_027E, S1501_C01_059E)

S1502_slim <- select(S1502, GEO_ID, S1502_C02_002E, S1502_C02_003E, S1502_C02_004E, S1502_C02_005E, S1502_C02_006E)

S0801_slim <- select(S0801, GEO_ID, S0801_C01_001E, S0801_C01_002E, S0801_C01_003E, S0801_C01_004E, S0801_C01_005E, S0801_C01_006E, S0801_C01_007E, S0801_C01_009E, S0801_C01_010E, S0801_C01_011E, S0801_C01_012E, S0801_C01_013E, S0801_C01_046E)

EX1 <- merge(S2404_slim,S1501_slim)
EX2 <- merge(S1502_slim,S0801_slim)
EX3 <- merge(EX1,EX2)

sapply(EX3, is.numeric)

CodePudding user response:

You missed all=TRUE argument in merge(). Without it only rows are merged, that are common in all data frames. S0801_slim$GEO_ID is different from that of the others.

EX1 <- merge(S2404_slim, S1501_slim)
EX2 <- merge(S1502_slim, S0801_slim, all=TRUE)
EX3 <- merge(EX1, EX2, all=TRUE)

Note that you may use this Reduce approach to merge multiple data frames at once:

Reduce(\(...) merge(..., all=TRUE), list(S2404_slim, S1501_slim, S1502_slim, S0801_slim))
# GEO_ID S2404_C01_001E S2404_C01_002E S2404_C01_005E S2404_C01_006E S2404_C01_007E
# 1   0100000US             NA             NA             NA             NA             NA
# 2 0400000US06       10907989         207254         754372        1397968         445030
# S2404_C01_008E S2404_C01_009E S2404_C01_012E S2404_C01_013E S2404_C01_016E S2404_C01_020E
# 1             NA             NA             NA             NA             NA             NA
# 2        1072689         596688         360822         861556        1404664        1907454
# S2404_C01_023E S2404_C01_027E S1501_C02_009E S1501_C02_010E S1501_C02_011E S1501_C02_012E
# 1             NA             NA             NA             NA             NA             NA
# 2         755348         642283           20.4           20.9              8           21.6
# S1501_C02_013E S1501_C02_015E S1501_C02_018E S1501_C02_021E S1501_C02_024E S1501_C02_027E
# 1             NA             NA             NA             NA             NA             NA
# 2           13.1           34.7           37.7             38           32.7           32.1
# S1501_C01_059E S1502_C02_002E S1502_C02_003E S1502_C02_004E S1502_C02_005E S1502_C02_006E
# 1             NA             NA             NA             NA             NA             NA
# 2          44077           40.7            7.9           18.6            7.2           25.7
# S0801_C01_001E S0801_C01_002E S0801_C01_003E S0801_C01_004E S0801_C01_005E S0801_C01_006E
# 1      139255035           86.4             76           10.4              8            1.4
# 2             NA             NA             NA             NA             NA             NA
# S0801_C01_007E S0801_C01_009E S0801_C01_010E S0801_C01_011E S0801_C01_012E S0801_C01_013E
# 1              1            4.9            2.8            0.5            1.2            4.1
# 2             NA             NA             NA             NA             NA             NA
# S0801_C01_046E
# 1           25.2
# 2             NA

Data:

temp <- tempdir()  ## create tempdir
temp <- unzip('~/Downloads/Final Sheets.zip')  ## unzip download
list2env(setNames(lapply(temp, openxlsx::read.xlsx),  ## unpack to workspace
                  c('S0801', 'S1501', 'S1502', 'S2404')), .GlobalEnv)
unlink(temp)  ## delete tempdir

S2404_slim <- subset(S2404, 
                     select=c(GEO_ID, S2404_C01_001E, S2404_C01_002E, 
                              S2404_C01_005E, S2404_C01_006E, S2404_C01_007E,
                              S2404_C01_008E, S2404_C01_009E, S2404_C01_012E, 
                              S2404_C01_013E, S2404_C01_016E, S2404_C01_020E, 
                              S2404_C01_023E, S2404_C01_027E))

S1501_slim <- subset(S1501, select=c(GEO_ID, S1501_C02_009E, S1501_C02_010E, 
                                     S1501_C02_011E, S1501_C02_012E, S1501_C02_013E,
                                     S1501_C02_015E, S1501_C02_018E, S1501_C02_021E, 
                                     S1501_C02_024E, S1501_C02_027E, S1501_C01_059E))

S1502_slim <- subset(S1502, select=c(GEO_ID, S1502_C02_002E, S1502_C02_003E, 
                                     S1502_C02_004E, S1502_C02_005E, S1502_C02_006E))

S0801_slim <- subset(S0801, select=c(GEO_ID, S0801_C01_001E, S0801_C01_002E,
                                     S0801_C01_003E, S0801_C01_004E, S0801_C01_005E, 
                                     S0801_C01_006E, S0801_C01_007E, S0801_C01_009E, 
                                     S0801_C01_010E, S0801_C01_011E, S0801_C01_012E, 
                                     S0801_C01_013E, S0801_C01_046E))

CodePudding user response:

I'm using you're trying to merge on the GEOID? If that's the case, you might want to include on = GEOID in your merge.

merge(EX1,EX2, on="GEOID")

But in regards to your data, it looks like there is only one row in each of the excel sheets you're reading in. Is it supposed to be that way?

The reason EX2 and EX3 don't have any data is because the dataframes you are trying to merge do not share any of the same the data.

Also, just a note: you don't need to import a library more than once.

  • Related