I have the following data, as show below.
I want to merge such that the returns from DF2 (Columns "US", "CA") are included in DF1 with respect to date and Country. I know how to merge with respect to columns, but do not know how to match with the correct country.
I would greatly appreciate your help.
DF1
Company Country Delt.1.arithmetic date
2021-03-01 TSLA CA NA 2021-03-01
2021-03-02 TSLA CA -0.04452764 2021-03-02
2021-03-03 MSFT US -0.04842374 2021-03-03
2021-03-04 TSLA CA -0.04862218 2021-03-04
2021-03-05 MSFT US -0.03779929 2021-03-05
2021-03-08 TSLA CA -0.05844972 2021-03-08
DF2
US CA date
-0.02 -0.02 2021-03-01
-0.02 -0.02 2021-03-02
-0.02 -0.02 2021-03-03
-0.02 -0.02 2021-03-04
-0.02 -0.02 2021-03-05
-0.02 -0.02 2021-03-08
I want:
Company Country Delt.1.arithmetic date Return
2021-03-01 TSLA CA NA 2021-03-01 0,02
2021-03-02 TSLA CA -0.04452764 2021-03-02 0,02
2021-03-03 MSFT US -0.04842374 2021-03-03 0,01
2021-03-04 TSLA CA -0.04862218 2021-03-04 0,01
2021-03-05 MSFT US -0.03779929 2021-03-05 0,02
2021-03-08 TSLA CA -0.05844972 2021-03-08 0,02
Code for sample data:
require(quantmod)
require(zoo)
# STOCK DATA.FRAME
TSLA <- as.data.frame(getSymbols.yahoo("TSLA", from="2021-03-01", verbose=F, auto.assign=F))
AAPL <- as.data.frame(getSymbols.yahoo("AAPL", from="2021-03-01", verbose=F, auto.assign=F))
MSFT <- as.data.frame(getSymbols.yahoo("MSFT", from="2021-03-01", verbose=F, auto.assign=F))
TSLA$Company <- c("TSLA")
AAPL$Company <- c("AAPL")
MSFT$Company <- c("MSFT")
TSLA$Country <- "CA"
AAPL$Country <- "US"
MSFT$Country <- "US"
TSLA$Return <- Delt(TSLA$TSLA.Adjusted)
AAPL$Return <- Delt(AAPL$AAPL.Adjusted)
MSFT$Return <- Delt(MSFT$MSFT.Adjusted)
colnames(TSLA) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted", "Company", "Country", "Return")
colnames(AAPL) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted", "Company","Country", "Return")
colnames(MSFT) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted", "Company","Country", "Return")
stocks <- rbind(TSLA, AAPL, MSFT)
stocks$Open <- NULL
stocks$Close <- NULL
stocks$High <- NULL
stocks$Low <- NULL
stocks$Volume <- NULL
stocks$Adjusted <- NULL
stocks$date <- row.names(stocks)
# Market returns
date <- stocks$date
US <- sample(c(-0.02, -0.02, -0.02, -0.02, 0.15, -0.02, -0.02, -0.02, -0.02), size = nrow(stocks), replace=T)
CA <- sample(c(-0.02, -0.02, -0.02, -0.02, 0.15, -0.02, -0.02, -0.02, -0.02), size = nrow(stocks), replace=T)
market <- data.frame(US,CA,date)
rm(AAPL,MSFT,US,CA,TSLA, date)
CodePudding user response:
library(dplyr)
library(tidyr)
df1 %>%
left_join(df2 %>% pivot_longer(-date, names_to = "Country", values_to = "Return"))
Returns:
Company Country Delt.1.arithmetic date Return 1 TSLA CA NA 2021-03-01 -0.02 2 TSLA CA -0.04452764 2021-03-02 -0.02 3 MSFT US -0.04842374 2021-03-03 -0.02 4 TSLA CA -0.04862218 2021-03-04 -0.02 5 MSFT US -0.03779929 2021-03-05 -0.02 6 TSLA CA -0.05844972 2021-03-08 -0.02
I was not able to reproduce your desired output with the values you supplied (Return is all -0.02 for my data, while your desired output has also -0.01...). And since no direction for the join was specified I assumed left. Adapt it so it fits your case.