Home > Software engineering >  Append data.frame objects from an environment to corresponding data.frame objects in GlobalEnv (or a
Append data.frame objects from an environment to corresponding data.frame objects in GlobalEnv (or a

Time:10-08

I have several existing data.frame objects that need to be updated from the Internet. However, as the updates have the same names as the mentioned existing objects, I put the updates in a separate environment also as data.frame objects.

Then, the idea is to append the updates to the existing data.frame objects. But I don't see how I can do that iteratively (i.e., in a loop?) with rbind from one environment to GlobalEnv (or another environment, for that matter).

Also, I did not put them here, but there will be several other data.frame objects (with other names) that will be in the GlobalEnv (or the environment where they will be loaded).

Here below is a piece of code that should be reproducible (with comments and links to the sources):

library(quantmod)

# Download the daily series for every ticker (2020-01-01 to 2021-02-02)
# straight into the global environment, one xts object per ticker.
tickers <- c("NKLA", "MPNGF", "RMO", "JD", "MSFT")
getSymbols.yahoo(
  tickers,
  auto.assign = TRUE,
  env = globalenv(),
  from = "2020-01-01",
  to = "2021-02-02"
)

# Precautionary clean-up of any connection left open by the download
# https://stackoverflow.com/a/52758758/2950721
closeAllConnections()

# Names of all xts objects currently living in the global environment
xtsObjects <- names(which(unlist(eapply(.GlobalEnv, is.xts))))

# Replace each xts object by its data.frame equivalent, keeping the name
# https://stackoverflow.com/a/69246047/2950721
for (objName in xtsObjects) {
  assign(objName, fortify.zoo(get(objName)))
}


# Redo the previous process, but in a separate environment, for updated
# values of the same tickers (comments and sources are not repeated).
symbolUpdates.env <- new.env()

# NOTE(review): this window starts on 2020-02-03, which overlaps the first
# download (2020-01-01 to 2021-02-02); "2021-02-03" may have been intended.
# Confirm before appending, otherwise rows will be duplicated.
getSymbols.yahoo(tickers, auto.assign = TRUE, env = symbolUpdates.env,
                 from = "2020-02-03")

closeAllConnections()

symbolUpdatesXtsObjects <- names(which(unlist(eapply(symbolUpdates.env, is.xts))))

# Convert every update from xts to data.frame in place. The object must be
# read from symbolUpdates.env under the same name it is written back to;
# looking it up via xtsObjects in the global environment would convert the
# old data instead of the update.
for (i in seq_along(symbolUpdatesXtsObjects)) {
  assign(
    symbolUpdatesXtsObjects[i],
    fortify.zoo(get(symbolUpdatesXtsObjects[i], envir = symbolUpdates.env)),
    envir = symbolUpdates.env
  )
}

# Find data.frame objects both in GlobalEnv and symbolUpdates.env.
# The two name vectors are collected independently, so position i in one
# vector is not guaranteed to refer to the same ticker as position i in the
# other (presumably eapply() gives no ordering guarantee -- verify).
globalEnvDataframeObjects <- names(which(unlist(eapply(.GlobalEnv, is.data.frame))))
symbolUpdatesDataframeObjects <- names(which(unlist(eapply(symbolUpdates.env, is.data.frame))))


# This rbind definitely does not work!!!
# Why it fails, as written:
#   * rbind() has no `envir` argument, so both environments are passed along
#     as things to bind rather than as lookup locations;
#   * globalEnvDataframeObjects[i] / symbolUpdatesDataframeObjects[i] are
#     character strings (object names), not the data.frames themselves;
#   * the value of rbind() is never assigned, so nothing would be updated
#     even if the call succeeded.
for (i in seq_along(globalEnvDataframeObjects)) {
  rbind(envir = .GlobalEnv, globalEnvDataframeObjects[i], envir =
  symbolUpdates.env, symbolUpdatesDataframeObjects[i])
}

My questions:

  • With preferably no additional packages than the basic R ones, what piece of code can iteratively append symbolUpdatesDataframeObjects to the corresponding globalEnvDataframeObjects?
  • Would the code be the same should globalEnvDataframeObjects be in another environment (i.e., not .GlobalEnv, but a "sub-environment" like symbolUpdates.env)?
    • If not, what would change?
  • Is there a better/wiser approach than the one I'm trying to use?

Thanks in advance.


Systems used:

  • R version: 4.1.1 (2021-08-10)
  • RStudio version: 1.4.1717
  • OS: macOS Catalina version 10.15.7 and macOS Big Sur version 11.6

CodePudding user response:

We may need intersect here

# Names of the data.frames that exist in BOTH environments; only these have
# an update to append. Other data.frames in the global environment are left
# untouched.
interObj <- intersect(globalEnvDataframeObjects, symbolUpdatesDataframeObjects)
nrow(get(interObj[1]))
# [1] 273

# Append each update to its counterpart. The same name is used for the
# lookup in both environments, so rows are always appended to the matching
# ticker regardless of the order in which the names were collected.
for (objName in interObj) {
  assign(
    objName,
    rbind(
      get(objName, envir = .GlobalEnv),
      get(objName, envir = symbolUpdates.env)
    ),
    envir = .GlobalEnv
  )
}

CodePudding user response:

# Install packages if they are not already installed:
# necessary_packages => character vector
necessary_packages <- c("quantmod")

# Create a vector containing the names of any packages needing installation:
# new_packages => character vector
# requireNamespace() is used for the check instead of installed.packages(),
# which reads metadata for every installed package and is documented as
# unsuitable for testing whether a single package is available.
new_packages <- necessary_packages[
  !vapply(necessary_packages, requireNamespace, logical(1), quietly = TRUE)
]

# If the vector has more than 0 values, install the new packages
# (and their associated dependencies):
if (length(new_packages) > 0) {
  install.packages(new_packages, dependencies = TRUE)
}

# Attach the packages in the session. library() stops with an error when a
# package cannot be loaded, whereas require() would only return FALSE and
# let the script fail later with a less obvious message.
invisible(
  lapply(
    necessary_packages,
    library,
    character.only = TRUE
  )
)

# Ticker symbols to download: tickers => character vector
tickers <- c("NKLA", "MPNGF", "RMO", "JD", "MSFT")

# Separate environment that will hold the updated series:
# symbolUpdates.env => environment
symbolUpdates.env <- new.env()

# Start of each download window: from_dates => Date vector
from_dates <- as.Date(c("2020-01-01", "2020-02-03"))

# End of each download window (the second one ends today):
# to_dates => Date vector
to_dates <- c(as.Date("2021-02-02"), Sys.Date())

# Target environments, in the same order as the date windows:
# env_vec => list of environments
env_vec <- list(.GlobalEnv, symbolUpdates.env)

# Function to retreive ticker as a data.frame: 
# retrieve_ticker_df => function()
retrieve_ticker_df <- function(ticker_vec, from_date, to_date){

  # Create a list of size length(tickers):
  # df_list => empty list
  df_list <- vector(
    "list", 
    length(ticker_vec)
  )
  
  # Store each ticker's response as a data.frame in the list:
  # df_list => list of data.frames
  df_list <- setNames(
    lapply(
      seq_along(ticker_vec),
      function(i){
        # Retrieve the data.frame: tmp => data.frame
        tmp <- getSymbols.yahoo(
          ticker_vec[i],
          auto.assign = FALSE, 
          from = from_date,
          to = to_date,
          return.class = 'data.frame',
        )
        # Close all Internet connections as a precaution
        # https://stackoverflow.com/a/52758758/2950721
        closeAllConnections()
        
        # Create a data.frame and revert index to sequential
        # integers: data.frame => env
        data.frame(
          cbind(
            date = as.Date(
              row.names(
                tmp
              )
            ),
            tmp
          ),
          row.names = NULL
        )
      }
    ),
    ticker_vec
  )
  # Explicitly define returned object: list of data.frames => env
  return(df_list)
}

# Download one list of data.frames per date window / target environment:
# ticker_df_list_list => list of list of data.frames
ticker_df_list_list <- lapply(
  seq_along(env_vec),
  function(k) {
    retrieve_ticker_df(
      ticker_vec = tickers,
      from_date = from_dates[k],
      to_date = to_dates[k]
    )
  }
)

# Copy every list of data.frames into its matching environment, pairing
# the k-th list with the k-th environment: data.frames => env
Map(list2env, ticker_df_list_list, envir = env_vec)

# For every ticker, stack the data.frame from the first environment on top
# of the one from the second environment and reset the row names:
# bound_df_list => list of data.frames, named by ticker
bound_df_list <- lapply(
  setNames(tickers, tickers),
  function(ticker) {
    stacked <- rbind(
      get(ticker, envir = env_vec[[1]]),
      get(ticker, envir = env_vec[[2]])
    )
    data.frame(stacked, row.names = NULL)
  }
)

# Clear up the intermediate objects:
rm(ticker_df_list_list, env_vec)
gc()
  • Related