Data
Here is the dput
of my data:
heart <- structure(list(died = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), levels = c("Survive",
"Died"), class = c("labelled", "factor"), label = "Death", format = "%8.0g", value.label.table = structure(0:1, names = c("Survive",
"Died"))), procedure = structure(c(2L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L), levels = c("PTCA",
"CABG"), class = c("labelled", "factor"), label = "1=CABG; 0=PTCA", format = "%8.0g", value.label.table = structure(0:1, names = c("PTCA",
"CABG"))), age = structure(c(65L, 69L, 76L, 65L, 69L, 67L, 69L,
66L, 74L, 67L, 76L, 76L, 68L, 77L, 77L, 70L, 68L, 69L, 68L, 70L,
71L, 75L, 69L, 72L, 66L, 77L, 68L, 78L, 77L, 69L, 65L, 70L, 65L,
70L), label = "PATIENT AGE", class = c("labelled", "integer"), format = "%8.0g"),
gender = structure(c(2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L), levels = c("Female",
"Male"), class = c("labelled", "factor"), label = "Gender", format = "%8.0g", value.label.table = structure(0:1, names = c("Female",
"Male"))), los = structure(c(10L, 7L, 7L, 8L, 1L, 7L, 2L,
9L, 3L, 1L, 6L, 14L, 4L, 13L, 10L, 4L, 2L, 2L, 10L, 2L, 6L,
12L, 4L, 2L, 2L, 14L, 3L, 2L, 5L, 9L, 3L, 3L, 3L, 2L), label = "LOS", class = c("labelled",
"integer"), format = "%8.0g"), type = structure(c(1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L), levels = c("Elective", "Emer/Urg"), class = c("labelled",
"factor"), label = "Severity", format = "%8.0g", value.label.table = structure(0:1, names = c("Elective",
"Emer/Urg")))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-34L), stata.info = list(datalabel = "AZ 1991; CABG(106,107) & PTCA(112)",
version = 12L, time.stamp = "17 Feb 2015 12:45", val.labels = c("diedlb",
"pxllb", "", "sexllb", "", "typellb"), label.table = list(
pxllb = structure(0:1, names = c("PTCA", "CABG")), typellb = structure(0:1, names = c("Elective",
"Emer/Urg")), sexllb = structure(0:1, names = c("Female",
"Male")), diedlb = structure(0:1, names = c("Survive",
"Died")))))
Problem
So far I have fit multiple models like this using tensor splines to encode main and interaction effects:
#### Load Libraries ####
library(oddsratio)
library(mgcv)
library(splines)
library(tidyverse)
#### Fit GAM With Tensor Splines ####
fit <- gam(factor(died)
~ ti(age)
ti(los)
ti(age,los),
data = heart,
family = binomial)
However, when I try to plot the main effects:
plot_gam(model = fit,
pred = "age")
I get this odd warning:
Error in plot_df[[set_pred]]$x : $ operator is invalid for atomic vectors
I tried calling the function plot_gam
explicitly, which look like this, but I'm a bit fuzzy on why this code is causing the issue. Calling whatever gam_to_df
is also didn't seem to clarify the problem:
function (model = NULL, pred = NULL, col_line = "blue", ci_line_col = "black",
ci_line_type = "dashed", ci_fill = "grey", ci_alpha = 0.4,
ci_line_size = 0.8, sm_fun_size = 1.1, title = NULL, xlab = NULL,
ylab = NULL, limits_y = NULL, breaks_y = NULL)
{
df <- gam_to_df(model, pred)
if (is.null(xlab)) {
xlab <- df[[pred]]$xlab
}
if (is.null(ylab)) {
ylab <- df[[pred]]$ylab
}
plot_gam <- ggplot(df, aes_(~x, ~y)) geom_line(colour = col_line,
size = sm_fun_size) geom_line(aes_(~x, ~se_upr), linetype = ci_line_type,
colour = ci_line_col, size = ci_line_size) geom_line(aes_(~x,
~se_lwr), linetype = ci_line_type, colour = ci_line_col,
size = ci_line_size) geom_ribbon(aes_(x = ~x, ymin = ~se_lwr,
ymax = ~se_upr), fill = ci_fill, alpha = ci_alpha)
ylab(ylab) xlab(xlab)
if (!is.null(limits_y) & !is.null(breaks_y)) {
plot_gam <- plot_gam scale_y_continuous(breaks = c(breaks_y),
limits = c(limits_y))
}
else if (!is.null(limits_y) & is.null(breaks_y)) {
plot_gam <- plot_gam scale_y_continuous(limits = limits_y)
}
else if (is.null(limits_y) & !is.null(breaks_y)) {
plot_gam <- plot_gam scale_y_continuous(breaks = c(breaks_y))
}
if (!is.null(title)) {
plot_gam <- plot_gam ggtitle(title)
}
return(plot_gam)
}
I would greatly appreciate a resolution to this issue. The plot_gam
function is super useful for what I'm trying to achieve.
CodePudding user response:
I was inspecting the code. Error is caused by a fairly naïve implementation of oddsratio::gam_to_df
function.
gam_to_df <- function (model = NULL, pred = NULL)
{
plot_df <- no_plot(model)
set_pred <- grep(paste0("\\b", pred, "\\b"), plot_df)
df <- data.frame(x = plot_df[[set_pred]]$x, se_upr = plot_df[[set_pred]]$fit
plot_df[[set_pred]]$se, se_lwr = plot_df[[set_pred]]$fit -
plot_df[[set_pred]]$se, y = plot_df[[set_pred]]$fit)
return(df)
}
no_plot <- function (model = NULL)
{
png("temp.xyz")
plot_df <- plot(model, pages = 1)
dev.off()
file.remove("temp.xyz")
return(invisible(plot_df))
}
The function first create a plot and return it (it is a list) then extract the element that contains "age" somewhere in one of the elements, searching it via grep. Since two of three elements contains "age" it returns a vector of two elelments c(1,3). Thus plot_df[[set_pred]]
instead of selecting the first element, selects the first of the list and then the third element inside of the list, which is a vector.
Hence the error "operator is invalid for atomic vectors"
Appears to be a bug.
EDIT Filled an issue at https://github.com/pat-s/oddsratio/issues/54