Home > Net >  Migrate base R plot to ggplot2
Migrate base R plot to ggplot2

Time:04-26

This is a 10-column excerpt from a 25k-column dataset:

# 7 x 10 excerpt of the frequency matrix (log2 values): one row per turn
# position (f1log ... f7log) and one unnamed column per turn. Values are
# listed column by column.
bnc7log_t <- matrix(
  c(
    3.8073549220576, 17.0416377642775, 14.1180488428888,
    10.5507467853832, 9.01959072835788, 15.3172006034227, 0, 6.85798099512757,
    17.3531640594242, 14.2536997813742, 1, 13.9409558756115, 13.8783383498952,
    11.5783726913608, 6.85798099512757, 15.013933264159, 14.1180488428888,
    16.2415363334906, 14.4605198895249, 16.2071287425943, 6.52356195605701,
    6.85798099512757, 17.0416377642775, 12.0864679934641, 10.1786648510065,
    14.1270270430046, 17.0416377642775, 14.2651022184843, 16.2071287425943,
    16.2071287425943, 3.16992500144231, 0, 16.2071287425943, 1.58496250072116,
    8.24317398347295, 16.2071287425943, 16.2071287425943, 10.8470573460913,
    3.90689059560852, 14.4809794856131, 12.111787735801, 10.4325419003883,
    16.2071287425943, 16.2071287425943, 6.39231742277876, 15.3172006034227,
    5.95419631038687, 15.3426997374694, 15.8411957647929, 16.2071287425943,
    9.18487534290828, 15.101770541626, 16.9654686605993, 10.5449644327892,
    11.7780771295354, 10.4429434958487, 16.2071287425943, 9.63662462054365,
    14.1249290466025, 16.4715482109991, 15.2312961941021, 16.4566110995749,
    15.950419202689, 16.2071287425943, 1, 2.8073549220576, 5.70043971814109,
    12.5636729598275, 3.58496250072116, 3
  ),
  nrow = 7L,
  ncol = 10L,
  dimnames = list(paste0("f", 1:7, "log"), NULL)
)

I'm plotting the data in base R but the execution takes ages, probably due to the for loop:

# Set up an empty plotting frame (type = "n" draws no data), then add a custom
# x axis and a background grid.
plot(bnc7log_t[, 1],
     main = paste0("Within-turn corpus frequencies in 7-word turns\n(N = ", ncol(bnc7log_t), " turns)"),
     cex.main = 0.9, cex.axis = 0.9, cex.lab = 0.9,
     ylab = "Within-turn frequencies (log2)",
     xlab = "Turn positions",
     ylim = c(0, 18),
     # Spell out `frame.plot` and `FALSE`: `frame` relied on partial argument
     # matching and `F` is an ordinary, reassignable variable.
     type = "n", col = "grey", frame.plot = FALSE,
     xaxt = "n")
axis(1, at = 1:7, labels = paste0("w", 1:7), cex.axis = 0.8)
grid(nx = 10)

# Add one jittered line per column (turn). seq_len() yields an empty sequence
# for a zero-column matrix, whereas 1:ncol(...) would iterate over c(1, 0).
for (i in seq_len(ncol(bnc7log_t))) {
  lines(jitter(bnc7log_t[, i], factor = 2), type = "l")
}

I'm sure execution is speedier in ggplot2 - but how can the plot be migrated to ggplot2?

CodePudding user response:

Is this close to what you want?

library(tidyverse)

# Reshape the matrix to long format (one row per turn position and column),
# then draw one line per original column.
as_tibble(bnc7log_t) %>%
  # Record the turn position before pivoting; the matrix has 7 rows.
  mutate(Row = 1:7) %>%
  pivot_longer(
    # Relies on the unnamed matrix columns having been given V-prefixed names.
    starts_with("V"),
    values_to = "Value",
    names_to = "Column"
  ) %>%
  # ggplot2 layers are combined with `+`; without it only an empty plot is
  # produced and the remaining calls are evaluated as separate statements.
  ggplot() +
  geom_line(aes(x = Row, y = Value, group = Column)) +
  labs(
    x = "Turn positions",
    y = "Within-turn frequencies (log2)",
    title = paste0("Within-turn corpus frequencies in 7-word turns\n(N = ", ncol(bnc7log_t), " turns)")
  ) +
  scale_x_continuous(breaks = 1:7, labels = paste0("w", 1:7))

enter image description here

CodePudding user response:

Here are two solutions, one of them a base R solution even though it was not asked for. The reason is that the often-forgotten function matplot might do the job more quickly than the for loop in the question.

# Example data: a 7 x 10 numeric matrix with row names f1log ... f7log and no
# column names. Values are listed column by column.
bnc7log_t <- array(
  c(
    3.8073549220576, 17.0416377642775, 14.1180488428888,
    10.5507467853832, 9.01959072835788, 15.3172006034227, 0, 6.85798099512757,
    17.3531640594242, 14.2536997813742, 1, 13.9409558756115, 13.8783383498952,
    11.5783726913608, 6.85798099512757, 15.013933264159, 14.1180488428888,
    16.2415363334906, 14.4605198895249, 16.2071287425943, 6.52356195605701,
    6.85798099512757, 17.0416377642775, 12.0864679934641, 10.1786648510065,
    14.1270270430046, 17.0416377642775, 14.2651022184843, 16.2071287425943,
    16.2071287425943, 3.16992500144231, 0, 16.2071287425943, 1.58496250072116,
    8.24317398347295, 16.2071287425943, 16.2071287425943, 10.8470573460913,
    3.90689059560852, 14.4809794856131, 12.111787735801, 10.4325419003883,
    16.2071287425943, 16.2071287425943, 6.39231742277876, 15.3172006034227,
    5.95419631038687, 15.3426997374694, 15.8411957647929, 16.2071287425943,
    9.18487534290828, 15.101770541626, 16.9654686605993, 10.5449644327892,
    11.7780771295354, 10.4429434958487, 16.2071287425943, 9.63662462054365,
    14.1249290466025, 16.4715482109991, 15.2312961941021, 16.4566110995749,
    15.950419202689, 16.2071287425943, 1, 2.8073549220576, 5.70043971814109,
    12.5636729598275, 3.58496250072116, 3
  ),
  dim = c(7L, 10L),
  dimnames = list(
    c("f1log", "f2log", "f3log", "f4log", "f5log", "f6log", "f7log"),
    NULL
  )
)

suppressPackageStartupMessages({
  library(dplyr)
  library(tidyr)
  library(ggplot2)
})



# Base R alternative: a single matplot() call plots the columns of the matrix
# as solid grey lines, with no explicit loop.
matplot(
  bnc7log_t,
  type = "l",
  lty = "solid",
  col = "grey",
  xlab = "Turn positions",
  ylab = "Within-turn frequencies (log2)",
  main = paste0(
    "Within-turn corpus frequencies in 7-word turns\n(N = ",
    ncol(bnc7log_t),
    " turns)"
  ),
  cex.main = 0.9,
  cex.axis = 0.9,
  cex.lab = 0.9
)

# ggplot2 version: reshape the matrix to long format and draw one
# semi-transparent grey line per original column.
bnc7log_t %>%
  as.data.frame() %>%
  # Record the turn position (row index) BEFORE pivoting. Taking it from the
  # row names after pivot_longer() would number the long rows 1..(rows * cols)
  # instead of 1..rows.
  mutate(x = seq_len(n())) %>%
  pivot_longer(-x) %>%
  # ggplot2 layers are combined with `+`; without it the layers below would
  # never be added to the plot.
  ggplot(aes(x, value, group = name)) +
  geom_line(colour = "grey", alpha = 0.5) +
  labs(x = "Turn positions", y = "Within-turn frequencies (log2)") +
  ggtitle(paste0("Within-turn corpus frequencies in 7-word turns\n(N = ", ncol(bnc7log_t), " turns)")) +
  theme_bw()

Created on 2022-04-25 by the reprex package (v2.0.1)

  • Related