I am trying to draw a basic boxplot, but I can't get rid of the NA values in it. I have tried many tricks for this issue, for example this one:
# Asker's attempt: drop the rows where sum_variable is NA, then draw the
# boxplot. This filter only looks at sum_variable, so a row whose gender is
# NA (but whose sum_variable is present) stays in the plotted data.
ggplot(
  data = subset(df, !is.na(sum_variable)),
  aes(x = gender, y = sum_variable, fill = gender)
) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(
    fun = mean, geom = "point", shape = 10, size = 3.5, color = "black"
  ) +
  ggtitle("Title") +
  theme_bw() +
  theme(legend.position = "none")
I also tried this: ggplot(na.omit(data), aes(x=luse, y=rich))
Neither of these solves the issue. What would you recommend?
Data↓
# dput() output of the example data: a row-wise tibble with 76 rows and two
# columns, `gender` (factor with levels "1" and "2") and `sum_variable`
# (numeric). Both columns contain NA values.
structure(list(gender = structure(c(2L, 2L, NA, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 1L, 1L), .Label = c("1", "2"), class = "factor"),
sum_variable = c(9, 6, 13, 3, 4, 3, 12, 2, 7, 8, 7, 4, 5,
10, 2, 5, 4, NA, 14, 9, 2, 5, 7, 3, NA, 3, 5, 7, 3, 8, 3,
3, 4, 8, 10, 9, 5, 7, 8, 4, 9, NA, 10, 14, 10, 3, 4, 10,
3, NA, 5, 3, 4, 4, NA, 5, 4, 6, 6, 9, 6, 2, 3, NA, 4, NA,
2, 2, 6, 5, 5, 3, 5, NA, 4, 4)), class = c("rowwise_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -76L), groups = structure(list(
.rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -76L), class = c("tbl_df",
"tbl", "data.frame")))
CodePudding user response:
You can filter out the NA values before creating the plot:
# Remove the rows whose gender is NA before the data reaches ggplot(), so
# the x axis only receives non-missing gender values.
df %>%
  filter(!is.na(gender)) %>%
  ggplot(aes(x = gender, y = sum_variable, fill = gender)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(
    fun = mean, geom = "point", shape = 10, size = 3.5, color = "black"
  ) +
  ggtitle("Title") +
  theme_bw() +
  theme(legend.position = "none")
CodePudding user response:
Try removing NAs first before passing in the dataset.
# Drop every row that has an NA in any column, then plot the remaining rows.
# `df` is the data frame from the question.
sub_df <- na.omit(df)
ggplot(data = sub_df, aes(x = gender, y = sum_variable, fill = gender)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  stat_summary(
    fun = mean, geom = "point", shape = 10, size = 3.5, color = "black"
  ) +
  ggtitle("Title") +
  theme_bw() +
  theme(legend.position = "none")
CodePudding user response:
One solution is to use complete.cases
# Keep only the rows with no NA in any column (complete.cases() returns a
# logical vector with one entry per row) and plot that subset.
ggplot(
  df[complete.cases(df), ],
  aes(x = gender, y = sum_variable, fill = gender)
) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(
    fun = mean, geom = "point", shape = 10, size = 3.5, color = "black"
  ) +
  ggtitle("Title") +
  theme_bw() +
  theme(legend.position = "none")
Data
# Example data used by the plot code: a row-wise tibble with 76 rows and two
# columns, `gender` (factor with levels "1" and "2") and `sum_variable`
# (numeric). Both columns contain NA values.
df <- structure(list(gender = structure(c(2L, 2L, NA, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L), levels = c("1", "2"), class = "factor"),
sum_variable = c(9, 6, 13, 3, 4, 3, 12, 2, 7, 8, 7, 4, 5,
10, 2, 5, 4, NA, 14, 9, 2, 5, 7, 3, NA, 3, 5, 7, 3, 8, 3,
3, 4, 8, 10, 9, 5, 7, 8, 4, 9, NA, 10, 14, 10, 3, 4, 10,
3, NA, 5, 3, 4, 4, NA, 5, 4, 6, 6, 9, 6, 2, 3, NA, 4, NA,
2, 2, 6, 5, 5, 3, 5, NA, 4, 4)), class = c("rowwise_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -76L), groups = structure(list(
.rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -76L), class = c("tbl_df",
"tbl", "data.frame")))