But when I try to recreate this in R with ggplot2, I'm stuck.
I think I'm just unable to figure out how Excel is summing the MaleCount, and I'm unable to replicate that in R. This is the plot I'm getting in R without summing.
And this is the code used to create it:
# Line plot of the raw (un-summed) MaleCount against AgeBand, coloured by
# HealthBoard. ggplot2 layers must be joined with `+`; without it each call is
# evaluated as a separate statement and the line layer is never added.
# Columns are referenced by bare name inside aes() rather than via `df$`,
# so the mapping is resolved against the `data` argument.
# NOTE(review): assumes df has a HealthBoard column; the dput sample further
# down carries it as OrgCode -- rename it first if that is the case.
ggplot(data = df, aes(x = AgeBand, y = MaleCount)) +
  geom_line(aes(colour = factor(HealthBoard))) +
  ggtitle("I have no idea")
Let me know if you have any more questions.
Thanks in advance, Mark.
EDIT: Adding structure below.
tibble [50 x 11] (S3: tbl_df/tbl/data.frame)
$ Period : num [1:50] 202004 202004 202004 202004 202004 ...
$ PracticeCode : chr [1:50] "W96016" "W95001" "W93021" "W91054" ...
$ PostCode : chr [1:50] "NP8 1AG" "CF44 7DD" "NP16 5XR" "LL12 7TH" ...
$ OrgCode : chr [1:50] "7A7" "7A5" "7A6" "7A1" ...
$ AgeBand : num [1:50] 8 24 11 14 68 24 4 56 85 17 ...
$ MaleCount : num [1:50] 37 94 49 41 28 53 16 20 4 40 ...
$ FemaleCount : num [1:50] 41 98 41 31 28 64 20 14 7 50 ...
$ IndeterminateCount: num [1:50] 0 0 0 0 0 0 0 0 0 0 ...
$ Count : num [1:50] 78 192 90 72 56 117 36 34 11 90 ...
$ Year : num [1:50] 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
$ Month : chr [1:50] "April" "April" "April" "April" ...
Adding dput below.
structure(list(Period = c(202004, 202004, 202004, 202004, 202004,
202004, 202004, 202004, 202004, 202004, 202004, 202004, 202004,
202004, 202004, 202004, 202004, 202004, 202004, 202004, 202004,
202004, 202004, 202004, 202004, 202004, 202004, 202004, 202004,
202004, 202004, 202004, 202004, 202004, 202004, 202004, 202004,
202004, 202004, 202004, 202004, 202004, 202004, 202004, 202004,
202004, 202004, 202004, 202004, 202004), PracticeCode = c("W95023",
"W95086", "W91015", "W93045", "W93125", "W97623", "W95073", "W95042",
"W94017", "W97025", "W95016", "W92048", "W98033", "W94018", "W93116",
"W93059", "W94035", "W93046", "W92058", "W97016", "W94021", "W98048",
"W94026", "W97069", "W98012", "W92052", "W93072", "W91044", "W96015",
"W97060", "W97008", "W94609", "W91038", "W97010", "W92023", "W97067",
"W93049", "W97028", "W91058", "W97048", "W92023", "W93061", "W91610",
"W94007", "W95034", "W95024", "W93075", "W95032", "W95087", "W93029"
), PostCode = c("CF48 1BZ", "CF48 3AL", "CH5 3PA", "NP20 6EY",
"NP18 2JB", "CF5 5LQ", "CF83 3JZ", "CF45 4YB", "LL55 4SU", "CF14 3NB",
"CF44 6HY", "SA14 8TU", "SA3 5UA", "LL30 3EU", "NP10 8UX", "NP11 6BJ",
"LL23 7BA", "NP20 4JS", "SA62 6SS", "CF11 9SH", "LL52 0RR", "SA10 6UF",
"LL65 1RA", "CF3 0SH", "SA4 3ED", "SA15 3BD", "NP25 3PL", "CH7 4RQ",
"SY16 1EF", "CF24 1AG", "CF23 9PN", "LL54 6NN", "LL22 8LJ", "CF23 8SQ",
"SA34 0AJ", "CF11 9DG", "NP19 7DQ", "CF14 1LT", "LL13 8RG", "CF24 2HB",
"SA34 0AJ", "NP10 9DU", "LL12 9LG", "LL36 9HL", "CF33 4LD", "CF37 2DR",
"NP13 1BQ", "CF46 5HE", "CF44 7AY", "NP44 4TA"), OrgCode = c("7A5",
"7A5", "7A1", "7A6", "7A6", "7A4", "7A6", "7A5", "7A1", "7A4",
"7A5", "7A2", "7A3", "7A1", "7A6", "7A6", "7A1", "7A6", "7A2",
"7A4", "7A1", "7A3", "7A1", "7A4", "7A3", "7A2", "7A6", "7A1",
"7A7", "7A4", "7A4", "7A1", "7A1", "7A4", "7A2", "7A4", "7A6",
"7A4", "7A1", "7A4", "7A2", "7A6", "7A1", "7A1", "7A5", "7A5",
"7A6", "7A5", "7A5", "7A6"), AgeBand = c(87, 31, 44, 53, 23,
91, 24, 12, 93, 83, 26, 38, 92, 47, NA, 23, 27, 80, 93, 2, 46,
82, 11, 45, 72, 18, 26, 54, 89, 71, 30, 27, 18, 37, 50, 4, 8,
51, 59, 8, 4, 64, 92, 13, 88, 85, 78, 56, 45, 44), MaleCount = c(12,
153, 52, 59, 16, 0, 10, 39, 1, 9, 33, 33, 13, 44, 3, 37, 31,
15, 0, 17, 18, 8, 39, 24, 143, 84, 24, 23, 6, 30, 129, 21, 61,
72, 55, 23, 86, 68, 82, 81, 42, 57, 0, 23, 12, 24, 27, 43, 18,
63), FemaleCount = c(14, 133, 73, 62, 22, 1, 18, 36, 3, 10, 36,
25, 21, 38, 20, 44, 24, 21, 1, 18, 21, 19, 30, 26, 151, 71, 23,
17, 27, 20, 132, 17, 65, 70, 55, 28, 73, 73, 69, 80, 28, 74,
2, 25, 24, 27, 24, 33, 33, 64), IndeterminateCount = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), Count = c(26, 286, 125, 121, 38, 1, 28, 75,
4, 19, 69, 58, 34, 82, 23, 81, 55, 36, 1, 35, 39, 27, 69, 50,
294, 155, 47, 40, 33, 50, 261, 38, 126, 142, 110, 51, 159, 141,
151, 161, 70, 131, 2, 48, 36, 51, 51, 76, 51, 127), Year = c(2020,
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020,
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020,
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020,
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020,
2020, 2020, 2020, 2020, 2020), Month = c("April", "April", "April",
"April", "April", "April", "April", "April", "April", "April",
"April", "April", "April", "April", "April", "April", "April",
"April", "April", "April", "April", "April", "April", "April",
"April", "April", "April", "April", "April", "April", "April",
"April", "April", "April", "April", "April", "April", "April",
"April", "April", "April", "April", "April", "April", "April",
"April", "April", "April", "April", "April")), row.names = c(NA,
-50L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
I renamed `OrgCode` to `HealthBoard` according to your comments.
library(dplyr)
library(ggplot2)  # needed for ggplot(), geom_line() and ggtitle()

# Sum MaleCount within each HealthBoard / AgeBand combination, then draw one
# line per HealthBoard. The dplyr steps are chained with `%>%`, but the ggplot2
# layers must be chained with `+`; otherwise geom_line() and ggtitle() are
# evaluated as stand-alone statements and never added to the plot.
df %>%
  rename(HealthBoard = OrgCode) %>%
  group_by(HealthBoard, AgeBand) %>%
  # .groups = 'drop' returns an ungrouped tibble after summarising
  summarise(MaleCount = sum(MaleCount), .groups = 'drop') %>%
  ggplot(aes(x = AgeBand, y = MaleCount, color = HealthBoard)) +
  geom_line() +
  ggtitle('You have some idea now.')
Output: