I'm new to R and I'm doing association rule mining with it. I want to display the foods in the items column on the x axis of the barplot. They should also match the relevant colSums.
# For each part of the day, count per column of Bakery how many entries
# equal that label (colSums returns one count per column, not per item).
Morning <- colSums(Bakery == "Morning")
Afternoon <- colSums(Bakery == "Afternoon")
Evening <- colSums(Bakery == "Evening")
Night <- colSums(Bakery == "Night")
# Stack the four count vectors into a matrix, one row per part of the day.
purchased <- rbind(Morning,Afternoon,Evening,Night)
# The bars correspond to the columns of `purchased`, i.e. the columns of
# Bakery, which is why the x axis shows column names rather than foods.
# NOTE(review): `legend` is matched partially to barplot's `legend.text`.
barplot(purchased, legend=rownames(purchased))
I'm getting this kind of graph.
But I want this kind of graph
CodePudding user response:
Currently, you are obviously trying to count each day type individually.
# Count the rows whose daytype equals one label, one label at a time.
colSums(dat['daytype'] == "morning")
# daytype
#      29
colSums(dat['daytype'] == "afternoon")
# daytype
#      24
# [...]
This is exactly what the table function is for.
# Count all day types in a single call.
with(dat, table(daytype))
# daytype
#   morning afternoon   evening     night
#        29        24        25        22
It can also cross-tabulate daytype and item, which is probably what you had in mind.
# Two-way table of counts: rows are day types, columns are items.
with(dat, table(daytype, item))
#            item
# daytype     Bread Coffee Jam Muffin Pastry Tea
#   morning       6      3   7      1      6   6
#   afternoon    11      1   4      0      5   3
#   evening       8      2   8      2      1   4
#   night         8      2   3      1      4   4
Now you just need to pipe that into barplot, and you will already get this simple bar chart:
# Pipe the daytype-by-item table straight into barplot: one bar per item
# (table column), built from the day-type counts (table rows).
with(dat, table(daytype, item)) |> barplot()
You can refine this by specifying colors and a legend,
# col=2:5 supplies one colour per day type (table row); legend.text
# labels them in the same row order.
with(dat, table(daytype, item)) |>
barplot(col=2:5, legend.text=c('morning', 'afternoon', 'evening', 'night'))
or narrower bars, shading lines, etc.:
# Refined version of the stacked chart: wider gaps between bars (space=2),
# axis titles, a main title, shading lines and a legend without a box.
# dnn only sets the names of the table's two dimensions.
with(dat, table(daytype, item, dnn=c('foo', 'bar'))) |>
  barplot(col=c('red', rep('blue', 3)),  # exactly one colour per day type (4 rows)
          space=2, xlab='Items', ylab='Freq.',
          # density/angle control the shading lines per day type;
          # a density of 0 draws no shading lines for that component.
          density=c(15, 35, 15, 0), angle=c(45, 0, -45, 0), axis.lty=1,
          legend.text=c('morning', 'afternoon', 'evening', 'night'),
          args.legend=list(bty='n'), main='Title here')
Note: R >= 4.1 is required for the native pipe operator |>.
Data:
dat <- structure(list(item = c("Bread", "Tea", "Pastry", "Jam", "Jam",
"Tea", "Coffee", "Jam", "Coffee", "Coffee", "Muffin", "Bread",
"Bread", "Pastry", "Bread", "Bread", "Pastry", "Bread", "Tea",
"Tea", "Bread", "Tea", "Tea", "Bread", "Bread", "Bread", "Muffin",
"Bread", "Pastry", "Pastry", "Jam", "Pastry", "Tea", "Bread",
"Jam", "Pastry", "Bread", "Bread", "Bread", "Jam", "Pastry",
"Jam", "Pastry", "Bread", "Jam", "Tea", "Bread", "Muffin", "Coffee",
"Tea", "Jam", "Bread", "Bread", "Bread", "Bread", "Coffee", "Bread",
"Jam", "Bread", "Jam", "Bread", "Jam", "Tea", "Jam", "Tea", "Pastry",
"Pastry", "Bread", "Jam", "Jam", "Jam", "Pastry", "Bread", "Bread",
"Tea", "Coffee", "Tea", "Pastry", "Coffee", "Tea", "Muffin",
"Bread", "Tea", "Pastry", "Jam", "Coffee", "Jam", "Jam", "Tea",
"Bread", "Bread", "Tea", "Jam", "Pastry", "Bread", "Bread", "Jam",
"Jam", "Pastry", "Bread"), time = structure(c(1477858620L, 1477867620L,
1477919100L, 1477935060L, 1477946940L, 1477978680L, 1477986180L,
1478049060L, 1478068680L, 1478072640L, 1478077800L, 1478082420L,
1478097840L, 1478098020L, 1478110020L, 1478116620L, 1478127600L,
1478129880L, 1478137320L, 1478147280L, 1478152140L, 1478161260L,
1478164740L, 1478171340L, 1478177040L, 1478193780L, 1478205420L,
1478231700L, 1478236980L, 1478238120L, 1478246760L, 1478249280L,
1478264040L, 1478276580L, 1478321940L, 1478323620L, 1478346720L,
1478351280L, 1478362080L, 1478363040L, 1478384340L, 1478384520L,
1478401920L, 1478404860L, 1478404920L, 1478431320L, 1478439360L,
1478454000L, 1478454180L, 1478463840L, 1478464740L, 1478466900L,
1478470500L, 1478483220L, 1478485680L, 1478490480L, 1478495160L,
1478495400L, 1478500140L, 1478500980L, 1478510580L, 1478512440L,
1478515380L, 1478516880L, 1478524380L, 1478532240L, 1478562000L,
1478578740L, 1478596860L, 1478600340L, 1478604120L, 1478604780L,
1478617020L, 1478628180L, 1478647380L, 1478653860L, 1478683500L,
1478699460L, 1478712660L, 1478723160L, 1478733540L, 1478738760L,
1478742540L, 1478768760L, 1478780820L, 1478786640L, 1478814420L,
1478848560L, 1478850960L, 1478878740L, 1478885820L, 1478885880L,
1478891220L, 1478928420L, 1478932620L, 1478961600L, 1478978400L,
1478979300L, 1479005580L, 1479009900L), class = c("POSIXct",
"POSIXt"), tzone = ""), no = c(365L, 449L, 949L, 1082L, 1192L,
1477L, 1535L, 2127L, 2302L, 2341L, 2389L, 2439L, 2572L, 2574L,
2683L, 2745L, 2843L, 2865L, 2943L, 3036L, 3091L, 3177L, 3210L,
3269L, 3316L, 3466L, 3570L, 3793L, 3841L, 3852L, 3925L, 3945L,
4079L, 4196L, 4598L, 4612L, 4819L, 4863L, 4944L, 4953L, 5136L,
5139L, 5300L, 5327L, 5328L, 5565L, 5637L, 5766L, 5768L, 5853L,
5863L, 5884L, 5917L, 6033L, 6059L, 6096L, 6135L, 6139L, 6182L,
6191L, 6289L, 6309L, 6337L, 6353L, 6423L, 6481L, 6767L, 6918L,
7079L, 7108L, 7146L, 7152L, 7271L, 7379L, 7553L, 7613L, 7887L,
8038L, 8162L, 8257L, 8349L, 8392L, 8426L, 8674L, 8791L, 8850L,
9114L, 9426L, 9449L, 9719L, 9781L, 9782L, 9821L, 10181L, 10220L,
10486L, 10635L, 10642L, 10891L, 10932L), daytype = structure(c(3L,
3L, 2L, 3L, 3L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 1L, 1L, 1L, 1L, 2L,
2L, 4L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 4L, 4L, 1L, 1L, 2L, 2L, 2L, 3L, 4L, 4L, 1L, 2L, 3L, 3L, 4L,
4L, 4L, 1L, 2L, 2L, 3L, 1L, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 3L,
3L, 4L, 4L), levels = c("morning", "afternoon", "evening", "night"
), class = "factor")), row.names = c(670L, 835L, 1754L, 1999L,
2192L, 2712L, 2828L, 3888L, 4218L, 4284L, 4368L, 4459L, 4703L,
4708L, 4905L, 5027L, 5198L, 5236L, 5376L, 5541L, 5640L, 5793L,
5859L, 5974L, 6063L, 6340L, 6533L, 6943L, 7030L, 7050L, 7198L,
7236L, 7473L, 7691L, 8399L, 8420L, 8800L, 8885L, 9051L, 9064L,
9407L, 9415L, 9713L, 9767L, 9769L, 10162L, 10280L, 10514L, 10518L,
10680L, 10700L, 10735L, 10799L, 11008L, 11058L, 11136L, 11208L,
11220L, 11308L, 11327L, 11496L, 11527L, 11585L, 11622L, 11754L,
11866L, 12395L, 12684L, 12970L, 13026L, 13090L, 13099L, 13329L,
13532L, 13829L, 13936L, 14427L, 14706L, 14904L, 15077L, 15252L,
15325L, 15382L, 15821L, 16050L, 16150L, 16636L, 17197L, 17237L,
17721L, 17831L, 17835L, 17903L, 18545L, 18613L, 19070L, 19338L,
19346L, 19803L, 19873L), class = "data.frame")
CodePudding user response:
Use the ggplot2 package. Replace your_data with the name of your data frame, and try this to start:
library(ggplot2)
# Bar chart of counts: one bar per value of Items, filled by Daypart.
# Layers must be joined to the ggplot() call with `+`; without it the
# geom_bar() layer is never added to the plot.
# NOTE(review): `Items` and `Daypart` are assumed column names of
# your_data; adjust them to match your data frame.
ggplot(data = your_data) +
  geom_bar(aes(x = Items, fill = Daypart))