library(Sleuth3)
library(ggplot2)
# peek at the first few rows of the data
head(case0502)
# plotting multiple groups is just like plotting two groups
# (ggplot() is used directly because qplot() is deprecated as of ggplot2 3.4.0)
ggplot(case0502, aes(x = Judge, y = Percent)) +
  geom_point()
ggplot(case0502, aes(x = Judge, y = Percent)) +
  geom_boxplot()
# sometimes it's helpful to reorder the groups by the average response;
# reorder() sorts the levels of Judge by the mean of Percent within each level
ggplot(case0502, aes(x = reorder(Judge, Percent), y = Percent)) +
  geom_boxplot()
# per-group summaries of Percent, one entry per Judge
# mean of each group
averages <- tapply(case0502$Percent, case0502$Judge, mean)
# standard deviation of each group
sds <- tapply(case0502$Percent, case0502$Judge, sd)
# number of observations in each group
ns <- tapply(case0502$Percent, case0502$Judge, length)
# display the three summaries
averages
sds
ns
# pooled standard deviation across all groups: each group's variance is
# weighted by its degrees of freedom (n - 1), then the square root is taken
sp <- local({
  df_each <- ns - 1
  sqrt(sum(df_each * sds^2) / sum(df_each))
})
# single entries are extracted with [
# by position: averages[1L] is the first group's sample average
averages[1L]
# by name: list the available names, then pick one
names(averages)
averages["Spock's"]
# Compare judge A with Spock's judge, using the standard deviation pooled
# over all groups.
# difference in averages
# ([[ extracts the bare number, so the results below do not carry a
# misleading "A" label inherited from the first operand)
diff <- averages[["A"]] - averages[["Spock's"]]
# standard error of the difference, based on the pooled sd
se_diff <- sp * sqrt(1 / ns[["A"]] + 1 / ns[["Spock's"]])
# degrees of freedom of the pooled sd: total observations minus number of groups
df_pooled <- sum(ns) - length(ns)
# t-statistic
t.stat <- diff / se_diff
# two-sided p-value; asking for the upper tail directly avoids the loss of
# precision that 1 - pt(...) suffers when the p-value is very small
2 * pt(abs(t.stat), df_pooled, lower.tail = FALSE)
# 95% CI on difference in means
diff + c(-1, 1) * qt(0.975, df_pooled) * se_diff