library(reshape2)
library(coin)
library(ggplot2)
#' # 1
# Tip expressed as a percentage of the total bill, added as a column on `tips`.
tips$percent <- tips$tip / tips$total_bill * 100
# Same data restricted to tips below 40%, i.e. with the extreme values removed.
tips_no_outliers <- subset(tips, percent < 40)
# Equal-variance two-sample t-tests of percent by sex: all rows, then trimmed.
t.test(percent ~ sex, data = tips, var.equal = TRUE)
t.test(percent ~ sex, data = tips_no_outliers, var.equal = TRUE)
# Wilcoxon rank sum tests on the same two data sets; conf.int = TRUE asks for
# a confidence interval alongside the p-value.
wilcox_test(percent ~ sex, data = tips, conf.int = TRUE)
wilcox_test(percent ~ sex, data = tips_no_outliers, conf.int = TRUE)
#' The Wilcoxon Rank Sum test does appear more resistant to outliers
#' than the two sample t-test.
#'
#' The changes in the p-value from with outliers to without are:
#' for t-test: .2797 to .1351
#' for wilcoxon rank sum test: .1349 to .1475
#'
#' The changes in the confidence intervals from with outliers to without are:
#' for t-test: (-0.72, 2.49) to (-0.30, 2.19)
#' for wilcoxon rank sum test: (-0.29, 2.22) to (-0.30, 2.16)
#'
#' The t-test p-value has almost halved but the Wilcoxon Rank Sum p-value has
#' only changed by about 10%. Since the effect on the p-value of the outliers
#' for the Wilcoxon Rank Sum is small, we say it is resistant. Similarly,
#' when we look at the confidence intervals, the endpoints change a lot for the t-test
#' but not much for the Wilcoxon Rank Sum.
#'
#' # 2
#' ## 1\.
library(ggplot2)
# Scatter plot of plant weight for each treatment group, base font size 10.
# Written with ggplot() + geom_point() rather than qplot(): qplot() is
# deprecated as of ggplot2 3.4.0, and with both x and y supplied it draws the
# same point layer.
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_point() +
  theme_grey(10)
#' ## 2\.
# Per-group summaries of plant weight: mean, standard deviation, sample size.
# The outer parentheses print each result as it is assigned.
(avgs <- tapply(PlantGrowth$weight, PlantGrowth$group, mean))
(sds <- tapply(PlantGrowth$weight, PlantGrowth$group, sd))
(ns <- tapply(PlantGrowth$weight, PlantGrowth$group, length))
#' ## 3\.
# Pooled standard deviation: square root of the group variances averaged with
# weights (n_i - 1).
group_dfs <- ns - 1
(sp <- sqrt(sum(group_dfs * sds^2) / sum(group_dfs)))
# Degrees of freedom: total number of observations minus the number of groups.
(df <- sum(ns) - length(ns))
#' The pooled standard deviation is `r round(sp,2)` with `r df` degrees of freedom.
#'
#' ## 4\.
# Attach the fitted values of the two models to each row: the grand mean
# (recycled to every row) and the mean of the row's own group (ave() uses
# mean by default).
PlantGrowth$overall_avg <- mean(PlantGrowth$weight)
PlantGrowth$group_avg <- ave(PlantGrowth$weight, PlantGrowth$group)
# Sum of squared deviations from the grand mean, then from the group means.
(ss_total <- sum((PlantGrowth$weight - PlantGrowth$overall_avg)^2))
(ss_within <- sum((PlantGrowth$weight - PlantGrowth$group_avg)^2))
#' The residual sum of squares for the reduced model (total sum of squares) is `r round(ss_total,4)` and the residual sum of squares for the full model (within group sum of squares) is `r round(ss_within,4)`.
#'
#' ## 5\. (a)
# Within-group sum of squares recovered from the pooled variance: sp^2 * df.
df * sp^2
#' ## 5\. (b)
# Total sum of squares recovered from the overall sample variance of weight,
# multiplied by (total sample size - 1).
(sum(ns) - 1) * sd(PlantGrowth$weight)^2
#' ## 6\.
#'
#'
#' Source of variation | Sum Sq | Df | Mean Sq | F statistic | p-value
#' --------------------- | ----------- | ---- | --------- | ------------- | ----------
#' **Between groups** | 3.7663 | 2 | 1.8831 | 4.8459 | 0.0159
#' **Within groups** | 10.4921 | 27 | 0.3886 | |
#' **Total** | 14.2584 | 29 | | |
#'
#' Table: Analysis of Variance Table
#'
#'
#' ## 7\.
#' There is moderate evidence that the treatment effects of the three conditions are not all equal (one-way ANOVA F-test, p-value = 0.016).
#'
#' ## 8\.
# 95% confidence interval for mean(ctrl) - mean(trt1), using the pooled
# standard deviation `sp` and its degrees of freedom `df`.
# `[[` extracts plain scalars from the tapply() results. `[` would keep them as
# length-1 arrays, and recycling a length-1 array against the length-2 vector
# c(-1, 1) is deprecated in R and raises a warning.
(ci_1 <- (avgs[["ctrl"]] - avgs[["trt1"]]) +
  c(-1, 1) * qt(0.975, df) * sp * sqrt(1 / ns[["ctrl"]] + 1 / ns[["trt1"]]))
#' With 95% confidence the treatment effect of Treatment 1 is between increasing weight by 0.20 units and decreasing weight by 0.94 units, compared to the control.