library(ggplot2)
# Load the CDC survey data over HTTP (requires network access). The rest of
# the script expects this to create a data frame named `cdc` in the workspace.
load(url("http://stat511.cwick.co.nz/data/cdc.rda"))

# Example: subset to the male respondents and preview the first rows.
males <- subset(cdc, gender == "m")
head(males)

# 1 What is the average age of female respondents?
females <- subset(cdc, gender == "f")
mean(females$age)

# 2 Find the median weight of respondents without health insurance.
#   (Hint: create a new data.frame of people without health insurance, then
#   find the median of the weight column in that new data.frame. There is a
#   median function.)
nohealth <- subset(cdc, hlthplan == 0)
median(nohealth$weight)
# or in one line
median(subset(cdc, hlthplan == 0)$weight)

# 3 Find the median weight of respondents without health insurance and who
#   had not exercised in the last month.
nohealthnoex <- subset(cdc, hlthplan == 0 & exerany == 0)
median(nohealthnoex$weight)

# 4 How many females are younger than age 30?
# nrow() returns just the number of matching rows; dim() would also report
# the number of columns, which is not part of the answer.
nrow(subset(females, age < 30))
# or count the TRUE values of the condition directly (NAs are dropped, which
# matches how subset() treats them)
sum(females$age < 30, na.rm = TRUE)

# 5 (Harder) Find the average difference between weight and wtdesire (the
#   desired weight) for all respondents and for males and females separately.
mean(cdc$weight - cdc$wtdesire)
mean(males$weight - males$wtdesire)
mean(females$weight - females$wtdesire)
# Plotting ----
# Every plot is built with ggplot() + an explicit geom: qplot() is deprecated
# as of ggplot2 3.4.0 and warns on each call. The geoms below are the ones
# qplot() would have chosen (or was told to use) for the same arguments.

# 1 Age against gender; x and y both supplied, so points are drawn.
ggplot(cdc, aes(gender, age)) +
  geom_point()

# 2 Distribution of age, one panel per gender stacked in two rows.
#   Assumes age is numeric (a mean is taken of it earlier), hence a histogram.
ggplot(cdc, aes(age)) +
  geom_histogram() +
  facet_wrap(~gender, nrow = 2)

# 3 Weight by exercise status; exerany is wrapped in factor() so it is
#   treated as discrete groups on the x axis.
ggplot(cdc, aes(factor(exerany), weight)) +
  geom_boxplot()

# 4 Weight by gender, with the boxplot outlines coloured by gender.
ggplot(cdc, aes(gender, weight, colour = gender)) +
  geom_boxplot()

# 5 Violin plots of weight by exercise status, one panel per gender.
#   Stored once because the next two plots only add layers to it.
violin_by_gender <- ggplot(cdc, aes(factor(exerany), weight)) +
  geom_violin() +
  facet_wrap(~gender)
violin_by_gender

# 7 Same plot with the black-and-white theme.
violin_by_gender +
  theme_bw()

# 8 Same plot with the black-and-white theme and a title.
violin_by_gender +
  theme_bw() +
  ggtitle("Hello,I'm a title")