# Workshop script, part 1: read the data files, then explore Pulse1 against
# Weight (regression, correlation) and against Smokes (boxplots, t-tests).

# Whitespace-separated text file with no header row; printed for inspection.
Data <- read.table(
  "/STACodes/RWorkshop/Data/datafile.txt",
  sep = "",
  header = FALSE
)
Data

# Commented-out recoding of Gender (1 -> "M", 2 -> "F"), kept for reference:
# Pulse<-data.frame(read.csv(file="/STACodes/RWorkshop/Data/data.csv", header=T))
# Pulse$Gender[Pulse$Gender==1]="M"
# Pulse$Gender[Pulse$Gender==2]="F"
# write.csv(Pulse,"/STACodes//RWorkshop/Data/data1.csv", row.names=F)

# Reading data ------------------------------------------------------------

# Two ways of reading the same CSV; the second call overwrites the first.
Pulse <- read.table(
  file = "/STACodes/RWorkshop/Data/data.csv",
  sep = ",",
  header = TRUE
)
Pulse <- read.csv(file = "/STACodes/RWorkshop/Data/data.csv", header = TRUE)

names(Pulse)
str(Pulse)
summary(Pulse)
summary(Pulse$Weight)

# Question 1 --------------------------------------------------------------

# Scatter plot of Pulse1 against Weight.
plot(Pulse$Weight, Pulse$Pulse1)
# alternative plot method (formula interface), with the fitted line added
plot(Pulse$Pulse1 ~ Pulse$Weight)
abline(lm(Pulse$Pulse1 ~ Pulse$Weight))

# Simple linear regression of Pulse1 on Weight.
lm(Pulse1 ~ Weight, data = Pulse)
reg.fit <- lm(Pulse1 ~ Weight, data = Pulse)
summary(reg.fit)
anova(reg.fit)

# Residual diagnostics, first computed inline ...
fitted(reg.fit)
residuals(reg.fit)
hist(residuals(reg.fit))
plot(fitted(reg.fit), residuals(reg.fit))

# ... then again via stored copies of the fitted values and residuals.
# NOTE(review): the variable `resid` shares its name with stats::resid();
# the name is kept in case it is referenced elsewhere.
pred.values <- fitted(reg.fit)
resid <- residuals(reg.fit)
hist(resid)
plot(pred.values, resid)

# Correlation between Weight and Pulse1. Without `use`, cor() returns NA when
# either variable has missing values; the second call uses complete pairs.
cor(Pulse$Weight, Pulse$Pulse1)
?cor
cor(Pulse$Pulse1, Pulse$Weight, use = "pairwise.complete.obs")
# cor.test() has no `use` argument (it was silently swallowed by `...`);
# the test always works on complete pairs, so the argument is omitted.
cor.test(Pulse$Pulse1, Pulse$Weight)

## Pulse 1 V Smokes

# Separate boxplots for each Smokes code (1 and 2).
boxplot(Pulse$Pulse1[Pulse$Smokes == 1])
boxplot(Pulse$Pulse1[Pulse$Smokes == 2])

# Side-by-side boxplots: first with the raw codes as factor levels, then with
# the codes relabelled 1 -> "Y", 2 -> "N".
Pulse$Smokes.factor <- factor(Pulse$Smokes)
boxplot(Pulse$Pulse1 ~ Pulse$Smokes.factor)
Pulse$Smokes.factor <- factor(
  Pulse$Smokes,
  levels = c(1, 2),
  labels = c("Y", "N")
)
boxplot(Pulse$Pulse1 ~ Pulse$Smokes.factor)

# Group means and standard deviations. The calls without na.rm give NA if the
# subset contains missing values; the na.rm = TRUE versions drop them first.
mean(Pulse$Pulse1[Pulse$Smokes == 1])
sd(Pulse$Pulse1[Pulse$Smokes == 1])
mean(Pulse$Pulse1[Pulse$Smokes == 2])
sd(Pulse$Pulse1[Pulse$Smokes == 2])
mean(Pulse$Pulse1[Pulse$Smokes == 2], na.rm = TRUE)
sd(Pulse$Pulse1[Pulse$Smokes == 2], na.rm = TRUE)
tapply(Pulse$Pulse1, Pulse$Smokes.factor, FUN = mean, na.rm = TRUE)

# Two-sample t-tests of Pulse1 by Smokes: the default (Welch, two-sided),
# then a one-sided pooled-variance test.
t.test(Pulse$Pulse1 ~ Pulse$Smokes.factor)
t.test(
  Pulse$Pulse1 ~ Pulse$Smokes.factor,
  alternative = "greater",
  var.equal = TRUE
)

# Question 2
# --------------------------------------------------------------
# Proportion of "Y" in Ran, and association between Year and Ran.

# Ran is coded 1/2; relabel as "Y"/"N". Any other code becomes NA.
Pulse$Ran.factor <- factor(Pulse$Ran, levels = c(1, 2), labels = c("Y", "N"))
ran.counts <- table(Pulse$Ran.factor)
ran.counts
prop.table(ran.counts)

# One-sample proportion test for "Y" (default null p = 0.5, alternative
# "less"). Missing values are excluded from both counts: otherwise a single
# NA would make `success` NA and prop.test() would stop with an error.
n <- sum(!is.na(Pulse$Ran.factor))
success <- sum(Pulse$Ran.factor == "Y", na.rm = TRUE)
prop.test(success, n, alternative = "less")

# Year by Ran contingency table: counts, overall proportions, row
# proportions, and a chi-squared test of independence.
Year.table <- table(Pulse$Year, Pulse$Ran.factor)
Year.table
prop.table(Year.table)
prop.table(Year.table, margin = 1)
chisq.test(Year.table)

# Ran counts restricted to Year == 93.
Year93.table <- table(
  Pulse$Year[Pulse$Year == 93],
  Pulse$Ran.factor[Pulse$Year == 93]
)
Year93.table
barplot(Year93.table)

# Grouped bar chart of Ran for every Year (one bar per Year within Y/N).
Year.table
barplot(
  Year.table,
  xlab = "Ran",
  ylab = "Count",
  main = "Ran(Y/N) for all Years",
  legend.text = rownames(Year.table),
  beside = TRUE
)

# Collapse Year into 93 versus everything else and test against Ran.
Pulse$Year.93 <- ifelse(Pulse$Year == 93, 93, "other")
Year.table.93 <- table(Pulse$Year.93, Pulse$Ran.factor)
Year.table.93
chisq.test(Year.table.93)

# Cross-tabulations with gmodels; install the package only if it is missing
# rather than reinstalling on every run.
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
library(gmodels)
?CrossTable
CrossTable(
  Pulse$Year, Pulse$Ran,
  prop.t = TRUE, prop.c = FALSE, prop.chisq = FALSE, chisq = TRUE
)
CrossTable(
  Pulse$Year.93, Pulse$Ran,
  prop.t = TRUE, prop.c = FALSE, prop.chisq = FALSE, chisq = TRUE
)