# Exploratory regression of PercentYes on county-level predictors, followed by
# a permutation-based check of how much each predictor contributes to the fit.
#
# Expects a data frame `CO` to already exist in the workspace.
# NOTE(review): the positional indexing below assumes PercentYes is column 2
# and the 15 model predictors occupy columns 3-17 of `CO` -- confirm against
# the code that loads `CO`.

# Create scatterplot matrix ----
pairs(CO[, 2:17])

# Fit complete linear model ----
allterms <- lm(
  PercentYes ~ PercentObama + PercentUnder_18 + Percent18_24 + PercentOver65 +
    MedianAge + AverageHouseholdSize + PercentPoorHealth + PercentSmoker +
    PercentObese + PercentUninsured + PercentGraduationRate +
    PercentPostHSEducation + MedianHouseholdIncome + PercentUnemployment +
    PercentChildrenPoverty,
  data = CO
)
summary(allterms)

# Plot actual vs. predicted values ----
plot(PercentYes ~ predict(allterms, newdata = CO), data = CO)
abline(0, 1)

# Computing R-squared from plot ----
# Rows with a missing value yield an NA prediction; without
# use = "complete.obs" a single such row makes the whole correlation NA.
cor(CO$PercentYes, predict(allterms, newdata = CO), use = "complete.obs")^2

# Getting rid of NA in original data.frame ----
CO_NoNA <- na.omit(CO)
# update() refits the same formula on the NA-free data, so the long formula
# is written only once.
allterms <- update(allterms, data = CO_NoNA)
summary(allterms)

# Problems with NA avoided now...
cor(CO_NoNA$PercentYes, predict(allterms, newdata = CO_NoNA))^2

# Getting predictor importance information ----
# Rank coefficients by p-value, smallest (most significant) first.
coef_table <- summary(allterms)$coefficients
importance_order <- order(coef_table[, "Pr(>|t|)"])
coef_table[importance_order, ]

# Baseline: mean absolute error of the model on the unshuffled data.
avg_error_original <- mean(
  abs(CO_NoNA$PercentYes - predict(allterms, newdata = CO_NoNA))
)
avg_error_original

CO_NoNA_Temp <- CO_NoNA
# View() needs an interactive session; skip it when run via Rscript/source.
if (interactive()) {
  View(CO_NoNA_Temp)
}

# Looping through all predictors ----
# For each predictor column: shuffle that one column, re-predict with the
# already-fitted model, and record the mean absolute error. The results can
# be compared against avg_error_original.
# sample() is random, so results vary from run to run unless the caller sets
# a seed beforehand.
n_predictors <- length(attr(terms(allterms), "term.labels"))
predictor_cols <- 2L + seq_len(n_predictors)

avg_error_temp <- data.frame(
  i = rep(0, n_predictors),
  error = rep(0, n_predictors)
)

for (i in seq_len(n_predictors)) {
  # Start from a fresh copy so only one column is shuffled at a time.
  CO_NoNA_Temp <- CO_NoNA
  shuffled_col <- predictor_cols[i]

  avg_error_temp[i, 1] <- i
  # `[[` extracts a plain vector for both data.frames and tibbles.
  CO_NoNA_Temp[[shuffled_col]] <- sample(CO_NoNA_Temp[[shuffled_col]])
  avg_error_temp[i, 2] <- mean(
    abs(CO_NoNA_Temp$PercentYes - predict(allterms, newdata = CO_NoNA_Temp))
  )
}

# Label each row with the name of the column that was shuffled so the table
# can be read without looking up column positions.
rownames(avg_error_temp) <- names(CO_NoNA)[predictor_cols]