# problem 3: load("./hw4.RData") attach(data) # part a) # using only treatment model1 = lm(bush04 ~ etouch) summary(model1) #Call: #lm(formula = bush04 ~ etouch) # #Residuals: # Min 1Q Median 3Q Max #-0.311761 -0.054553 -0.004049 0.077816 0.181817 # #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 0.60975 0.01441 42.327 <2e-16 *** #etouch -0.06502 0.03045 -2.136 0.0365 * #--- #Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # #Residual standard error: 0.1039 on 65 degrees of freedom #Multiple R-squared: 0.06556, Adjusted R-squared: 0.05118 #F-statistic: 4.56 on 1 and 65 DF, p-value: 0.03649 # using some basic demographic variables model2 = lm(bush04 ~ etouch + hisp00 + black00 + lowEduc00 + foreignBorn00 + income) summary(model2) #Call: #lm(formula = bush04 ~ etouch + hisp00 + black00 + lowEduc00 + # foreignBorn00 + income) # #Residuals: # Min 1Q Median 3Q Max #-0.112792 -0.049716 -0.009212 0.052389 0.209099 # #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 4.694e-01 1.021e-01 4.596 2.27e-05 *** #etouch -3.032e-02 2.739e-02 -1.107 0.27280 #hisp00 7.539e-03 3.112e-01 0.024 0.98075 #black00 -5.521e-01 1.012e-01 -5.455 9.78e-07 *** #lowEduc00 2.122e+00 6.372e-01 3.330 0.00149 ** #foreignBorn00 -8.344e-01 3.763e-01 -2.218 0.03039 * #income 4.541e-06 2.254e-06 2.015 0.04841 * #--- #Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # #Residual standard error: 0.07658 on 60 degrees of freedom #Multiple R-squared: 0.5313, Adjusted R-squared: 0.4844 #F-statistic: 11.33 on 6 and 60 DF, p-value: 1.991e-08 # using past outcomes and some demographics model3 = lm(bush04 ~ etouch + black00 + white00 + income + votePer96.rep + votePer00.rep) summary(model3) #Call: #lm(formula = bush04 ~ etouch + black00 + white00 + income + votePer96.rep + # votePer00.rep) # #Residuals: # Min 1Q Median 3Q Max #-0.0553277 -0.0143562 0.0001657 0.0107277 0.0685523 # #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 8.235e-02 2.310e-01 0.356 0.7228 #etouch -8.489e-03 7.073e-03 -1.200 0.2348 #black00 -1.181e-01 2.345e-01 -0.504 0.6163 #white00 -3.505e-03 2.353e-01 -0.015 0.9882 #income -1.282e-06 7.274e-07 -1.762 0.0831 . #votePer96.rep -2.268e-01 1.040e-01 -2.181 0.0331 * #votePer00.rep 1.240e+00 8.303e-02 14.934 <2e-16 *** #--- #Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # #Residual standard error: 0.02088 on 60 degrees of freedom #Multiple R-squared: 0.9652, Adjusted R-squared: 0.9617 #F-statistic: 277 on 6 and 60 DF, p-value: < 2.2e-16 # part b) pscore = glm(etouch ~ foreignBorn00 + black00 + income + votePer00.rep + lowEduc00 + turnout00 + votePer96.rep, family = binomial(link = logit))$linear.predictors pdf(file = "./pscore.pdf") boxplot(pscore ~ etouch, main = "PScores by Treatment Assignment") dev.off() # part c) nn.att = function(x, treat, cal = 100) { # make my treated and control groups and their respective indexes index = 1:length(x) tr = x[treat == 1] index.tr = index[treat == 1] co = x[treat == 0] index.co = index[treat == 0] # initialize my matched data set match.data = NULL # cycle through for each element in the treated group # since I am trying to find att for(i in 1:length(tr)) { #calculate the distance dis = abs(co - tr[i]) # find the minimum one(s) match.index = which(dis == min(dis)) # create the matches. The rep() statements are for ties # where we repeat the index for however many ties there are, # repeat the treatment value for however many ties there are, # the index of the nearest neighbor(s), the values for the # control units, the distance of those units, and the weight # for each unit. If there is only one nearest neighbor # then this will only have one row, otherwise it will have # a row for each neighbor matches = cbind(rep(index.tr[i], length(match.index)), rep(tr[i], length(match.index)), index.co[match.index], co[match.index], dis[match.index], rep(1 / length(match.index), length(match.index))) # attach the matches to the bottom of my matched data set match.data = rbind(match.data, matches) } # find which rows are above the caliper tooBig = which(match.data[,5] > (cal * sd(x)) ) dropped = NULL if( length(tooBig) > 0) { # make a dropped matrix dropped = rbind(match.data[tooBig, c(1, 2)]) dropped = as.matrix(dropped) colnames(dropped) = c("index.tr","xt") # remove those rows which exceeded the caliper match.data = match.data[-tooBig, ] } # give names to my match.data object and return that and the dropped match.data = as.data.frame(match.data) colnames(match.data) = c("index.tr","xt","index.co","xc","dis","wts") return(list(match.data = match.data, dropped = dropped, index.tr = match.data[,1], index.co = match.data[,3], weights = match.data[,6])) } nn.pscore = nn.att(pscore, treat = etouch) nn.pscore nn.pscore.cal1 = nn.att(pscore, treat = etouch, cal = 1) nn.pscore.cal1 nn.pscore.cal.1 = nn.att(pscore, treat = etouch, cal = 0.1) nn.pscore.cal.1 # part d) # before pdf(file = "./before.pdf") par(mfrow = c(2, 2)) plot(density(votePer00.rep[etouch == 1]), main = "Republican Vote Percentage '00 \n Before", col = "blue", lty = 1, ylim = c(0, 5)) points(density(votePer00.rep[etouch == 0]), type = "l", lty = 2, col = "red") legend("topleft", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(black00[etouch == 1]), main = "Black Percentage '00 \n Before", col = "blue", lty = 1, ylim = c(0, 7)) points(density(black00[etouch == 0]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(turnout00[etouch == 1]), main = "Turnout '00 \n Before", col = "blue", lty = 1, ylim = c(0, 15), xlim = c(0.5, 0.9)) points(density(turnout00[etouch == 0]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(foreignBorn00[etouch == 1]), main = "Foreign Born Percentage '00 \n Before", col = "blue", lty = 1, ylim = c(0, 12)) points(density(foreignBorn00[etouch == 0]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) dev.off() # no caliper pdf(file = "./noCal.pdf") par(mfrow = c(2, 2)) plot(density(votePer00.rep[nn.pscore$index.tr]), main = "Republican Vote Percentage '00 \n No Caliper", col = "blue", lty = 1, ylim = c(0, 10)) points(density(votePer00.rep[nn.pscore$index.co]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(black00[nn.pscore$index.tr]), main = "Black Percentage '00 \n No Caliper", col = "blue", lty = 1, ylim = c(0, 7)) points(density(black00[nn.pscore$index.co]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(turnout00[nn.pscore$index.tr]), main = "Turnout '00 \n No Caliper", col = "blue", lty = 1, ylim = c(0, 15), xlim = c(0.5, 0.9)) points(density(turnout00[nn.pscore$index.co]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(foreignBorn00[nn.pscore$index.tr]), main = "Foreign Born Percentage '00 \n No Caliper", col = "blue", lty = 1, ylim = c(0, 8)) points(density(foreignBorn00[nn.pscore$index.co]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) dev.off() # 0.1 caliper pdf(file = "./withCal.pdf") par(mfrow = c(2, 2)) plot(density(votePer00.rep[nn.pscore.cal.1$index.tr]), main = "Republican Vote Percentage '00 \n Caliper = 0.1", col = "blue", lty = 1, ylim = c(0, 9)) points(density(votePer00.rep[nn.pscore.cal.1$index.co]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(black00[nn.pscore.cal.1$index.tr]), main = "Black Percentage '00 \n Caliper = 0.1", col = "blue", lty = 1, ylim = c(0, 10), xlim = c(-0.05, 0.3)) points(density(black00[nn.pscore.cal.1$index.co]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(turnout00[nn.pscore.cal.1$index.tr]), main = "Turnout '00 \n Caliper = 0.1", col = "blue", lty = 1, ylim = c(0, 18), xlim = c(0.5, 0.8)) points(density(turnout00[nn.pscore.cal.1$index.co]), type = "l", lty = 2, col = "red") legend("topleft", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) plot(density(foreignBorn00[nn.pscore.cal.1$index.tr]), main = "Foreign Born Percentage '00 \n Caliper = 0.1", col = "blue", lty = 1, ylim = c(0, 15), xlim = c(0, 0.3)) points(density(foreignBorn00[nn.pscore.cal.1$index.co]), type = "l", lty = 2, col = "red") legend("topright", c("Treated", "Control"), col = c("blue", "red"), lty = c(1, 2)) dev.off() # part f att = mean(bush04[nn.pscore$index.tr] * nn.pscore$weights) - mean(bush04[nn.pscore$index.co] * nn.pscore$weights) att # 0.01305403 detach(data)