# Regression-discontinuity walkthrough: naive comparison, density of the
# forcing variable, binned covariate/outcome plots, and two local estimators
# (rectangular kernel and local linear regression).

# Loads the workspace objects used below; the script expects a data frame
# named `data` with columns treat, vote.margin, PMDB.win.04, male.loser,
# male.winner, pop.2000 and cattle.1985.
load("/Users/ElGuapo/Documents/projects/ps236/sections/section6/section6.RData")

# ---- Helpers ----------------------------------------------------------------

# Midpoints of the intervals that cut() builds when splitting `x` into
# `n.bins` equal-width bins. The endpoints are parsed back out of the factor
# labels (e.g. "(-0.3,-0.29]"), so they carry cut()'s label rounding.
bin_midpoints <- function(x, n.bins) {
  bin.labels <- levels(cut(x, n.bins))
  lower <- as.numeric(sub(pattern = "\\((.*),.*", x = bin.labels,
                          replacement = "\\1"))
  upper <- as.numeric(sub(pattern = ".*,(.*).", x = bin.labels,
                          replacement = "\\1"))
  (lower + upper) / 2
}

# Mean of `y` within each of the `n.bins` equal-width bins of `x`.
# Empty bins come back as NA.
bin_means <- function(y, x, n.bins) {
  tapply(y, cut(x, n.bins), mean)
}

# Scatter of binned means on each side of the cutoff (losers in blue, winners
# in red), a vertical line at the cutoff, and a loess smooth per side.
plot_binned_means <- function(midpoints.losers, means.losers,
                              midpoints.winners, means.winners,
                              ylim, ylab) {
  plot(1, type = "n", ylim = ylim, xlim = c(-.3, .3),
       xlab = "2000 Vote Margin", ylab = ylab)
  points(midpoints.losers, means.losers, pch = 19, col = "blue")
  points(midpoints.winners, means.winners, pch = 19, col = "red")
  abline(v = 0, lwd = 2)
  # Add a loess line
  lines(loess.smooth(midpoints.losers, means.losers), col = "blue", lwd = 2)
  lines(loess.smooth(midpoints.winners, means.winners), col = "red", lwd = 2)
  invisible(NULL)
}

# Difference in means between treat == 1 and treat == 0, with the Neyman
# standard error sqrt(var1 / n1 + var0 / n0).
diff_in_means <- function(y, treat) {
  y.treated <- y[treat == 1]
  y.control <- y[treat == 0]
  c(
    estimate = mean(y.treated) - mean(y.control),
    se = sqrt(var(y.treated) / length(y.treated) +
                var(y.control) / length(y.control))
  )
}

# ---- Naive result -----------------------------------------------------------

# What's the naive result?
# positive with a T-stat of 3
summary(lm(PMDB.win.04 ~ treat, data = data))

# ---- Density of the forcing variable ----------------------------------------

plot(density(data$vote.margin), col = "purple", lwd = 2,
     main = "Density of the Vote Margin")
abline(v = 0, lwd = 2)

hist(data$vote.margin, col = "purple", lwd = 2, breaks = 15)
abline(v = 0, lwd = 2)

# ---- Binning ----------------------------------------------------------------

# Total number of bins; each side of the cutoff gets breaks / 2 of them.
breaks <- 100

is.loser <- data$treat == 0
is.winner <- data$treat == 1
margin.losers <- data$vote.margin[is.loser]
margin.winners <- data$vote.margin[is.winner]

# Bin midpoints, computed once and reused by every plot below.
bin.midpoints.losers <- bin_midpoints(margin.losers, breaks / 2)
bin.midpoints.winners <- bin_midpoints(margin.winners, breaks / 2)

# GENDER
bin.means.losers <- bin_means(data$male.loser[is.loser],
                              margin.losers, breaks / 2)
bin.means.winners <- bin_means(data$male.winner[is.winner],
                               margin.winners, breaks / 2)
plot_binned_means(bin.midpoints.losers, bin.means.losers,
                  bin.midpoints.winners, bin.means.winners,
                  ylim = c(.8, 1), ylab = "P(Male Candidate)")

# Population in 2000 (logged)
bin.means.losers <- bin_means(log(data$pop.2000[is.loser]),
                              margin.losers, breaks / 2)
bin.means.winners <- bin_means(log(data$pop.2000[is.winner]),
                               margin.winners, breaks / 2)
# The plotted quantity is the mean of log(pop.2000), so the axis says so.
plot_binned_means(bin.midpoints.losers, bin.means.losers,
                  bin.midpoints.winners, bin.means.winners,
                  ylim = c(8.9, 9.6), ylab = "Log Population in 2000")

# Number of Cattle in 1985 (log of count + 1, so zero counts stay finite)
bin.means.losers <- bin_means(log(data$cattle.1985[is.loser] + 1),
                              margin.losers, breaks / 2)
bin.means.winners <- bin_means(log(data$cattle.1985[is.winner] + 1),
                               margin.winners, breaks / 2)
plot_binned_means(bin.midpoints.losers, bin.means.losers,
                  bin.midpoints.winners, bin.means.winners,
                  ylim = c(8.9, 10.5), ylab = "Log Number of Cattle in 1985")

# PROBABILITY OF WINNING IN 2004
bin.means.losers <- bin_means(data$PMDB.win.04[is.loser],
                              margin.losers, breaks / 2)
bin.means.winners <- bin_means(data$PMDB.win.04[is.winner],
                               margin.winners, breaks / 2)
plot_binned_means(bin.midpoints.losers, bin.means.losers,
                  bin.midpoints.winners, bin.means.winners,
                  ylim = c(0, .6), ylab = "Probability of Winning in 2004")

# ---- Estimation -------------------------------------------------------------

## Simple rectangular kernel

# what's our bandwidth?
h <- .05

# trim the data: keep only observations with -h < vote.margin < h.
# (Indexing with `data$vote.margin - h` would treat the margins as row
# numbers and select no rows, so the window is built as a logical test;
# which() also drops rows whose margin is NA.)
data.trim <- data[which(abs(data$vote.margin) < h), ]

# estimate LATE, with SE from the Neyman formula
outcome.diff <- diff_in_means(data.trim$PMDB.win.04, data.trim$treat)
late.trim <- outcome.diff[["estimate"]]
se.trim <- outcome.diff[["se"]]

late.trim
se.trim

# what about a covariate?
# Both the estimate and the SE are computed on log(pop.2000), for both groups.
covariate.diff <- diff_in_means(log(data.trim$pop.2000), data.trim$treat)
late.trim <- covariate.diff[["estimate"]]
se.trim <- covariate.diff[["se"]]

late.trim
se.trim

## Local Linear Regression

# what's our bandwidth?
h <- .05

# trim the data (same window as above)
data.trim <- data[which(abs(data$vote.margin) < h), ]

# estimate two regressions, one on each side of the cutoff
left.reg <- lm(PMDB.win.04 ~ vote.margin,
               data = data.trim[data.trim$vote.margin < 0, ])
right.reg <- lm(PMDB.win.04 ~ vote.margin,
                data = data.trim[data.trim$vote.margin >= 0, ])

# The jump at the cutoff is the difference between the two intercepts.
coef(right.reg)[1] - coef(left.reg)[1]

# For inference, use the combined model
summary(lm(PMDB.win.04 ~ treat + vote.margin + vote.margin * treat,
           data = data.trim))