load("/Users/ElGuapo/Documents/projects/ps236/sections/section6/section6.RData") h <- seq(from = .01, to = .2, by = .01) data.window <- data[data$vote.margin>-.5 & data$vote.margin<.5, ] cv <- rep(NA,times=length(h)) for(j in 1:length(h)){ y.hat <- rep(NA,times=nrow(data.window)) for (i in 1:nrow(data.window)){ c <- data.window$vote.margin[i] if(c<0){ x.bw <- data$vote.margin[data$vote.margin< c & data$vote.margin>(c- h[j])] y.bw <- data$PMDB.vote.share.04[data$vote.margin< c & data$vote.margin>(c- h[j])] } if(c>=0){ x.bw <- data$vote.margin[data$vote.margin> c & data$vote.margin<(c+ h[j])] y.bw <- data$PMDB.vote.share.04[data$vote.margin> c & data$vote.margin<(c+ h[j])] } x.bw <- x.bw - c y.hat[i] <- coef(lm(y.bw~x.bw))[1] + c } cv[j] <- sum((data.window$PMDB.vote.share.04 - y.hat)^2)/nrow(data.window) print(j) } plot(h,cv,type="l",lwd=2,col="red") points(h[which(cv==min(cv))], min(cv),pch = 19, cex=2,col="blue") h.opt <- h[which(cv==min(cv))] #trim the data data.trim <- data[data$vote.margin-h.opt,] #For inference, use the combined model summary(lm(PMDB.win.04~treat + vote.margin + vote.margin*treat, data = data.trim)) ######## #Advance Propensity Score Matching ######### library(Matching) wfl.data <- read.csv("/Users/ElGuapo/Documents/projects/ps236/sections/section7/cross_section_wfl.csv") covar <- wfl.data[,c("y1990.pbirpc","y1990.tasatot","y1990.unemp","y1990.ineq","y1990.radical","y1990.peron","y1990.tminfec","y1990.hognbi2","y1990.hognbi3")] pscore.fmla <- as.formula(paste("treat~",paste(names(covar),collapse="+"))) treat <- wfl.data$treat pscore <- glm(pscore.fmla,data = wfl.data,family = binomial(link = logit)) lp <- pscore$linear.predictor orth.covar <- covar # Orthogonalize covariates for(i in 1:ncol(orth.covar)){ orth.covar[,i]<-lm(orth.covar[,i]~lp)$residuals } bal.data <- covar match.data <- cbind(lp,orth.covar) unmatched.bal <- MatchBalance(pscore.fmla,data=bal.data) bal.stats <- data.frame(covar= names(bal.data)) for (i in 1:9){ bal.stats$std.diff[i] <- abs(as.numeric((unmatched.bal$BeforeMatching)[[i]][1])) bal.stats$diff.means.p[i] <- (as.numeric((unmatched.bal$BeforeMatching)[[i]][7])) bal.stats$var.ratios[i] <- (as.numeric((unmatched.bal$BeforeMatching)[[i]][8])) } dotchart(bal.stats$std.diff,labels=bal.stats$covar,main="Standardized Difference",color="black",pch=19) dotchart(bal.stats$diff.means.p,labels=bal.stats$covar,main="Difference in Means P Value",color="black",pch=19) dotchart(bal.stats$var.ratios,labels=bal.stats$covar,main="Variance Ratios",color="black",pch=19) match.pscore <- Match(Tr=treat,X=match.data$lp,M=1,estimand="ATT") pscore.bal <- MatchBalance(pscore.fmla, data=bal.data, match.out = match.pscore) bal.stats <- data.frame(covar= names(bal.data)) for (i in 1:9){ bal.stats$bm.std.diff[i] <- abs(as.numeric((pscore.bal$BeforeMatching)[[i]][1])) bal.stats$bm.diff.means.p[i] <- (as.numeric((pscore.bal$BeforeMatching)[[i]][7])) bal.stats$bm.var.ratios[i] <- (as.numeric((pscore.bal$BeforeMatching)[[i]][8])) bal.stats$am.std.diff[i] <- abs(as.numeric((pscore.bal$AfterMatching)[[i]][1])) bal.stats$am.diff.means.p[i] <- (as.numeric((pscore.bal$AfterMatching)[[i]][7])) bal.stats$am.var.ratios[i] <- (as.numeric((pscore.bal$AfterMatching)[[i]][8])) } dotchart(bal.stats$bm.std.diff,labels=bal.stats$covar,main="Standardized Difference",color="black",pch=19) points(bal.stats$am.std.diff,1:9,col="red",pch=19) dotchart(bal.stats$bm.diff.means.p,labels=bal.stats$covar,main="Difference in Means P Value",color="black",pch=19,xlim=c(0,1)) points(bal.stats$am.diff.means.p,1:9,col="red",pch=19) dotchart(bal.stats$bm.var.ratios,labels=bal.stats$covar,main="Variance Ratios",color="black",pch=19) points(bal.stats$am.var.ratios,1:9,col="red",pch=19) ###MANHALOBIS DISTANCE match.mahn <- Match(Tr=treat, X=match.data,M=1,estimand="ATT",Weight=2) mahn.bal <- MatchBalance(pscore.fmla, data = bal.data, match.out = match.mahn) bal.stats <- data.frame(covar= names(bal.data)) for (i in 1:9){ bal.stats$bm.std.diff[i] <- abs(as.numeric((mahn.bal$BeforeMatching)[[i]][1])) bal.stats$bm.diff.means.p[i] <- (as.numeric((mahn.bal$BeforeMatching)[[i]][7])) bal.stats$bm.var.ratios[i] <- (as.numeric((mahn.bal$BeforeMatching)[[i]][8])) bal.stats$am.std.diff[i] <- abs(as.numeric((mahn.bal$AfterMatching)[[i]][1])) bal.stats$am.diff.means.p[i] <- (as.numeric((mahn.bal$AfterMatching)[[i]][7])) bal.stats$am.var.ratios[i] <- (as.numeric((mahn.bal$AfterMatching)[[i]][8])) } dotchart(bal.stats$bm.std.diff,labels=bal.stats$covar,main="Standardized Difference",color="black",pch=19) points(bal.stats$am.std.diff,1:9,col="red",pch=19) dotchart(bal.stats$bm.diff.means.p,labels=bal.stats$covar,main="Difference in Means P Value",color="black",pch=19,xlim=c(0,1)) points(bal.stats$am.diff.means.p,1:9,col="red",pch=19) dotchart(bal.stats$bm.var.ratios,labels=bal.stats$covar,main="Variance Ratios",color="black",pch=19) points(bal.stats$am.var.ratios,1:9,col="red",pch=19) ####CALIPER caliper <- c(rep(100,times=7),.2,rep(100,times=2)) match.caliper <- Match(Tr=treat, X=match.data,M=1,estimand="ATT",Weight=2,caliper=caliper) cal.bal <- MatchBalance(pscore.fmla, data = bal.data, match.out = match.caliper) bal.stats <- data.frame(covar= names(bal.data)) for (i in 1:9){ bal.stats$bm.std.diff[i] <- abs(as.numeric((cal.bal$BeforeMatching)[[i]][1])) bal.stats$bm.diff.means.p[i] <- (as.numeric((cal.bal$BeforeMatching)[[i]][7])) bal.stats$bm.var.ratios[i] <- (as.numeric((cal.bal$BeforeMatching)[[i]][8])) bal.stats$am.std.diff[i] <- abs(as.numeric((cal.bal$AfterMatching)[[i]][1])) bal.stats$am.diff.means.p[i] <- (as.numeric((cal.bal$AfterMatching)[[i]][7])) bal.stats$am.var.ratios[i] <- (as.numeric((cal.bal$AfterMatching)[[i]][8])) } dotchart(bal.stats$bm.std.diff,labels=bal.stats$covar,main="Standardized Difference",color="black",pch=19) points(bal.stats$am.std.diff,1:9,col="red",pch=19) dotchart(bal.stats$bm.diff.means.p,labels=bal.stats$covar,main="Difference in Means P Value",color="black",pch=19,xlim=c(0,1)) points(bal.stats$am.diff.means.p,1:9,col="red",pch=19) dotchart(bal.stats$bm.var.ratios,labels=bal.stats$covar,main="Difference in Means P Value",color="black",pch=19) points(bal.stats$am.var.ratios,1:9,col="red",pch=19) ###SUBCLASS match.data$subclass <- ifelse(bal.data$y1990.tasatot>=median(bal.data$y1990.tasatot),1,0) exact.match <- c(rep(FALSE,times=10),TRUE) match.subclass <- Match(Tr=treat, X=match.data,M=1,estimand="ATT",Weight=2,exact=exact.match) subclass.bal <- MatchBalance(pscore.fmla, data = bal.data, match.out = match.subclass) bal.stats <- data.frame(covar= names(bal.data)) for (i in 1:9){ bal.stats$bm.std.diff[i] <- abs(as.numeric((subclass.bal$BeforeMatching)[[i]][1])) bal.stats$bm.diff.means.p[i] <- (as.numeric((subclass.bal$BeforeMatching)[[i]][7])) bal.stats$bm.var.ratios[i] <- (as.numeric((subclass.bal$BeforeMatching)[[i]][8])) bal.stats$am.std.diff[i] <- abs(as.numeric((subclass.bal$AfterMatching)[[i]][1])) bal.stats$am.diff.means.p[i] <- (as.numeric((subclass.bal$AfterMatching)[[i]][7])) bal.stats$am.var.ratios[i] <- (as.numeric((subclass.bal$AfterMatching)[[i]][8])) } dotchart(bal.stats$bm.std.diff,labels=bal.stats$covar,main="Standardized Difference",color="black",pch=19) points(bal.stats$am.std.diff,1:9,col="red",pch=19) dotchart(bal.stats$bm.diff.means.p,labels=bal.stats$covar,main="Difference in Means P Value",color="black",pch=19,xlim=c(0,1)) points(bal.stats$am.diff.means.p,1:9,col="red",pch=19) dotchart(bal.stats$bm.var.ratios,labels=bal.stats$covar,main="Variance Ratio",color="black",pch=19) points(bal.stats$am.var.ratios,1:9,col="red",pch=19) ###COMBINE CALIPER WITH EXACT MATCHING match.data$subclass <- ifelse(bal.data$y1990.tminfec>=median(bal.data$y1990.tminfec),1,0) exact.match <- c(rep(FALSE,times=10),TRUE) caliper <- c(rep(100,times=1),.2,rep(100,times=4),.2,rep(100,times=4)) match.cal.sub <- Match(Tr=treat, X=match.data,M=1,estimand="ATT",Weight=2,caliper=caliper) cal.sub.bal <- MatchBalance(pscore.fmla, data = bal.data, match.out = match.cal.sub) bal.stats <- data.frame(covar= names(bal.data)) for (i in 1:9){ bal.stats$bm.std.diff[i] <- abs(as.numeric((cal.sub.bal$BeforeMatching)[[i]][1])) bal.stats$bm.diff.means.p[i] <- (as.numeric((cal.sub.bal$BeforeMatching)[[i]][7])) bal.stats$bm.var.ratios[i] <- (as.numeric((cal.sub.bal$BeforeMatching)[[i]][8])) bal.stats$am.std.diff[i] <- abs(as.numeric((cal.sub.bal$AfterMatching)[[i]][1])) bal.stats$am.diff.means.p[i] <- (as.numeric((cal.sub.bal$AfterMatching)[[i]][7])) bal.stats$am.var.ratios[i] <- (as.numeric((cal.sub.bal$AfterMatching)[[i]][8])) } dotchart(bal.stats$bm.std.diff,labels=bal.stats$covar,main="Standardized Difference",color="black",pch=19) points(bal.stats$am.std.diff,1:9,col="red",pch=19) dotchart(bal.stats$bm.diff.means.p,labels=bal.stats$covar,main="Difference in Means P Value",color="black",pch=19,xlim=c(0,1)) points(bal.stats$am.diff.means.p,1:9,col="red",pch=19) dotchart(bal.stats$bm.var.ratios,labels=bal.stats$covar,main="Variance Ratio",color="black",pch=19) points(bal.stats$am.var.ratios,1:9,col="red",pch=19) #Let's finally look at the outcome summary(Match(Y=(wfl.data$y1999.tasatot-wfl.data$y1990.tasatot),Tr=treat, X=match.data,M=1,estimand="ATT",Weight=2,exact=exact.match,caliper=caliper))