#=========================================== # # computational economics: 2 Schelling Segregation # # author: John Eric Humphries johneric@uchicago.edu # date: 4-7-2014 # # abstract: A Schelling example # #=========================================== #======================== # Section 0: setup #======================== #setwd("/mnt/ide0/home/johneric/sbox/projects/neighborhoodVision/") rm(list=ls()) # Clear the workspace set.seed(777) library(compiler) #enableJIT(1) library(knitr) library(ggplot2) library(mvtnorm) library(reshape2) #========================= # OLS Function with bootstrap #========================= #------------------- # Section 1.1: generating data #------------------ GenerateDataSimple = function(n){ # Generates data with from a simple linear model # args: n - number of observations to generate # output: A list containing a Y vector and an X matrix x1 <- rbinom(n,10,.5) x2 <- rnorm(n,20,10) X <- cbind(1,x1,x2) theta2 <- 5 eps <- rnorm(n,0,sqrt(4)) beta <- matrix(cbind(2,3,1.4),3,1) Y <- X %*% beta + eps colnames(X) <- c("const", "x1","x2") colnames(Y) <- c("Y") return(list(Y,X)) } # Simulating simple data data <- data.frame(GenerateDataSimple(100)) attach(data) X = cbind(const,x1,x2) #------------------------------------ # Section 1.2: Defining sub functions #------------------------------------ #Don't actually use this below, just want to have it defined for teaching purposes SimpleOLS <- function(y=y,x=x) { beta_hat1 = solve(t(x) %*% x) %*% (t(x)%*% y) se1 = sqrt(diag((1/(length(y) - length(beta_hat1))) * (t(y - x%*%beta_hat1) %*% (y-x%*%beta_hat1))[1] * solve(t(x) %*% x))) out = list(t(beta_hat1),se1) return(out) } SumOfSquares = function(b,x=X,y=Y) { #Sum of squares function for numerical optimizer sum((y - x%*%b)^2) } #----------------------------- # Section 1.3: Defining our main function #---------------------------- # A function we built up in parts (may not have done bootstrapping if time did not permit) OLS = function(y=Y,x=X,method=1,bootstrap=0,bsnum=20) { #Calculates OLS using one of two methods: # Method == 1 Calculates OLS algebraically # Method == 2 Uses an optimizer to minimize the sum of squares if (method==1 & bootstrap==0) { beta_hat1 = solve(t(x) %*% x) %*% (t(x)%*% y) se1 = sqrt( diag( (1/(length(y) - length(beta_hat1))) * (t(y - x%*%beta_hat1) %*% (y-x%*%beta_hat1))[1] * solve(t(x) %*% x) ) ) out = list(t(beta_hat1),se1) }else if (method==2 & bootstrap==0) { beta_hat2 <- optim( c(0,0,0), SumOfSquares, method = "BFGS", x=x, y=y, hessian=T) out = list(beta_hat2$par,sqrt(diag(solve(beta_hat2$hessian)))) } else if (method==1 & bootstrap==1) { beta_hat1 = solve(t(x) %*% x) %*% (t(x)%*% y) bs_estimates = matrix(NA,bsnum, (dim(x)[2] ) ) n = length(y) print("Bootstrapping Standard Errors") for (i in 1:bsnum) { samp = sample(n,n,replace=T) Xboot = x[samp,] Yboot = y[samp] bs_estimates[i,] <- solve(t(Xboot) %*% Xboot) %*% (t(Xboot)%*% Yboot) } se1 = apply(bs_estimates,2,sd) out = list(t(beta_hat1),se1) }else if (method==2 & bootstrap==1) { beta_hat2 <- optim( c(0,0,0), SumOfSquares, method = "BFGS", x=x, y=y, hessian=T) bs_estimates = matrix(NA,bsnum, (dim(x)[2] ) ) n = length(y) print("Bootstrapping Standard Errors") for (i in 1:bsnum) { samp = sample(n,n,replace=T) Xboot = x[samp,] Yboot = y[samp] bs_estimates[i,] <- optim( c(0,0,0), SumOfSquares, method = "BFGS", x=Xboot, y=Yboot)$par } se2 = apply(bs_estimates,2,sd) out = list(beta_hat2$par,se2) }else{ stop("Error! Method or boostrap option not found") } class(out) <- "olsoutput" return(out) } #----------------------- # Section 1.4: Output #---------------------- summary(lm(Y ~ X)) OLS(Y,X) OLS(Y,X,method=2) OLS(Y,X,method=1,bootstrap=1,bsnum=100) OLS(Y,X,method=2,bootstrap=1) OLS(Y,X,method=2,bootstrap=1,bsnum=100) #--------------- # Section 1.5: Defining an object and a way for that object to print. #--------------- summary.olsoutput <- function(object) { beta <- round(as.numeric(object[[1]]),3) se <- round(as.numeric(object[[2]]),3) out <- cbind(beta,se) print(beta) print(se) print("Regression Results:") return(out) } summary(OLS(Y,X,method=2,bootstrap=1,bsnum=100)) summary(OLS(Y,X,method=1,bootstrap=1,bsnum=100)) summary(OLS(Y,X,method=2,bootstrap=0,bsnum=100)) summary(OLS(Y,X,method=1,bootstrap=0,bsnum=100)) #========================= # Section 2: Schelling Segregation Model. #========================= # We will first develop this as code, then turn it into a function. SchellingGrid <- function(gs = 100, probabilities = c(.495,.495,.1) , stop.val = .99 , happy.val = .4) { values = c(1,-1,0) grid.size = gs values.mat = matrix(sample(values, grid.size^2, replace = T, prob = probabilities), grid.size, grid.size ) # starting plot p <- (qplot(x=Var1, y=Var2, data=melt(values.mat), fill=value, geom="tile", color = "white", main="SCHELLING GRID: 0") + scale_fill_gradient2(low = "lightgreen", mid = "white", high = "steelblue") ) + theme(legend.position = "none") print(p) values.happy = matrix(-10, grid.size, grid.size) values.same = matrix(NA, grid.size, grid.size) ratio = happy.val stop.ratio = stop.val i = 0 while ( sum(values.happy, na.rm=T) / (sum(values.mat!=0, na.rm=T)) < stop.ratio) { i = i + 1 for (row in sample(1:grid.size)) { for (col in sample(1:grid.size)) { nbrs = c(rep(NA,8)) if( row>1 & col>1) nbrs[1] <- values.mat[row -1, col -1] if( row>1) nbrs[2] <- values.mat[row -1, col ] if( row>1 & col1) nbrs[4] <- values.mat[row , col -1] if( col1) nbrs[6] <- values.mat[row +1, col -1] if( row= ratio ) { values.happy[row, col] =1 values.same[row,col] = sum(nbrs==1 ,na.rm=T) / sum(!is.na(nbrs)) } } if (val == -1 ) { if (sum(nbrs== -1 , na.rm=T) / sum(!is.na(nbrs)) < ratio ) { values.mat[row,col] = 0 newhome = sample(which(values.mat==0),1) values.mat[newhome] = -1 values.happy[newhome] =0 values.happy[row, col] = NA } if (sum(nbrs== -1 ,na.rm=T) / sum(!is.na(nbrs)) >= ratio ) { values.happy[row, col] =1 values.same[row,col] = sum(nbrs== -1 ,na.rm=T) / sum(!is.na(nbrs)) } } if (val == 0) { values.happy[row,col] = NA } } # end column loop } # end row loop print(paste("Percent Happy:", 100 * sum(values.happy, na.rm=T) / (sum(values.mat!=0)), "(iteration", i, ")" )) # Printing percent happy p <- (qplot(x=Var1, y=Var2, data=melt(values.mat), fill=value, geom="tile", color = "white", main= paste("SCHELLING GRID:",i)) + scale_fill_gradient2(low = "lightgreen", mid = "white", high = "steelblue") ) + theme(legend.position = "none") if (i %% 5 == 0) print(p) # printing intermediatne plot every so many iterations } # end while statement # Printing final figure p <- (qplot(x=Var1, y=Var2, data=melt(values.mat), fill=value, geom="tile", color = "white", main= "SCHELLING GRID: (final)") + scale_fill_gradient2(low = "lightgreen", mid = "white", high = "steelblue") ) + theme(legend.position = "none") print(p) return(list(mean(values.happy, na.rm=T),mean(values.same, na.rm=T), i,p)) } # Running the function results.schelling = SchellingGrid(gs = 100, probabilities = c(.4955,.4955,.01) , stop.val = .95 , happy.val = .51)