-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathseminar_code.R
164 lines (114 loc) · 5.6 KB
/
seminar_code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
library(RFigisGeo)
library(rgeos)
library(rgdal)
library(rjson)
library(plyr)
library(dplyr)
library(sp)
library(RColorBrewer)
library(lmtest)
library(spdep)
#Tell R where I want to work, this is optional, but saves time.
setwd("~/Google Drive/a&m_stuff/")
#Read shapefiles for all TX census tracts
txtracts<-readOGR(".", "gz_2010_48_140_00_500k")
#And a shapefile for counties in the AACOG
sa_cos<- readOGR(".", "sa_counties")
#make county polygons into lines
sa_co_lines<-as(sa_cos, "SpatialLines")
#extract tracts within the San Antonio area counties using a spatial intersection (GIS command)
sa_tracts<- RFigisGeo::intersection (txtracts,sa_cos )
plot(sa_tracts)
plot(sa_co_lines, add=T, col=2, lwd=2.5)
#Make an identifier that combines state, county and tract - this is the unique identifier from the Census, called the GEOID
sa_tracts$tract_id<-paste(as.character(sa_tracts$STATE.x), as.character(sa_tracts$COUNTY.x), as.character(sa_tracts$TRACT ), sep="")
#Get data from factfinder
#Here I use the Census developer API (Application program interface) to get data easily and on the fly.
#https://www.census.gov/developers/
#Here, i'm using the 2013 5 year American Community Survey data on census tracts in 8 counties in and around San Antonio
#The list of tables is here:
#http://api.census.gov/data/2013/acs5/variables.html
#DP05_0001E = total population
#DP02_0009PE = female hh with children %
#DP02_0040E = 15 TO 19 fertility rate per 1000 women
#DP02_0038E = unmarried fertility rate per 1000 women
#DP02_0066PE hs or higher %
#DP02_0067PE ba or higher %
#DP02_0080PE different house 1 year ago %
#DP02_0092PE foreign born %
#DP03_0005PE = unemployment rate %
#DP03_0028PE = service sector %
#DP03_0062E = median hh income
#DP03_0099PE = % without health insurance
#DP03_0101PE = under 18 without health insurance %
#DP03_0119PE = poverty rate %
#DP04_0046PE = housing units rented %
#DP04_0057PE = housing units without vehicle %
#DP04_0078PE = housing units with > 1.5 persons per room &
#DP05_0072PE nh_white %
#DP05_0073PE nh_black %
#DP05_0066PE latino %
#instructions on doing this are here:
#http://api.census.gov/data/2013/acs5/examples.html
myurl<-"http://api.census.gov/data/2013/acs5/profile?get=DP05_0001E,DP02_0009PE,DP02_0040E,DP02_0038E,DP02_0066PE,DP02_0067PE,DP02_0080PE,DP02_0092PE,DP03_0005PE,DP03_0028PE,DP03_0062E,DP03_0099PE,DP03_0101PE,DP03_0119PE,DP04_0046PE,DP04_0057PE,DP04_0078PE,DP05_0072PE,DP05_0073PE,DP05_0066PE&key=997fb9115102b709d5028501b4b030e84af62525&for=tract:*&in=state:48+county:029,091,259,325,013,019,493,187"
#Here I use some tools in R to read data from the API, and convert it to a flat table:
acs<-fromJSON(file=url(myurl) )
df<-ldply(acs[-1])
#give the table some names that make sense
names(df)<-c("totalpop","pfemhh", "teenfert", "unmarfert", "hsplus", "baplus", "diffhouse1yr", "pforeign", "unemployment","service","medianhhinc", "nohealthins","childnohealthins","poverty", "rented","novehicle", "highoccupancy", "pnhwhite", "pnhblack", "phispanic", "state", "county" ,"tract")
#The data come down, often times as character values, so I convert them all to numeric
df<- mutate_each(df, funs(as.numeric), -state, -county ,-tract)
#MAke the Census GEOID so I can merge it to my polygons from above
df$tractid<-paste(df$state, df$county, df$tract, sep="")
#Let's have a look
head(df)
#Merge the tracts to the tabular data
sa_tracts2<-merge(sa_tracts, df, by.x=c("COUNTY.x", "TRACT"), by.y= c("county", "tract"))
#get rid of some missing tracts, some of the military bases aren't really valid
sa_tracts2<-sa_tracts2[is.na(sa_tracts2$poverty)==F&is.na(sa_tracts2$childnohealthins)==F, ]
head(sa_tracts2@data)
#Save our new shapefile that has the data on it
#writeOGR(sa_tracts2, ".", "sa_tracts2", driver="ESRI Shapefile")
plot(sa_tracts2)
spplot(sa_tracts2,c( "pnhwhite", "pnhblack", "phispanic"), at=seq(0,100, 20),col.regions=brewer.pal(n=5, "Reds") , col=NA, main = "Distribution of Population by Race/Ethnicity- ACS 2013")
spplot(sa_tracts2,c( "poverty"),col.regions=brewer.pal(n=7, "Reds"), col=NA,at=seq(0,70,10),
main="ACS Poverty Rate Estimate 2015 5 Year Estimates")
nbs<- poly2nb(sa_tracts2, queen = T)
wts<-nb2listw(nbs, style = "W")
moran.mc(sa_tracts2$poverty, listw=wts, nsim=999, na.action=na.omit)
geary.mc(sa_tracts2$poverty, listw = wts, nsim = 999)
#OLS model for poverty
hist(sa_tracts2$poverty, main="AACOG Poverty Histogram")
fit <- lm( poverty ~baplus+ phispanic+pnhblack, data=sa_tracts2)
summary(fit)
plot(fit)
hist(rstudent(fit))
bptest(fit)
sa_tracts2$olsresid<-rstudent(fit)
lm.morantest(fit, listw = wts)
spplot(sa_tracts2, "olsresid", at=quantile(sa_tracts2$olsresid), col.regions=brewer.pal(n=6, name="RdBu"), col=NA)
lm.LMtests(fit, listw=wts, test="all")
#spatial regression models
#error model
fit.e<-errorsarlm(poverty ~baplus+ phispanic+pnhblack, data=sa_tracts2, listw = wts)
summary(fit.e)
#lag model
fit.l<-lagsarlm(poverty ~baplus+ phispanic+pnhblack, data=sa_tracts2, listw = wts)
summary(fit.l)
#Durbin model
fit.ld<-lagsarlm(poverty ~baplus+ phispanic+pnhblack, data=sa_tracts2, listw = wts, type = "mixed")
summary(fit.ld)
AIC (fit.e)
AIC (fit.l)
AIC (fit.ld)
#Spatial Regimes
#split data by %Hispanic
sa_tracts2$hisp_cut<-cut(sa_tracts2$phispanic, breaks = 3)
table(sa_tracts2$hisp_cut)
spplot(sa_tracts2, "hisp_cut")
fit.r1<-lm(poverty ~baplus+ phispanic+pnhblack, data=sa_tracts2, subset = hisp_cut=="(-0.0994,33.1]")
fit.r2<-lm(poverty ~baplus+ phispanic+pnhblack, data=sa_tracts2, subset = hisp_cut=="(33.1,66.3]")
fit.r3<-lm(poverty ~baplus+ phispanic+pnhblack, data=sa_tracts2, subset = hisp_cut=="(66.3,99.5]")
summary(fit.r1)
summary(fit.r2)
summary(fit.r3)