added files for infrastructure project

This commit is contained in:
andrewflowers
2014-06-03 18:10:21 -04:00
parent 3d2ea1d15c
commit e573d8d011
2 changed files with 51 additions and 0 deletions

View File

@@ -0,0 +1,50 @@
# FiveThirtyEight.com
# Article: "Using Infrastructure Jobs as a Measuring Stick For State-Level Spending"
# Published on: June 3, 2014
# Article Author: Andrew Flowers (andrew.flowers@fivethirtyeight.com)
# Article URL: http://fivethirtyeight.com/datalab/using-infrastructure-jobs-as-a-measuring-stick-for-state-level-spending/
# Code Author: Andrew Flowers (andrew.flowers@fivethirtyeight.com)
# Dependent files: payroll-states.csv
# Purpose: Get state-level data on "Heavy Construction and Civil Engineering"
# Will produce statepayrolls.csv file after running
# Load dependencies first. library() stops immediately when reshape2 is not
# installed; require() would only return FALSE and let the script run on until
# the dcast() call fails.
library(reshape2)

# Common prefix of the BLS "sm" time-series flat files read below
bls.url <- "http://download.bls.gov/pub/time.series/sm/"

# Get data
# Download the construction data file to a temporary file and parse it. The
# temporary file is removed even if the download or the parse fails.
temp <- tempfile()
statepay.raw <- tryCatch(
  {
    download.file(paste0(bls.url, "sm.data.62.Construction.Current"), temp)
    read.table(temp, header = TRUE, sep = "\t", stringsAsFactors = FALSE,
               strip.white = TRUE)
  },
  finally = unlink(temp)
)

# Add series info
# Attach state names (from the local lookup file) to the series metadata.
series <- read.table(paste0(bls.url, "sm.series"), sep = "\t", header = TRUE,
                     strip.white = TRUE)
state <- read.csv("payroll-states.csv", header = TRUE, strip.white = TRUE)
series <- merge(series, state, by = "state_code")

# Add industry info
industry <- read.table(paste0(bls.url, "sm.industry"), sep = "\t",
                       header = TRUE, strip.white = TRUE)
# The columns are deliberately re-labelled here: the values parsed as row names
# become industry_code and the column parsed as "industry_code" becomes
# industry_name. NOTE(review): this only makes sense if read.table() consumed
# the first field of every row as row names (which it does when the header has
# one fewer field than the data rows) -- confirm against the live file.
industry <- data.frame(
  industry_name = industry$industry_code,
  industry_code = row.names(industry),
  stringsAsFactors = FALSE
)
series <- merge(series, industry, by = "industry_code")

# Join every observation to its series / state / industry metadata
statepay <- merge(statepay.raw, series, by = "series_id")

# Take out heavy construction industry data (which is coded 20237000)
# Only the first code is used for filtering; the remaining codes are kept for
# reference. The comparison is an exact match on the code rather than a regex
# substring search.
heavyIndCodes <- c(20237000, 20237100, 20237200, 20237300, 20237900)
statepay.heavy <- statepay[
  as.character(statepay$industry_code) %in% as.character(heavyIndCodes[1]),
]

# Clean state data
# Drop period "M13" (it cannot be parsed as a calendar month below; presumably
# an annual-average row -- TODO confirm) and keep only area_code 0 (presumably
# the statewide series -- TODO confirm).
statepay.NSA <- subset(statepay.heavy, period != "M13" & area_code == 0)
# Periods look like "M01".."M12"; build a Date for the first day of each month.
statepay.NSA$date <- as.Date(
  paste(statepay.NSA$year, statepay.NSA$period, "01", sep = "-"),
  format = "%Y-M%m-%d"
)
statepay.NSA <- subset(
  statepay.NSA,
  select = c("series_id", "date", "state_name", "value")
)

# Convert to time series
# One row per date, one column per state. dcast() silently switches to
# counting rows (length) when a date/state pair occurs more than once, which
# would write counts instead of payroll values, so flag that case loudly.
if (anyDuplicated(statepay.NSA[, c("date", "state_name")]) > 0) {
  warning(
    "Duplicate date/state rows found; dcast() will aggregate with length() ",
    "and the output will contain counts, not values.",
    call. = FALSE
  )
}
statepay.NSA.t <- dcast(statepay.NSA, date ~ state_name, value.var = "value")
# Row names are written as the first (unnamed) column, as in the original
# output format.
write.csv(statepay.NSA.t, file = "statepayrolls.csv")

View File

@@ -0,0 +1 @@
state_code,state_name
1 state_code state_name 1 Alabama 2 Alaska 4 Arizona 5 Arkansas 6 California 8 Colorado 9 Connecticut 10 Delaware 11 District of Columbia 12 Florida 13 Georgia 15 Hawaii 16 Idaho 17 Illinois 18 Indiana 19 Iowa 20 Kansas 21 Kentucky 22 Louisiana 23 Maine 24 Maryland 25 Massachusetts 26 Michigan 27 Minnesota 28 Mississippi 29 Missouri 30 Montana 31 Nebraska 32 Nevada 33 New Hampshire 34 New Jersey 35 New Mexico 36 New York 37 North Carolina 38 North Dakota 39 Ohio 40 Oklahoma 41 Oregon 42 Pennsylvania 44 Rhode Island 45 South Carolina 46 South Dakota 47 Tennessee 48 Texas 49 Utah 50 Vermont 51 Virginia 53 Washington 54 West Virginia 55 Wisconsin 56 Wyoming 72 Puerto Rico 78 Virgin Islands