# File: infrastructure-jobs/infrastructure-jobs.R
#
# FiveThirtyEight.com
# Article: "Using ‘Infrastructure Jobs’ as a Measuring Stick For State-Level Spending"
# Published on: June 3, 2014
# Article Author: Andrew Flowers (andrew.flowers@fivethirtyeight.com)
# Article URL: http://fivethirtyeight.com/datalab/using-infrastructure-jobs-as-a-measuring-stick-for-state-level-spending/

# Code Author: Andrew Flowers (andrew.flowers@fivethirtyeight.com)
# Dependent files: payroll-states.csv (state_code -> state_name lookup; its
#   expected contents are listed at the bottom of this script)

# Purpose: Get state-level data on "Heavy Construction and Civil Engineering"
# Will produce statepayrolls.csv file after running

# Load dependencies up front so a missing package fails immediately instead
# of after all the downloads have finished.
library(reshape2)

# Get data ----
# Download the BLS construction data file to a temporary location, parse it,
# and always remove the temporary file, even if the download or parse fails.
data_file <- tempfile()
statepay_raw <- tryCatch(
  {
    download.file(
      "http://download.bls.gov/pub/time.series/sm/sm.data.62.Construction.Current",
      data_file
    )
    read.table(
      data_file,
      header = TRUE, sep = "\t", stringsAsFactors = FALSE, strip.white = TRUE
    )
  },
  finally = unlink(data_file)
)

# Add series info ----
# Attach a state name to every series via the local state_code lookup table.
# merge() keeps only series whose state_code appears in payroll-states.csv.
series <- read.table(
  "http://download.bls.gov/pub/time.series/sm/sm.series",
  sep = "\t", header = TRUE, strip.white = TRUE
)
states <- read.csv("payroll-states.csv", header = TRUE, strip.white = TRUE)
series <- merge(series, states, by = "state_code")

# Add industry info ----
industry <- read.table(
  "http://download.bls.gov/pub/time.series/sm/sm.industry",
  sep = "\t", header = TRUE, strip.white = TRUE
)
# The parsed `industry_name` column is discarded and replaced by the row
# names; the two remaining columns are then relabelled so the first becomes
# `industry_name` and the second (the former row names) `industry_code`.
# NOTE(review): presumably read.table() consumed the code field as row names
# because the data lines carry one more field than the header -- confirm
# against the current layout of sm.industry before relying on this.
industry$industry_name <- NULL
industry$industry_name <- row.names(industry)
row.names(industry) <- NULL
names(industry) <- c("industry_name", "industry_code")

series <- merge(series, industry, by = "industry_code")

# Join observations to their series metadata ----
statepay <- merge(statepay_raw, series, by = "series_id")

# Keep only heavy construction industry data (which is coded 20237000) ----
# Compare the code exactly rather than as a regular-expression substring.
# Going through as.character() makes the test independent of whether the
# column was parsed as integer, character or factor.
# (Earlier versions of this script also listed 20237100, 20237200, 20237300
# and 20237900 but never used them.)
heavy_code <- "20237000"
is_heavy <- as.character(statepay$industry_code) %in% heavy_code
statepay_heavy <- statepay[is_heavy, ]

# Clean state data ----
# Drop period "M13" (it has no calendar month to parse into a date) and keep
# only rows with area_code 0.
statepay_nsa <- subset(statepay_heavy, period != "M13" & area_code == 0)
# Periods look like "M01".."M12"; the literal "M" in the format string skips
# the prefix and every observation is dated to the first of its month.
statepay_nsa$date <- as.Date(
  paste(statepay_nsa$year, statepay_nsa$period, "01", sep = "-"),
  format = "%Y-M%m-%d"
)
statepay_nsa <- statepay_nsa[, c("series_id", "date", "state_name", "value")]

# Convert to time series ----
# dcast() silently falls back to counting rows (fun.aggregate = length) when
# a date/state pair occurs more than once, which would write counts instead
# of payroll values. Fail loudly instead.
if (anyDuplicated(statepay_nsa[, c("date", "state_name")]) > 0) {
  stop(
    "More than one series per state and date after filtering; ",
    "narrow the selection before reshaping.",
    call. = FALSE
  )
}
statepay_wide <- dcast(statepay_nsa, date ~ state_name, value.var = "value")
write.csv(statepay_wide, file = "statepayrolls.csv")

# Reference: contents of infrastructure-jobs/payroll-states.csv ----
# (stored without a trailing newline)
# state_code,state_name
# 1,Alabama
# 2,Alaska
# 4,Arizona
# 5,Arkansas
# 6,California
# 8,Colorado
# 9,Connecticut
# 10,Delaware
# 11,District of Columbia
# 12,Florida
# 13,Georgia
# 15,Hawaii
# 16,Idaho
# 17,Illinois
# 18,Indiana
# 19,Iowa
# 20,Kansas
# 21,Kentucky
# 22,Louisiana
# 23,Maine
# 24,Maryland
# 25,Massachusetts
# 26,Michigan
# 27,Minnesota
# 28,Mississippi
# 29,Missouri
# 30,Montana
# 31,Nebraska
# 32,Nevada
# 33,New Hampshire
# 34,New Jersey
# 35,New Mexico
# 36,New York
# 37,North Carolina
# 38,North Dakota
# 39,Ohio
# 40,Oklahoma
# 41,Oregon
# 42,Pennsylvania
# 44,Rhode Island
# 45,South Carolina
# 46,South Dakota
# 47,Tennessee
# 48,Texas
# 49,Utah
# 50,Vermont
# 51,Virginia
# 53,Washington
# 54,West Virginia
# 55,Wisconsin
# 56,Wyoming
# 72,Puerto Rico
# 78,Virgin Islands