add bechdel data and script
This commit is contained in:
@@ -2,3 +2,5 @@ Article Date | Headline | File or folder
|
||||
---|---------|-------------
|
||||
March 17, 2014 | [FiveThirtyEight’s NCAA Tournament Predictions](http://fivethirtyeight.com/interactives/march-madness-predictions) | `march-madness-predictions`
|
||||
March 27, 2014 | [The NCAA Bracket: Checking Our Work](http://fivethirtyeight.com/datalab/the-ncaa-bracket-checking-our-work) | `historical-538-ncaa-tournament-model-results.csv`
|
||||
April 1, 2014 | [The Dollar-And-Cents Case Against Hollywood’s Exclusion of Women](http://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women) | `bechdel`
|
||||
|
||||
|
||||
79
bechdel/analyze-bechdel.R
Normal file
79
bechdel/analyze-bechdel.R
Normal file
@@ -0,0 +1,79 @@
|
||||
# Analyze movie data from the following websites: www.bechdeltest.com and www.the-numbers.com,
|
||||
# calculate summary statistics and conduct basic regression analysis to test whether movies
|
||||
# which pass the Bechdel test do better or worse in box-office returns.
|
||||
|
||||
# By Andrew Flowers (andrew.flowers@fivethirtyeight.com)
|
||||
# See also http://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women/
|
||||
# Dependent files: "movies.csv"
|
||||
|
||||
# Install and load required packages
|
||||
# install.packages(c("gdata", "cwhmisc"))
|
||||
library(gdata)
|
||||
library(cwhmisc)
|
||||
|
||||
# Load data
|
||||
rawData <- read.csv(file = "movies.csv", na.strings = "#N/A")
|
||||
|
||||
# Select movies from 1990 onward (the filter keeps year > 1989); $-denominated fields are used as loaded
|
||||
# which() drops rows whose year is NA; a bare logical index would turn each
# of them into an all-NA phantom row.
rawData <- rawData[which(rawData$year > 1989), ]
|
||||
|
||||
# International-only gross revenue (which equals total gross minus domestic gross)
|
||||
rawData$intOnly <- with(rawData, intgross_2013. - domgross_2013.)
|
||||
|
||||
# Return on Investment (ROI) measures
|
||||
rawData$ROI <- with(rawData, intgross_2013. / budget_2013.) # Total ROI
|
||||
rawData$ROI1 <- with(rawData, domgross_2013. / budget_2013.) # Domestic ROI
|
||||
rawData$ROI2 <- with(rawData, intOnly / budget_2013.) # International ROI
|
||||
|
||||
# Divide movies into FAIL and PASS divisions
|
||||
# %in% never returns NA, so rows with a missing `binary` value are excluded
# instead of being injected as all-NA rows.
failMovies <- rawData[rawData$binary %in% "FAIL", ]
|
||||
# %in% never returns NA, so rows with a missing `binary` value are excluded
# instead of being injected as all-NA rows.
passMovies <- rawData[rawData$binary %in% "PASS", ]
|
||||
|
||||
# Include a "generous" category (which includes both "ok" and "dubious" movies)
|
||||
# A single membership test replaces rbind() of two `==` subsets: rows keep
# their original order and rows with a missing clean_test are not duplicated
# in as all-NA rows. The medians computed from `generous` are unaffected.
generous <- rawData[rawData$clean_test %in% c("ok", "dubious"), ]
|
||||
|
||||
# Print medians: ROI and budget
|
||||
median(failMovies$ROI, na.rm = TRUE)
|
||||
median(passMovies$ROI, na.rm = TRUE)
|
||||
median(rawData$ROI, na.rm = TRUE)
|
||||
|
||||
# na.rm added for consistency with the ROI medians; without it a single
# missing budget makes the result NA.
median(failMovies$budget_2013., na.rm = TRUE)
|
||||
# na.rm added for consistency with the ROI medians; without it a single
# missing budget makes the result NA.
median(passMovies$budget_2013., na.rm = TRUE)
|
||||
# na.rm added for consistency with the ROI medians; without it a single
# missing budget makes the result NA.
median(rawData$budget_2013., na.rm = TRUE)
|
||||
|
||||
# Distributions and logs
|
||||
hist(rawData$budget_2013.)
|
||||
hist(log(rawData$budget_2013.))
|
||||
|
||||
hist(rawData$intgross_2013.)
|
||||
hist(log(rawData$intgross_2013.))
|
||||
|
||||
hist(rawData$ROI)
|
||||
hist(log(rawData$ROI))
|
||||
|
||||
# Linear regression models
|
||||
|
||||
# Movies with higher budgets make more gross revenues
|
||||
summary(lm(log(intgross_2013.)~log(budget_2013.), data=rawData))
|
||||
|
||||
# Bechdel dummy is not significant
|
||||
summary(lm(log(intgross_2013.)~log(budget_2013.)+factor(binary), data=rawData))
|
||||
|
||||
# Movies with higher budgets have lower ROI
|
||||
summary(lm(log(ROI)~log(budget_2013.), data=rawData))
|
||||
|
||||
# Bechdel dummy is not significant
|
||||
summary(lm(log(ROI)~log(budget_2013.)+factor(binary), data=rawData))
|
||||
|
||||
# ROI #1 (domestic) used in chart
|
||||
median(generous$ROI1, na.rm = TRUE)
|
||||
median(rawData$ROI1[rawData$clean_test == "men"], na.rm = TRUE)
|
||||
median(rawData$ROI1[rawData$clean_test == "notalk"], na.rm = TRUE)
|
||||
median(rawData$ROI1[rawData$clean_test == "nowomen"], na.rm = TRUE)
|
||||
|
||||
# ROI #2 (international) used in chart
|
||||
median(generous$ROI2, na.rm = TRUE)
|
||||
median(rawData$ROI2[rawData$clean_test == "men"], na.rm = TRUE)
|
||||
median(rawData$ROI2[rawData$clean_test == "notalk"], na.rm = TRUE)
|
||||
median(rawData$ROI2[rawData$clean_test == "nowomen"], na.rm = TRUE)
|
||||
|
||||
1
bechdel/movies.csv
Normal file
1
bechdel/movies.csv
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user