add nutrition studies repo

This commit is contained in:
Andrew Flowers
2016-01-05 16:59:10 -05:00
parent fb54c1e17d
commit 26bbe405e7
18 changed files with 27935 additions and 0 deletions

BIN
nutrition-studies/.RData Normal file

Binary file not shown.

View File

@@ -0,0 +1,9 @@
{
"path" : "~/editing/dieting-project",
"sortOrder" : [
{
"ascending" : true,
"columnIndex" : 2
}
]
}

View File

@@ -0,0 +1,3 @@
{
"activeTab" : 0
}

View File

@@ -0,0 +1,14 @@
{
"left" : {
"panelheight" : 674,
"splitterpos" : 283,
"topwindowstate" : "NORMAL",
"windowheight" : 713
},
"right" : {
"panelheight" : 674,
"splitterpos" : 421,
"topwindowstate" : "NORMAL",
"windowheight" : 713
}
}

View File

@@ -0,0 +1,4 @@
{
"TabSet1" : 0,
"TabSet2" : 3
}

View File

@@ -0,0 +1 @@
{"active_set":"","sets":[]}

View File

@@ -0,0 +1,3 @@
{
"tempName" : "Untitled1"
}

View File

@@ -0,0 +1,3 @@
{
"tempName" : "Untitled1"
}

View File

@@ -0,0 +1,3 @@
{
"tempName" : "Untitled1"
}

View File

@@ -0,0 +1,3 @@
{
"tempName" : "Untitled1"
}

View File

@@ -0,0 +1,4 @@
~%2Fediting%2Fdieting-project%2Fdiet_p_hacking.R="194B460A"
~%2Fediting%2Fdieting-project%2Fscratch_work.R="27AAD271"
~%2Fprivate-data%2Fdieting-project%2Fdiet_p_hacking.R="DDD9C5D8"
~%2Fprivate-data%2Fdieting-project%2Fdiet_p_hacking_final.R="E17005B0"

View File

@@ -0,0 +1,25 @@
{
"contents" : "",
"created" : 1452009684761.000,
"dirty" : false,
"encoding" : "",
"folds" : "",
"hash" : "0",
"id" : "3EF212EB",
"lastKnownWriteTime" : 140735085965160,
"path" : null,
"project_path" : null,
"properties" : {
"cacheKey" : "l91i3fvvp3",
"caption" : "regAnalysis",
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=regAnalysis&cache_key=l91i3fvvp3",
"displayedObservations" : "27716",
"environment" : "",
"object" : "regAnalysis",
"totalObservations" : "27716",
"variables" : "3"
},
"relative_order" : 2,
"source_on_save" : false,
"type" : "r_dataframe"
}

View File

@@ -0,0 +1,18 @@
{
"contents" : "# Diet P-hacking\n# Andrew Flowers <andrew.flowers@fivethirtyeight.com>\n\nsetwd(\"~/private-data//dieting-project/\")\n\nrequire(readr)\nrequire(plyr)\nrequire(dplyr)\nrequire(tidyr)\n\nrawData <- read.csv(\"raw_anonymized_data.csv\")\n\n# Fix innie/out characteristics\n\nrawData$belly <- revalue(rawData$belly, c(\"Innie\"=\"Yes\", \"Outie\"=\"No\"))\n\n# FFQ variable names (should total 1066)\n\nffq <- names(rawData)[28:1093]\n\n# Characteristic variable names (should total 26)\n\ncharacteristics <- names(rawData)[2:27]\n\n# Linear regressions with respondent characteristic predicting food frequency\n\nregValues <- data.frame(food=ffq)\n\nfor (c in characteristics) regValues[,c] <- NA # Add characteristics as blank columns to regValues data frame\n\nfor (f in ffq){\n for (c in characteristics){\n \n frm <- formula(paste0(f, \"~\", c))\n reg <- summary(lm(data=rawData, formula=frm))\n regValues[which(regValues$food==f), c] <- reg$coefficients[8]\n }\n}\n\n# Extract p-values\n\nregAnalysis <- regValues %>%\n gather(\"characteristic\", \"p_values\", 2:27) %>% \n arrange(p_values)\n\n# Write out p-values\n\nwrite_csv(regAnalysis, \"p_values_analysis.csv\")\n\n# Note: This is an intentionally shady regression analysis. Both because of the \"p-hacking\" or \n# \"data mining\" behind running over 27,000 regresison, but also in that only the statistics reported \n# were the p-values of the characteristics (the independent variables).\n\n# IN OTHER WORDS: DO NOT TRY THIS AT HOME (AKA, THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS)\n",
"created" : 1450735440535.000,
"dirty" : false,
"encoding" : "UTF-8",
"folds" : "",
"hash" : "2220922350",
"id" : "BC3CEA54",
"lastKnownWriteTime" : 1452010952,
"path" : "~/private-data/dieting-project/diet_p_hacking_final.R",
"project_path" : "diet_p_hacking_final.R",
"properties" : {
"tempName" : "Untitled1"
},
"relative_order" : 1,
"source_on_save" : false,
"type" : "r_source"
}

View File

@@ -0,0 +1,19 @@
### Nutrition Studies
This directory contains data and code behind the story [You Can't Trust What You Read About Nutrition](http://fivethirtyeight.com/features/you-cant-trust-what-you-read-about-nutrition).
Many studies of diet and nutrition include multiple variables with vast amounts of data, making it easy to p-hack your way to sexy (and false) results. We learned this firsthand when we invited readers to take a survey about their eating habits known as the food frequency questionnaire and answer a few other questions about themselves. We ended up with 54 complete responses and looked for associations much as researchers look for links between foods and dreaded diseases. It was easy to find them.
*Warning*: This is evil (statistical) work. Do not go to the dark side. Do not try this at home.
This directory contains three files:
File | Description
--- | -----
`raw_anonymized_data.csv` | The FFQ and survey data from 54 respondents
`p_hacking_final.R` | An R script that performs 27,716 regressions
`p_values_analysis.csv` | The output data file listing the p-values
**Note:** This is an intentionally shady regression analysis, both because of the "p-hacking" or "data mining" behind running more than 27,000 regressions and because the statistics reported were the p-values of the characteristics (the independent variables).
**IN OTHER WORDS: THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS.**

View File

@@ -0,0 +1,54 @@
# Diet P-hacking
# Andrew Flowers <andrew.flowers@fivethirtyeight.com>
#
# Regresses every food-frequency (FFQ) variable on every respondent
# characteristic, one pair at a time, and writes the characteristic's p-value
# from each regression to p_values_analysis.csv, sorted in ascending order.

# Switch to the original private data directory when it exists; otherwise stay
# in the current working directory, which must then contain
# raw_anonymized_data.csv.
dataDir <- "~/private-data//dieting-project/"
if (dir.exists(dataDir)) setwd(dataDir)

# library() stops immediately when a package is missing, whereas require()
# only warns and lets the script fail later. plyr is attached before dplyr so
# that dplyr's verbs are not masked by plyr's.
library(readr)
library(plyr)
library(dplyr)
library(tidyr)

rawData <- read.csv("raw_anonymized_data.csv")

# The column positions used below are hard-coded, so fail early if the file
# does not have at least that many columns.
stopifnot(ncol(rawData) >= 1093)

# Fix innie/outie characteristic: recode to Yes/No
rawData$belly <- revalue(rawData$belly, c("Innie" = "Yes", "Outie" = "No"))

# FFQ variable names (should total 1066)
ffq <- names(rawData)[28:1093]

# Characteristic variable names (should total 26)
characteristics <- names(rawData)[2:27]

# Linear regressions with respondent characteristic predicting food frequency.
# regValues holds one row per food and one (initially NA) column per
# characteristic.
regValues <- data.frame(food = ffq)
for (characteristic in characteristics) regValues[, characteristic] <- NA_real_

for (i in seq_along(ffq)) {
  for (characteristic in characteristics) {
    frm <- formula(paste0(ffq[i], "~", characteristic))
    coefs <- summary(lm(formula = frm, data = rawData))$coefficients

    # Row 1 is the intercept and row 2 the first predictor term; column 4 is
    # Pr(>|t|). Indexing by row and column (rather than by flat position 8)
    # still picks a p-value when a factor predictor contributes several rows.
    # When the predictor could not be estimated there is no second row and the
    # cell is left as NA.
    if (nrow(coefs) >= 2) {
      regValues[i, characteristic] <- coefs[2, 4]
    }
  }
}

# Extract p-values: reshape to one row per (food, characteristic) pair and
# sort with the smallest p-values first
regAnalysis <- regValues %>%
  gather("characteristic", "p_values", -food) %>%
  arrange(p_values)

# Write out p-values
write_csv(regAnalysis, "p_values_analysis.csv")

# Note: This is an intentionally shady regression analysis. Both because of the "p-hacking" or
# "data mining" behind running over 27,000 regressions, but also in that only the statistics reported
# were the p-values of the characteristics (the independent variables).
# IN OTHER WORDS: DO NOT TRY THIS AT HOME (AKA, THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS)

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long