add nutrition studies repo
This commit is contained in:
BIN
nutrition-studies/.RData
Normal file
BIN
nutrition-studies/.RData
Normal file
Binary file not shown.
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"path" : "~/editing/dieting-project",
|
||||
"sortOrder" : [
|
||||
{
|
||||
"ascending" : true,
|
||||
"columnIndex" : 2
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"activeTab" : 0
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"left" : {
|
||||
"panelheight" : 674,
|
||||
"splitterpos" : 283,
|
||||
"topwindowstate" : "NORMAL",
|
||||
"windowheight" : 713
|
||||
},
|
||||
"right" : {
|
||||
"panelheight" : 674,
|
||||
"splitterpos" : 421,
|
||||
"topwindowstate" : "NORMAL",
|
||||
"windowheight" : 713
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
{
|
||||
"TabSet1" : 0,
|
||||
"TabSet2" : 3
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
{"active_set":"","sets":[]}
|
||||
3
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/194B460A
Normal file
3
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/194B460A
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"tempName" : "Untitled1"
|
||||
}
|
||||
3
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/27AAD271
Normal file
3
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/27AAD271
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"tempName" : "Untitled1"
|
||||
}
|
||||
3
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/DDD9C5D8
Normal file
3
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/DDD9C5D8
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"tempName" : "Untitled1"
|
||||
}
|
||||
3
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/E17005B0
Normal file
3
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/E17005B0
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"tempName" : "Untitled1"
|
||||
}
|
||||
4
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/INDEX
Normal file
4
nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/INDEX
Normal file
@@ -0,0 +1,4 @@
|
||||
~%2Fediting%2Fdieting-project%2Fdiet_p_hacking.R="194B460A"
|
||||
~%2Fediting%2Fdieting-project%2Fscratch_work.R="27AAD271"
|
||||
~%2Fprivate-data%2Fdieting-project%2Fdiet_p_hacking.R="DDD9C5D8"
|
||||
~%2Fprivate-data%2Fdieting-project%2Fdiet_p_hacking_final.R="E17005B0"
|
||||
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"contents" : "",
|
||||
"created" : 1452009684761.000,
|
||||
"dirty" : false,
|
||||
"encoding" : "",
|
||||
"folds" : "",
|
||||
"hash" : "0",
|
||||
"id" : "3EF212EB",
|
||||
"lastKnownWriteTime" : 140735085965160,
|
||||
"path" : null,
|
||||
"project_path" : null,
|
||||
"properties" : {
|
||||
"cacheKey" : "l91i3fvvp3",
|
||||
"caption" : "regAnalysis",
|
||||
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=regAnalysis&cache_key=l91i3fvvp3",
|
||||
"displayedObservations" : "27716",
|
||||
"environment" : "",
|
||||
"object" : "regAnalysis",
|
||||
"totalObservations" : "27716",
|
||||
"variables" : "3"
|
||||
},
|
||||
"relative_order" : 2,
|
||||
"source_on_save" : false,
|
||||
"type" : "r_dataframe"
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"contents" : "# Diet P-hacking\n# Andrew Flowers <andrew.flowers@fivethirtyeight.com>\n\nsetwd(\"~/private-data//dieting-project/\")\n\nrequire(readr)\nrequire(plyr)\nrequire(dplyr)\nrequire(tidyr)\n\nrawData <- read.csv(\"raw_anonymized_data.csv\")\n\n# Fix innie/out characteristics\n\nrawData$belly <- revalue(rawData$belly, c(\"Innie\"=\"Yes\", \"Outie\"=\"No\"))\n\n# FFQ variable names (should total 1066)\n\nffq <- names(rawData)[28:1093]\n\n# Characteristic variable names (should total 26)\n\ncharacteristics <- names(rawData)[2:27]\n\n# Linear regressions with respondent characteristic predicting food frequency\n\nregValues <- data.frame(food=ffq)\n\nfor (c in characteristics) regValues[,c] <- NA # Add characteristics as blank columns to regValues data frame\n\nfor (f in ffq){\n for (c in characteristics){\n \n frm <- formula(paste0(f, \"~\", c))\n reg <- summary(lm(data=rawData, formula=frm))\n regValues[which(regValues$food==f), c] <- reg$coefficients[8]\n }\n}\n\n# Extract p-values\n\nregAnalysis <- regValues %>%\n gather(\"characteristic\", \"p_values\", 2:27) %>% \n arrange(p_values)\n\n# Write out p-values\n\nwrite_csv(regAnalysis, \"p_values_analysis.csv\")\n\n# Note: This is an intentionally shady regression analysis. Both because of the \"p-hacking\" or \n# \"data mining\" behind running over 27,000 regresison, but also in that only the statistics reported \n# were the p-values of the characteristics (the independent variables).\n\n# IN OTHER WORDS: DO NOT TRY THIS AT HOME (AKA, THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS)\n",
|
||||
"created" : 1450735440535.000,
|
||||
"dirty" : false,
|
||||
"encoding" : "UTF-8",
|
||||
"folds" : "",
|
||||
"hash" : "2220922350",
|
||||
"id" : "BC3CEA54",
|
||||
"lastKnownWriteTime" : 1452010952,
|
||||
"path" : "~/private-data/dieting-project/diet_p_hacking_final.R",
|
||||
"project_path" : "diet_p_hacking_final.R",
|
||||
"properties" : {
|
||||
"tempName" : "Untitled1"
|
||||
},
|
||||
"relative_order" : 1,
|
||||
"source_on_save" : false,
|
||||
"type" : "r_source"
|
||||
}
|
||||
19
nutrition-studies/README.md
Normal file
19
nutrition-studies/README.md
Normal file
@@ -0,0 +1,19 @@
|
||||
### Nutrition Studies
|
||||
|
||||
This directory contains data and code behind the story [You Can’t Trust What You Read About Nutrition](http://fivethirtyeight.com/features/you-cant-trust-what-you-read-about-nutrition).
|
||||
|
||||
Many studies of diet and nutrition include multiple variables with vast amounts of data, making it easy to p-hack your way to sexy (and false) results. We learned this firsthand when we invited readers to take a survey about their eating habits known as the food frequency questionnaire and answer a few other questions about themselves. We ended up with 54 complete responses and looked for associations much as researchers look for links between foods and dreaded diseases. It was easy to find them.
|
||||
|
||||
*Warning*: This is evil (statistical) work. Do not go to the dark side. Do not try this at home.
|
||||
|
||||
This directory contains three files:
|
||||
|
||||
File | Description
|
||||
--- | -----
|
||||
`raw_anonymized_data.csv` | The FFQ and survey data from 54 respondents
|
||||
`p_hacking.R` | An R script that performs 27,716 regressions
|
||||
`p_values_analysis.csv` | The output data file listing the p-values
|
||||
|
||||
**Note:** This is an intentionally shady regression analysis, both because of the "p-hacking" or "data mining" behind running more than 27,000 regressions and because the only statistics reported were the p-values of the characteristics (the independent variables).
|
||||
|
||||
**IN OTHER WORDS: THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS.**
|
||||
54
nutrition-studies/p_hacking.R
Normal file
54
nutrition-studies/p_hacking.R
Normal file
@@ -0,0 +1,54 @@
|
||||
# Diet P-hacking
# Andrew Flowers <andrew.flowers@fivethirtyeight.com>
#
# Regresses every food-frequency (FFQ) variable on every respondent
# characteristic, one pair at a time, and writes the p-value of each
# characteristic to p_values_analysis.csv, sorted from smallest to largest.

# NOTE(review): hard-coded, machine-specific working directory. The script
# only runs as-is where this path exists; kept so that the locations of the
# input and output files are unchanged.
setwd("~/private-data//dieting-project/")

# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later with a less obvious error.
library(readr)
library(plyr)
library(dplyr)
library(tidyr)

rawData <- read.csv("raw_anonymized_data.csv")

# Recode the belly characteristic from Innie/Outie to Yes/No

rawData$belly <- revalue(rawData$belly, c("Innie" = "Yes", "Outie" = "No"))

# FFQ variable names (should total 1066)

ffq <- names(rawData)[28:1093]

# Characteristic variable names (should total 26)

characteristics <- names(rawData)[2:27]

# Linear regressions with respondent characteristic predicting food frequency

# One row per food; one NA-filled column per characteristic, to be overwritten
# with p-values below.
regValues <- data.frame(food = ffq)

for (ch in characteristics) regValues[, ch] <- NA

for (i in seq_along(ffq)) {
  for (ch in characteristics) {

    frm <- formula(paste0(ffq[i], "~", ch))
    coefs <- coef(summary(lm(formula = frm, data = rawData)))

    # Row 1 is the intercept; row 2 is the first term of the characteristic.
    # Selecting the p-value by row and column name (instead of linear position
    # 8) stays correct when the predictor expands to more than one coefficient.
    # If only the intercept could be estimated there is no p-value to report.
    regValues[i, ch] <- if (nrow(coefs) >= 2) coefs[2, "Pr(>|t|)"] else NA
  }
}

# Extract p-values: reshape to one row per (food, characteristic) pair and
# sort by p-value, smallest first

regAnalysis <- regValues %>%
  gather("characteristic", "p_values", 2:27) %>%
  arrange(p_values)

# Write out p-values

write_csv(regAnalysis, "p_values_analysis.csv")

# Note: This is an intentionally shady regression analysis, both because of the
# "p-hacking" or "data mining" behind running over 27,000 regressions, and
# because the only statistics reported were the p-values of the characteristics
# (the independent variables).

# IN OTHER WORDS: DO NOT TRY THIS AT HOME (AKA, THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS)
|
||||
27717
nutrition-studies/p_values_analysis.csv
Normal file
27717
nutrition-studies/p_values_analysis.csv
Normal file
File diff suppressed because it is too large
Load Diff
55
nutrition-studies/raw_anonymized_data.csv
Normal file
55
nutrition-studies/raw_anonymized_data.csv
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user