(FORCE) new commit with test data removed, generate json on saving data

This commit is contained in:
Joel Collins
2017-03-08 23:11:43 -05:00
parent d10c351fa5
commit 45a0ab5559
14 changed files with 66 additions and 9 deletions

1
.gitattributes vendored Normal file
View File

@@ -0,0 +1 @@
alternate_formats/* -diff

View File

@@ -14,6 +14,7 @@ script:
- pyflakes .
- python test/are_files_linted.py
- python test/validate.py
- python test/test_json_matches.py
notifications:
email:

View File

@@ -474,7 +474,9 @@ The following script takes one required command line argument
where congress_number is the number of the congress to be updated. As of July, 2013, the permanent URL for future roll call data is unclear, and as such, the script may need to be modified when it is run for the 114th congress.
The following script may be run to create alternately formatted data files. It takes no command-line arguments.
* alternate_bulk_formats.py creates four files (two each for current and historical legislators) in json and csv formats. The csv files do not include all fields from the legislator yaml files, and include data from the social media yaml. All four files are stored in the ~/alternate_formats directory.
* alternate_bulk_formats.py creates four files (two each for current and historical legislators) in csv formats. The csv files do not include all fields from the legislator yaml files, and include data from the social media yaml. All four files are stored in the ~/alternate_formats directory.
For each yaml file there is a matching json file in the alternate_formats folder. These json files are kept up to date automatically as long as all updates are performed by the scripts. After hand-editing a yaml file, please run generate_json.py to update the json files.
Other Scripts
----------------------

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,4 @@
import csv
import json
import utils
def run():
@@ -63,11 +62,6 @@ def run():
print("Loading %s..." % filename)
legislators = utils.load_data(filename)
#convert yaml to json
utils.write(
json.dumps(legislators, sort_keys=True, indent=2, default=utils.format_datetime),
"../alternate_formats/%s.json" %filename.rstrip(".yaml"))
#convert yaml to csv
csv_output = csv.writer(open("../alternate_formats/%s.csv"%filename.rstrip(".yaml"),"w"))

21
scripts/generate_json.py Normal file
View File

@@ -0,0 +1,21 @@
import json
import utils
import glob
import os
def run():
    """Write a JSON copy of every YAML file found one directory up.

    Each ``../*.yaml`` file (relative to the current working directory) is
    loaded through ``utils.load_data`` and serialised to
    ``../alternate_formats/<name>.json``.
    """
    for yaml_path in glob.glob("../*.yaml"):
        # Only the bare file name is handed to utils.load_data and used to
        # build the output name; the "../" prefix is dropped.
        filename = os.path.basename(yaml_path)
        print("Converting %s..." % filename)

        data = utils.load_data(filename)

        # utils.format_datetime is json's fallback for values it cannot
        # serialise natively (presumably dates -- confirm in utils).
        json_text = json.dumps(data, default=utils.format_datetime)
        stem = filename.replace(".yaml", "")
        utils.write(json_text, "../alternate_formats/%s.json" % stem)
# Convert only when executed as a script, not when imported.
if __name__ == "__main__":
    run()

View File

@@ -72,6 +72,7 @@ import pprint
import rtyaml
from datetime import datetime
import time
import json
import lxml.html # for meta redirect parsing
import yaml
@@ -188,8 +189,10 @@ def load_data(path):
return yaml_load(os.path.join(data_dir(), path))
def save_data(data, path):
return yaml_dump(data, os.path.join(data_dir(), path))
yaml_dump(data, os.path.join(data_dir(), path))
write(
json.dumps(data, default=format_datetime),
"../alternate_formats/%s" %path.replace(".yaml", ".json"))
##### Downloading

28
test/test_json_matches.py Normal file
View File

@@ -0,0 +1,28 @@
#!/usr/bin/env python
import json
import rtyaml
import glob
def jsonKeys2str(x):
    """Turn digit-only string keys of a decoded JSON object back into ints.

    Some of the YAML files use integer mapping keys, which JSON can only
    store as strings. Used as a ``json.load`` ``object_hook``, this restores
    those keys so the decoded JSON compares equal to the YAML data.

    Caveat: a key that is *meant* to be a digit-only string (for example
    ``"123"``) is converted too, so such keys would compare unequal.

    Args:
        x: A decoded JSON value. Only dicts are transformed.

    Returns:
        A new dict with digit-only keys converted to ``int``, or ``x``
        itself when it is not a dict.
    """
    if not isinstance(x, dict):
        return x

    converted = {}
    for key, value in x.items():
        if key.isdigit():
            try:
                key = int(key)
            except ValueError:
                # str.isdigit() is true for characters such as superscript
                # digits that int() cannot parse; keep those keys as strings.
                pass
        converted[key] = value
    return converted
# Compare every YAML file in the current working directory with its JSON
# counterpart in alternate_formats/ and exit non-zero if any pair differs.
yamls = glob.glob("*.yaml")
ret = 0
for path in yamls:
    json_path = "alternate_formats/{}".format(path.replace(".yaml", ".json"))

    # Context managers close each handle as soon as it has been parsed
    # instead of leaving it open until garbage collection.
    with open(path) as yaml_file:
        yaml_data = rtyaml.load(yaml_file)
    with open(json_path, 'r') as json_file:
        # jsonKeys2str restores integer keys that JSON stored as strings.
        json_data = json.load(json_file, object_hook=jsonKeys2str)

    if yaml_data != json_data:
        ret = 1
        print("Error: {} does not match the generated json.".format(path))

# Raise SystemExit directly: the exit() helper is injected by the site
# module and is not guaranteed to exist (e.g. under "python -S").
raise SystemExit(ret)