(FORCE) new commit with test data removed, generate json on saving data

2025-12-19 09:50:37 -05:00 · 2017-03-08 23:11:43 -05:00
parent d10c351fa5
commit 45a0ab5559
14 changed files with 66 additions and 9 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1 @@
+alternate_formats/* -diff
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,6 +14,7 @@ script:
 - pyflakes .
 - python test/are_files_linted.py
 - python test/validate.py
+ - python test/test_json_matches.py

 notifications:
  email:
--- a/README.md
+++ b/README.md
@@ -474,7 +474,9 @@ The following script takes one required command line argument
 where congress_number is the number of the congress to be updated. As of July, 2013, the permanent URL for future roll call data is unclear, and as such, the script may need to be modified when it is run for the 114th congress.

 The following script may be run to create alternately formatted data files. It takes no command-line arguments.
-* alternate_bulk_formats.py creates four files (two each for current and historical legislators) in json and csv formats. The csv files do not include all fields from the legislator yaml files, and include data from the social media yaml. All four files are stored in the ~/alternate_formats directory.
+* alternate_bulk_formats.py creates two files (one each for current and historical legislators) in csv format. The csv files do not include all fields from the legislator yaml files, but do include data from the social media yaml. Both files are stored in the ~/alternate_formats directory.
+
+For each yaml file there is a matching json file in the alternate_formats folder. These json files are kept up to date automatically as long as all updates are performed by the scripts. After hand-editing a yaml file, please run generate_json.py to regenerate them.

 Other Scripts
 ----------------------
--- a/alternate_formats/committee-membership-current.json
+++ b/alternate_formats/committee-membership-current.json
--- a/alternate_formats/committees-current.json
+++ b/alternate_formats/committees-current.json
--- a/alternate_formats/committees-historical.json
+++ b/alternate_formats/committees-historical.json
--- a/alternate_formats/executive.json
+++ b/alternate_formats/executive.json
--- a/alternate_formats/legislators-current.json
+++ b/alternate_formats/legislators-current.json
--- a/alternate_formats/legislators-historical.json
+++ b/alternate_formats/legislators-historical.json
--- a/alternate_formats/legislators-social-media.json
+++ b/alternate_formats/legislators-social-media.json
--- a/scripts/alternate_bulk_formats.py
+++ b/scripts/alternate_bulk_formats.py
@@ -1,5 +1,4 @@
 import csv
-import json
 import utils

 def run():
@@ -63,11 +62,6 @@ def run():
 		print("Loading %s..." % filename)
 		legislators = utils.load_data(filename)

-		#convert yaml to json
-		utils.write(
-		json.dumps(legislators, sort_keys=True, indent=2, default=utils.format_datetime),
-		"../alternate_formats/%s.json" %filename.rstrip(".yaml"))
-
 		#convert yaml to csv
 		csv_output = csv.writer(open("../alternate_formats/%s.csv"%filename.rstrip(".yaml"),"w"))

--- a/scripts/generate_json.py
+++ b/scripts/generate_json.py
@@ -0,0 +1,21 @@
+import json
+import utils
+import glob
+import os
+
+def run():
+
+	#yaml filenames
+    yamls = list(map(os.path.basename, glob.glob("../*.yaml")))
+
+    for filename in yamls:
+        print("Converting %s..." % filename)
+        data = utils.load_data(filename)
+
+		#convert yaml to json
+        utils.write(
+            json.dumps(data, default=utils.format_datetime),
+            "../alternate_formats/%s.json" %filename.replace(".yaml", ""))
+
+if __name__ == '__main__':
+    run()
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -72,6 +72,7 @@ import pprint
 import rtyaml
 from datetime import datetime
 import time
+import json

 import lxml.html # for meta redirect parsing
 import yaml
@@ -188,8 +189,10 @@ def load_data(path):
  return yaml_load(os.path.join(data_dir(), path))

 def save_data(data, path):
-  return yaml_dump(data, os.path.join(data_dir(), path))
-
+  yaml_dump(data, os.path.join(data_dir(), path))
+  write(
+		json.dumps(data, default=format_datetime),
+		"../alternate_formats/%s" %path.replace(".yaml", ".json"))

 ##### Downloading

--- a/test/test_json_matches.py
+++ b/test/test_json_matches.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+import json
+import rtyaml
+import glob
+
+def jsonKeys2str(x):
+    """Convert digit-only string keys of a dict back to integers, because some of
+    the yamls have integer keys, which json converts to strings. Caveat: a yaml key
+    that is a digit-only string meant to stay a string is converted too, so this may break."""
+    if isinstance(x, dict):
+        return {(int(k) if k.isdigit() else k):v for k, v in x.items()}
+    return x
+
+yamls = glob.glob("*.yaml")
+
+ret = 0
+for path in yamls:
+    yaml_data = rtyaml.load(open(path))
+    json_data = json.load(
+	       open("alternate_formats/{}".format(
+		                 path.replace(".yaml", ".json")), 'r'),
+	       object_hook=jsonKeys2str)
+    if yaml_data != json_data:
+        ret = 1
+        print("Error: {} does not match the generated json.".format(path))
+
+exit(ret)