From ef5ce9060080dd00a49587be92e614d05afa46f5 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Sat, 14 May 2022 11:47:03 +0000 Subject: [PATCH] Add a diff option for bills to show file changes before writing updates to disk --- congress/tasks/bills.py | 23 +++++++++++++++++++---- congress/tasks/utils.py | 27 +++++++++++---------------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/congress/tasks/bills.py b/congress/tasks/bills.py index 694f9cd..92aec56 100644 --- a/congress/tasks/bills.py +++ b/congress/tasks/bills.py @@ -135,7 +135,10 @@ def process_bill(bill_id, options): # Convert and write out data.json and data.xml. utils.write( json.dumps(bill_data, indent=2, sort_keys=True), - os.path.dirname(fdsys_xml_path) + '/data.json') + os.path.dirname(fdsys_xml_path) + '/data.json', + { + "diff": options.get("diff") + }) from bill_info import create_govtrack_xml with open(os.path.dirname(fdsys_xml_path) + '/data.xml', 'wb') as xml_file: @@ -148,7 +151,10 @@ def process_bill(bill_id, options): # file under a new path. utils.write( utils.read(_path_to_billstatus_file(bill_id).replace(".xml", "-lastmod.txt")), - os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt")) + os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt"), + { + "diff": options.get("diff") + }) return { "ok": True, @@ -291,9 +297,18 @@ def reparse_actions(bill_id, options): wrote_any = False + if options.get("diff"): + confirmer = utils.show_diff_ask_ok + else: + # If no --diff is given, just check that + # the content hasn't changed --- don't bother + # writing out anything with identical content. + def confirmer(source, revised, fn): + return source != revised + # Write new data.json file. 
revised = json.dumps(bill_data, indent=2, sort_keys=True) - if utils.show_diff_ask_ok(source, revised, data_json_fn): + if confirmer(source, revised, data_json_fn): utils.write(revised, data_json_fn) wrote_any = True @@ -303,7 +318,7 @@ def reparse_actions(bill_id, options): with open(data_xml_fn, 'r') as xml_file: source = xml_file.read() revised = create_govtrack_xml(bill_data, options) - if utils.show_diff_ask_ok(source, revised.decode("utf8"), data_xml_fn): + if confirmer(source, revised.decode("utf8"), data_xml_fn): with open(data_xml_fn, 'wb') as xml_file: xml_file.write(revised) wrote_any = True diff --git a/congress/tasks/utils.py b/congress/tasks/utils.py index d46a9ba..ad0159f 100644 --- a/congress/tasks/utils.py +++ b/congress/tasks/utils.py @@ -348,31 +348,26 @@ def write(content, destination, options={}): if options.get("diff"): # Instead of writing the file, do a comparison with what's on disk # to test any changes. But be nice and replace any update date with - # what's in the previous file so we avoid spurrious changes. Use - # how updated_at appears in the JSON and in the XML. + # what's in the previous file so we avoid spurious changes in the + # diff. Use how updated_at appears in the JSON and in the XML. if os.path.exists(destination): with open(destination) as f: - existing_content = f.read() + source = f.read() + revised = content for pattern in ('"updated_at": ".*?"', 'updated=".*?"'): - m1 = re.search(pattern, existing_content) - m2 = re.search(pattern, content) + m1 = re.search(pattern, source) + m2 = re.search(pattern, revised) if m1 and m2: - content = content.replace(m2.group(0), m1.group(0)) + revised = revised.replace(m2.group(0), m1.group(0)) # Avoid writing to disk and spawning `diff` by checking if # the files match in memory. - if content == existing_content: + if revised == source: return - # Shell `diff` and let it display output directly to the console. - # Write `content` to disk first so diff can see it. 
Maybe more - # efficient to pipe? - fn = "/tmp/congress-changed-file" - with open(fn, 'w') as f: - f.write(content) - os.system("diff -u %s %s" % (destination, fn)) - os.unlink(fn) - return + if not show_diff_ask_ok(source, revised, destination): + # User cancelled save. + return # Save the content to disk. mkdir_p(os.path.dirname(destination))