Add a diff option for bills to show file changes before writing updates to disk

This commit is contained in:
Joshua Tauberer
2022-05-14 11:47:03 +00:00
parent b8451e58a3
commit ef5ce90600
2 changed files with 30 additions and 20 deletions

View File

@@ -135,7 +135,10 @@ def process_bill(bill_id, options):
# Convert and write out data.json and data.xml.
utils.write(
json.dumps(bill_data, indent=2, sort_keys=True),
os.path.dirname(fdsys_xml_path) + '/data.json')
os.path.dirname(fdsys_xml_path) + '/data.json',
{
"diff": options.get("diff")
})
from bill_info import create_govtrack_xml
with open(os.path.dirname(fdsys_xml_path) + '/data.xml', 'wb') as xml_file:
@@ -148,7 +151,10 @@ def process_bill(bill_id, options):
# file under a new path.
utils.write(
utils.read(_path_to_billstatus_file(bill_id).replace(".xml", "-lastmod.txt")),
os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt"))
os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt"),
{
"diff": options.get("diff")
})
return {
"ok": True,
@@ -291,9 +297,18 @@ def reparse_actions(bill_id, options):
wrote_any = False
if options.get("diff"):
confirmer = utils.show_diff_ask_ok
else:
# If no --diff is given, just check that
# the content hasn't changed --- don't bother
# writing out anything with identical content.
def confirmer(source, revised, fn):
return source != revised
# Write new data.json file.
revised = json.dumps(bill_data, indent=2, sort_keys=True)
if utils.show_diff_ask_ok(source, revised, data_json_fn):
if confirmer(source, revised, data_json_fn):
utils.write(revised, data_json_fn)
wrote_any = True
@@ -303,7 +318,7 @@ def reparse_actions(bill_id, options):
with open(data_xml_fn, 'r') as xml_file:
source = xml_file.read()
revised = create_govtrack_xml(bill_data, options)
if utils.show_diff_ask_ok(source, revised.decode("utf8"), data_xml_fn):
if confirmer(source, revised.decode("utf8"), data_xml_fn):
with open(data_xml_fn, 'wb') as xml_file:
xml_file.write(revised)
wrote_any = True

View File

@@ -348,31 +348,26 @@ def write(content, destination, options={}):
if options.get("diff"):
# Instead of writing the file, do a comparison with what's on disk
# to test any changes. But be nice and replace any update date with
# what's in the previous file so we avoid spurious changes. Use
# how updated_at appears in the JSON and in the XML.
# what's in the previous file so we avoid spurious changes in the
# diff. Use how updated_at appears in the JSON and in the XML.
if os.path.exists(destination):
with open(destination) as f:
existing_content = f.read()
source = f.read()
revised = content
for pattern in ('"updated_at": ".*?"', 'updated=".*?"'):
m1 = re.search(pattern, existing_content)
m2 = re.search(pattern, content)
m1 = re.search(pattern, source)
m2 = re.search(pattern, revised)
if m1 and m2:
content = content.replace(m2.group(0), m1.group(0))
revised = revised.replace(m2.group(0), m1.group(0))
# Avoid writing to disk and spawning `diff` by checking if
# the files match in memory.
if content == existing_content:
if revised == source:
return
# Shell `diff` and let it display output directly to the console.
# Write `content` to disk first so diff can see it. Maybe more
# efficient to pipe?
fn = "/tmp/congress-changed-file"
with open(fn, 'w') as f:
f.write(content)
os.system("diff -u %s %s" % (destination, fn))
os.unlink(fn)
return
if not show_diff_ask_ok(source, revised, destination):
# User cancelled save.
return
# Save the content to disk.
mkdir_p(os.path.dirname(destination))