mirror of
https://github.com/unitedstates/congress.git
synced 2026-03-25 14:00:05 -04:00
269 lines
9.2 KiB
Python
269 lines
9.2 KiB
Python
import re, logging, datetime, time, json
|
|
from lxml import etree
|
|
from lxml.html import fromstring
|
|
from amendment_text import fetch_amendment_text
|
|
import utils
|
|
|
|
from bill_info import sponsor_for, actions_for
|
|
|
|
# TODO:
|
|
# "TEXT OF AMENDMENT AS SUBMITTED"
|
|
# "COSPONSORS"
|
|
|
|
def fetch_amendment(amdt_id, options):
    """Download, parse and save one amendment.

    The amendment's information page is downloaded with ``utils.download``
    (cached under the path given by ``amdt_cache_for``), parsed into a dict
    of amendment fields, and written out by ``output_amendment``. When
    ``options["fulltext"]`` is truthy the amendment text is fetched as well
    and written to a ``text.txt`` file next to the data files.

    Returns a dict with ``ok`` and ``saved`` flags. It also carries
    ``reason`` when nothing was saved, and ``fulltext`` (True/False) when the
    full text was requested.
    """
    logging.info("\n[%s] Fetching..." % amdt_id)

    # fetch the amendment details page
    body = utils.download(
        amdt_url_for(amdt_id),
        amdt_cache_for(amdt_id, "information.html"),
        options)

    if not body:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    amdt_type, number, congress = utils.split_bill_id(amdt_id)

    # actions_for may hand back None; treat that as "no actions"
    actions = actions_for(body, amdt_id, is_amendment=True)
    if actions is None:
        actions = []
    parse_amendment_actions(actions)

    # identity
    amdt = {
        'amendment_id': amdt_id,
        'amendment_type': amdt_type,
        'chamber': amdt_type[0],
        'number': number,
        'congress': congress,
    }

    # what is being amended
    amdt['amends'] = amends_for(body, grab_bill=False)
    amdt['amends_bill'] = amends_for(body, grab_bill=True)
    amdt['house_number'] = house_number_for(body)

    # dates and sponsor
    amdt['offered_at'] = offered_at_for(body, 'offered')
    amdt['submitted_at'] = offered_at_for(body, 'submitted')
    amdt['proposed_at'] = offered_at_for(body, 'proposed')
    amdt['sponsor'] = sponsor_for(body)

    # descriptive text
    amdt['title'] = amendment_simple_text_for(body, "title")
    amdt['description'] = amendment_simple_text_for(body, "description")
    amdt['purpose'] = amendment_simple_text_for(body, "purpose")

    amdt['actions'] = actions
    amdt['updated_at'] = datetime.datetime.fromtimestamp(time.time())

    set_amendment_status(amdt)
    output_amendment(amdt, options)

    outcome = {'ok': True, 'saved': True}
    if not options.get("fulltext", False):
        return outcome

    # fetch amendment text
    fulltext = fetch_amendment_text(body, amdt, options)
    if not fulltext:
        outcome['fulltext'] = False
        return outcome

    outpt = "%s/%s/amendments/%s/%s%s/%s" % (utils.data_dir(), congress, amdt_type, amdt_type, number, "text.txt")
    logging.info(outpt)
    logging.info("[%s] Writing full text to disk..." % amdt['amendment_id'])
    utils.write(fulltext, outpt)

    outcome['fulltext'] = True
    return outcome
|
|
|
|
|
|
def output_amendment(amdt, options):
    """Write the amendment dict to disk as JSON and as XML.

    The JSON file is a straight dump of ``amdt`` (keys sorted, datetimes
    formatted through ``utils.format_datetime``). The XML file is built
    node by node under an ``<amendment>`` root. When ``options["govtrack"]``
    is truthy, a person sponsor is written with the id returned by
    ``utils.get_govtrack_person_id`` instead of the THOMAS id.
    """
    amdt_id = amdt['amendment_id']
    logging.info("[%s] Writing to disk..." % amdt_id)

    # output JSON - so easy!
    utils.write(
        json.dumps(amdt, sort_keys=True, indent=2, default=utils.format_datetime),
        output_for_amdt(amdt_id, "json")
    )

    # output XML
    govtrack_type_codes = {
        'hr': 'h', 's': 's',
        'hres': 'hr', 'sres': 'sr',
        'hjres': 'hj', 'sjres': 'sj',
        'hconres': 'hc', 'sconres': 'sc',
    }
    root = etree.Element("amendment")
    root.set("session", amdt['congress'])
    root.set("chamber", amdt['amendment_type'][0])
    root.set("number", amdt['number'])
    root.set("updated", utils.format_datetime(amdt['updated_at']))

    make_node = utils.make_node

    amended_bill = amdt["amends_bill"]
    amends_type = govtrack_type_codes[amended_bill["bill_type"]]
    amends_number = str(amended_bill["number"])
    house_number = amdt["house_number"]
    # chop off A from the house_number
    sequence = str(int(house_number[1:])) if house_number else ""
    make_node(root, "amends", None,
              type=amends_type,
              number=amends_number,
              sequence=sequence)

    make_node(root, "status", amdt['status'], datetime=amdt['status_at'])

    sponsor = amdt['sponsor']
    if sponsor and sponsor['type'] == 'person':
        thomas_id = sponsor['thomas_id']
        if options.get("govtrack", False):
            govtrack_id = str(utils.get_govtrack_person_id('thomas', thomas_id))
            make_node(root, "sponsor", None, id=govtrack_id)
        else:
            make_node(root, "sponsor", None, thomas_id=thomas_id)
    elif sponsor and sponsor['type'] == 'committee':
        make_node(root, "sponsor", None, committee=sponsor['name'])
    else:
        make_node(root, "sponsor", None)

    # fall back to the submitted date when there is no offered date
    make_node(root, "offered", None, datetime=amdt['offered_at'] or amdt['submitted_at'])

    if amdt["title"]:
        make_node(root, "title", amdt["title"])
    # the description falls back to the purpose text
    make_node(root, "description", amdt["description"] or amdt["purpose"])
    make_node(root, "purpose", amdt["purpose"])

    actions_node = make_node(root, "actions", None)
    for action in amdt['actions']:
        is_vote = action['type'] == 'vote'
        # only votes get their own tag; everything else is a plain <action>
        node = make_node(actions_node,
                         "vote" if is_vote else "action",
                         None,
                         datetime=action['acted_at'])
        if is_vote:
            node.set("how", action["how"])
            node.set("result", action["result"])
            if action.get("roll") is not None:
                node.set("roll", str(action["roll"]))
        if action.get('text'):
            make_node(node, "text", action['text'])
        if action.get('in_committee'):
            make_node(node, "committee", None, name=action['in_committee'])
        for cr in action['references']:
            make_node(node, "reference", None, ref=cr['reference'], label=cr['type'])

    utils.write(
        etree.tostring(root, pretty_print=True),
        output_for_amdt(amdt_id, "xml")
    )
|
|
|
|
def house_number_for(body):
    """Return the parenthesized "A<digits>" number that follows the
    H.AMDT. heading in the page body, or None when it is not present.

    The search is case-insensitive.
    """
    found = re.search(r"H.AMDT.\d+</b>\n \((A\d+)\)", body, re.I)
    return found.group(1) if found else None
|
|
|
|
def amends_for(body, grab_bill):
    """Work out which document the amendment amends.

    Looks for the "Amends: " line in the page body and parses the first
    bdquery link on it. With ``grab_bill`` true, the first linked document
    is used. With ``grab_bill`` false, a leading document followed by a
    newline-comma separator is skipped so the document after it is parsed.

    Returns a dict with ``document_type`` ("bill" or "amendment"),
    ``congress`` (int), ``number`` (int), and either ``bill_type`` or
    ``amendment_type`` (the link's type code mapped through
    ``utils.thomas_types_2``).

    Raises Exception when no "Amends:" link can be found.
    """
    # When an amendment amends an amendment, the bill is listed first, followed by a comma
    # and newline. Skip the bill when it exists and just parse the amendment.
    skip_bill = r"" if grab_bill else r"(?:.*\n, )?"

    # Raw strings throughout: "\?" and "\d" are invalid escapes in a
    # plain string literal and trigger warnings on current Pythons.
    pattern = (r"Amends: "
               + skip_bill
               + r'<a href="/cgi-bin/bdquery/z\?d(\d+):([A-Z]+)(\d+):')

    match = re.search(pattern, body)
    if not match:
        raise Exception("Choked finding what the amendment amends.")

    congress = int(match.group(1))
    bill_type = utils.thomas_types_2[match.group(2)]
    bill_number = int(match.group(3))
    is_bill = bill_type not in ("samdt", "hamdt")

    return {
        "document_type": "bill" if is_bill else "amendment",
        "congress": congress,
        "bill_type" if is_bill else "amendment_type": bill_type,
        "number": bill_number,
    }
|
|
|
|
def offered_at_for(body, offer_type):
    """Return the date attached to the sponsor line for ``offer_type``.

    Searches (case-insensitively) for "(<offer_type> M/D/YYYY" on the line
    after the "Sponsor:" line. ``offer_type`` is inserted into the regular
    expression as-is; the callers in this file pass 'offered',
    'submitted' or 'proposed'.

    Returns the date as a "YYYY-MM-DD" string, or None when that kind of
    date is not on the page.
    """
    # The date fragment is a raw string: "\d" in a plain literal is an
    # invalid escape sequence and warns on current Pythons.
    match = re.search(r"Sponsor:.*\n.*\(" + offer_type + r" (\d+/\d+/\d+)", body, re.I)
    if match is None:
        return None  # not all of offered/submitted/proposed will be present

    parsed = datetime.datetime.strptime(match.group(1), "%m/%d/%Y")
    return parsed.strftime("%Y-%m-%d")
|
|
|
|
def amendment_simple_text_for(body, heading):
    """Return the text that follows an "AMENDMENT <HEADING>:" label.

    The label is matched case-insensitively and must be followed by either
    "<br />" or a single space. The rest of that line, stripped of
    surrounding whitespace, is returned. Returns None when the label is
    absent or the text is the "*** TITLE NOT FOUND ***" placeholder.
    """
    pattern = r"AMENDMENT " + heading.upper() + ":(<br />| )(.+)"
    found = re.search(pattern, body, re.I)
    if not found:
        return None

    text = found.group(2).strip()
    # placeholder text means there is no real value
    return None if text == "*** TITLE NOT FOUND ***" else text
|
|
|
|
def parse_amendment_actions(actions):
    """Tag vote and withdrawal actions, in place.

    Each action is a dict with a 'text' entry. Depending on what the text
    matches, the dict is updated:

    * House or Senate vote on the amendment: 'type' becomes "vote",
      'result' is "pass" or "fail", and 'how' is the matched description
      of how it was decided. For recorded / Yea-Nay votes 'how' is "roll"
      and 'roll' holds the roll call number as an int.
    * Senate motion to table that was agreed to: recorded as a vote with
      result "fail".
    * Senate motion to table that was NOT agreed to: left untouched, since
      it is not a vote on agreeing to the amendment.
    * Senate withdrawal: 'type' becomes "withdrawn".

    Returns None.
    """
    # Compile once, outside the loop.
    house_vote = re.compile(r"On agreeing to the .* amendment (\(.*\) )?(Agreed to|Failed) (without objection|by [^\.:]+|by recorded vote: (\d+) - (\d+)(, \d+ Present)? \(Roll no. (\d+)\))\.")
    senate_vote = re.compile(r"(Motion to table )?Amendment SA \d+ (as modified )?(agreed to|not agreed to) in Senate by ([^\.:\-]+|Yea-Nay( Vote)?. (\d+) - (\d+)(, \d+ Present)?. Record Vote Number: (\d+))\.")
    withdrawn = re.compile(r"Proposed amendment SA \d+ withdrawn in Senate")

    for action in actions:
        text = action['text']

        # House Vote
        m = house_vote.match(text)
        if m:
            action["type"] = "vote"
            action["result"] = "pass" if m.group(2) == "Agreed to" else "fail"

            action["how"] = m.group(3)
            if "recorded vote" in m.group(3):
                action["how"] = "roll"
                action["roll"] = int(m.group(7))

        # Senate Vote
        m = senate_vote.match(text)
        if m:
            motion_to_table = bool(m.group(1))
            agreed = m.group(3) == "agreed to"

            if motion_to_table and not agreed:
                # A failed motion to table doesn't count as a vote on
                # agreeing to the amendment. Decide this BEFORE touching
                # the action, so it is never tagged as a "vote" without
                # the 'result'/'how' entries that consumers of vote
                # actions read.
                continue

            action["type"] = "vote"
            # An agreed-to motion to table kills the amendment, so the
            # result is reversed in that case.
            action["result"] = "pass" if (agreed and not motion_to_table) else "fail"

            action["how"] = m.group(4)
            if "Yea-Nay" in m.group(4):
                action["how"] = "roll"
                action["roll"] = int(m.group(9))

        # Withdrawn
        if withdrawn.match(text):
            action['type'] = 'withdrawn'
|
|
|
|
def set_amendment_status(amdt):
    """Set ``amdt['status']`` and ``amdt['status_at']`` from its actions.

    The status starts out as 'offered', dated by the offered date (or the
    submitted date when there is no offered date). Each vote or withdrawal
    action, taken in list order, then overrides it with the vote result
    ('pass' / 'fail') or 'withdrawn' and that action's 'acted_at' value, so
    the last such action wins.
    """
    status = 'offered'
    status_at = amdt['offered_at'] or amdt['submitted_at']

    for action in amdt['actions']:
        kind = action['type']
        if kind == 'vote':
            status = action['result']  # 'pass', 'fail'
            status_at = action['acted_at']
        elif kind == 'withdrawn':
            status = 'withdrawn'
            status_at = action['acted_at']

    amdt['status'] = status
    amdt['status_at'] = status_at
|
|
|
|
def amdt_url_for(amdt_id):
    """Build the thomas.loc.gov bdquery URL for an amendment id.

    The id is split with ``utils.split_bill_id`` into type, number and
    congress. The congress is zero-padded to three digits in the URL, and
    the list name uses the first entry of ``utils.thomas_types`` for the
    amendment type.
    """
    amdt_type, number, congress = utils.split_bill_id(amdt_id)
    thomas_type = utils.thomas_types[amdt_type][0]
    congress_num, amdt_num = int(congress), int(number)

    url_template = "http://thomas.loc.gov/cgi-bin/bdquery/D?d%03d:%d:./list/bss/d%03d%s.lst::"
    return url_template % (congress_num, amdt_num, congress_num, thomas_type)
|
|
|
|
def amdt_cache_for(amdt_id, file):
    """Return the relative cache path for ``file`` under this amendment:
    "<congress>/amendments/<type>/<type><number>/<file>".
    """
    amdt_type, number, congress = utils.split_bill_id(amdt_id)
    path_parts = (congress, amdt_type, amdt_type, number, file)
    return "%s/amendments/%s/%s%s/%s" % path_parts
|
|
|
|
def output_for_amdt(amdt_id, format):
    """Return the output path of the amendment's "data.<format>" file,
    rooted at ``utils.data_dir()``:
    "<data_dir>/<congress>/amendments/<type>/<type><number>/data.<format>".
    """
    amdt_type, number, congress = utils.split_bill_id(amdt_id)
    data_dir = utils.data_dir()
    filename = "data.%s" % format
    return "%s/%s/amendments/%s/%s%s/%s" % (data_dir, congress, amdt_type, amdt_type, number, filename)
|
|
|