Files
congress/tasks/bill_info.py

1151 lines
46 KiB
Python

import utils
import logging
import re
import json
from lxml import etree
import copy
import datetime
def create_govtrack_xml(bill, options):
    """Serialize a bill dict as GovTrack-style XML.

    Builds a <bill> element tree from the fields of ``bill`` (sources, status,
    titles, sponsor, cosponsors, actions, committees, related bills, subjects,
    amendments, summary and committee reports) and returns the result of
    ``etree.tostring(root, pretty_print=True)``.

    ``options`` is a dict-like object; when its "govtrack" entry is truthy,
    ``bioguide_id`` attributes are emitted as ``id`` attributes holding the
    value returned by ``utils.translate_legislator_id``.
    """
    # Map this project's bill type codes onto GovTrack's shorter codes.
    govtrack_type_codes = {'hr': 'h', 's': 's', 'hres': 'hr', 'sres': 'sr', 'hjres': 'hj', 'sjres': 'sj', 'hconres': 'hc', 'sconres': 'sc'}
    root = etree.Element("bill")
    root.set("session", bill['congress'])
    root.set("type", govtrack_type_codes[bill['bill_type']])
    root.set("number", bill['number'])
    root.set("updated", utils.format_datetime(bill['updated_at']))

    # Local wrapper around utils.make_node that optionally rewrites attributes
    # for GovTrack output before delegating.
    def make_node(parent, tag, text, **attrs):
        if options.get("govtrack", False):
            # Rewrite bioguide_id attributes as just id with GovTrack person IDs.
            # Attributes with falsy values are dropped in this mode.
            attrs2 = {}
            for k, v in attrs.items():
                if v:
                    if k == "bioguide_id":
                        # remap "bioguide_id" attributes to govtrack "id"
                        k = "id"
                        v = str(utils.translate_legislator_id('bioguide', v, 'govtrack'))
                    attrs2[k] = v
            attrs = attrs2
        return utils.make_node(parent, tag, text, **attrs)

    # for American Memory Century of Lawmaking bills...
    for source in bill.get("sources", []):
        n = make_node(root, "source", "")
        # sorted() keeps the attribute order deterministic.
        for k, v in sorted(source.items()):
            if k == "source":
                n.text = v
            elif k == "source_url":
                n.set("url", v)
            else:
                # NOTE(review): `unicode` is a Python 2 builtin; this line
                # raises NameError on Python 3 -- confirm the target runtime.
                n.set(k, unicode(v))
    if "original_bill_number" in bill:
        make_node(root, "bill-number", bill["original_bill_number"])

    make_node(root, "state", bill['status'], datetime=bill['status_at'])
    old_status = make_node(root, "status", None)
    make_node(old_status, "introduced" if bill['status'] in ("INTRODUCED", "REFERRED") else "unknown", None, datetime=bill['status_at'])  # dummy for the sake of comparison
    make_node(root, "introduced", None, datetime=bill['introduced_at'])

    # Titles: one <title> per entry, with optional "as" and "partial" attributes.
    titles = make_node(root, "titles", None)
    for title in bill['titles']:
        n = make_node(titles, "title", title['title'])
        n.set("type", title['type'])
        if title['as']:
            n.set("as", title['as'])
        if title['is_for_portion']:
            n.set("partial", "1")

    # A <sponsor> element is always written, even when there is no sponsor.
    if bill['sponsor']:
        # TODO: Sponsored by committee?
        make_node(root, "sponsor", None, bioguide_id=bill['sponsor']['bioguide_id'])
    else:
        make_node(root, "sponsor", None)

    cosponsors = make_node(root, "cosponsors", None)
    for cosp in bill['cosponsors']:
        n = make_node(cosponsors, "cosponsor", None, bioguide_id=cosp["bioguide_id"])
        if cosp["sponsored_at"]:
            n.set("joined", cosp["sponsored_at"])
        if cosp["withdrawn_at"]:
            n.set("withdrawn", cosp["withdrawn_at"])

    # Actions: a handful of action types get their own tag name, everything
    # else is written as a generic <action>.
    actions = make_node(root, "actions", None)
    for action in bill['actions']:
        a = make_node(actions,
                      action['type'] if action['type'] in ("vote", "vote-aux", "calendar", "topresident", "signed", "enacted", "vetoed") else "action",
                      None,
                      datetime=action['acted_at'])
        if action.get("status"):
            a.set("state", action["status"])
        if action['type'] in ('vote', 'vote-aux'):
            # The element is cleared and its attributes re-added so that
            # "datetime" lands in a specific position among them.
            a.clear()  # re-insert date between some of these attributes
            a.set("how", action["how"])
            a.set("type", action["vote_type"])
            if action.get("roll") != None:
                a.set("roll", action["roll"])
            a.set("datetime", utils.format_datetime(action['acted_at']))
            a.set("where", action["where"])
            a.set("result", action["result"])
            if action.get("suspension"):
                a.set("suspension", "1")
            if action.get("status"):
                a.set("state", action["status"])
        if action['type'] == 'calendar' and "calendar" in action:
            a.set("calendar", action["calendar"])
            if action["under"]:
                a.set("under", action["under"])
            if action["number"]:
                a.set("number", action["number"])
        if action['type'] == 'enacted':
            a.clear()  # re-insert date between some of these attributes
            # Law number is written as "<congress>-<number>".
            a.set("number", "%s-%s" % (bill['congress'], action["number"]))
            a.set("type", action["law"])
            a.set("datetime", utils.format_datetime(action['acted_at']))
            if action.get("status"):
                a.set("state", action["status"])
        if action['type'] == 'vetoed':
            if action.get("pocket"):
                a.set("pocket", "1")
        if action.get('text'):
            make_node(a, "text", action['text'])
        if action.get('in_committee'):
            make_node(a, "committee", None, name=action['in_committee'])
        for cr in action['references']:
            make_node(a, "reference", None, ref=cr['reference'], label=cr['type'])

    # Committees: the code is committee_id plus subcommittee_id when present;
    # the "Subcommittee on " prefix is stripped from subcommittee names.
    committees = make_node(root, "committees", None)
    for cmt in bill['committees']:
        make_node(committees, "committee", None, code=(cmt["committee_id"] + cmt["subcommittee_id"]) if cmt.get("subcommittee_id", None) else cmt["committee_id"], name=cmt["committee"], subcommittee=cmt.get("subcommittee").replace("Subcommittee on ", "") if cmt.get("subcommittee") else "", activity=", ".join(c.title() for c in cmt["activity"]))

    # Related bills: only entries of type "bill" are written; the generic
    # "related" reason is emitted as "unknown".
    relatedbills = make_node(root, "relatedbills", None)
    for rb in bill['related_bills']:
        if rb['type'] == "bill":
            rb_bill_type, rb_number, rb_congress = utils.split_bill_id(rb['bill_id'])
            make_node(relatedbills, "bill", None, session=rb_congress, type=govtrack_type_codes[rb_bill_type], number=rb_number, relation="unknown" if rb['reason'] == "related" else rb['reason'])

    # Subjects: the top term goes first and is not repeated in the list.
    subjects = make_node(root, "subjects", None)
    if bill['subjects_top_term']:
        make_node(subjects, "term", None, name=bill['subjects_top_term'])
    for s in bill['subjects']:
        if s != bill['subjects_top_term']:
            make_node(subjects, "term", None, name=s)

    amendments = make_node(root, "amendments", None)
    for amd in bill['amendments']:
        make_node(amendments, "amendment", None, number=amd["chamber"] + str(amd["number"]))

    if bill.get('summary'):
        make_node(root, "summary", bill['summary']['text'], date=bill['summary']['date'], status=bill['summary']['as'])

    committee_reports = make_node(root, "committee-reports", None)
    for report in bill['committee_reports']:
        make_node(committee_reports, "report", report)

    return etree.tostring(root, pretty_print=True)
def sponsor_for(sponsor_dict):
    """Build a sponsor record from a bulk-data sponsor item.

    ``sponsor_dict`` must carry 'fullName' (e.g. "Rep. Smith, John [D-CA-12]")
    and 'bioguideId'. Returns a dict with title, name, district, state,
    bioguide_id and type, or None when ``sponsor_dict`` is None.
    """
    # TODO: This can hopefully be removed. In testing s414-113
    # was missing sponsor data. But all bills have a sponsor?
    if sponsor_dict is None:
        return None

    # TODO: Don't do regex matching here. Find another way.
    parsed = re.match(r'(?P<title>(Rep|Sen))\. (?P<name>.*?) +\[(?P<party>[DRI])-(?P<state>[A-Z][A-Z])(-(?P<district>\d{1,2}|At Large))?\]$',
                      sponsor_dict['fullName'])

    # No district group means a senator. "At Large" also maps to None.
    # TODO: For backwards compatibility, "At Large" returns None (0 would be
    # better) and numbered districts stay strings (an int would be better).
    raw_district = parsed.group("district")
    district = None if raw_district in (None, "At Large") else raw_district

    return {
        'title': parsed.group("title"),
        # the firstName, middleName, lastName fields have inconsistent
        # capitalization - some are all uppercase - so use the parsed name
        'name': parsed.group("name"),
        'district': district,
        'state': parsed.group('state'),
        # 'party': parsed.group('party'),
        'bioguide_id': sponsor_dict['bioguideId'],
        'type': 'person'
    }
def summary_for(summaries):
    """Return the most recently updated summary as a dict, or None.

    ``summaries`` is the bulk-data summaries node (its 'item' entry holds the
    list of summaries); None is returned when it is missing entirely.
    """
    # Some bills are missing the summaries entirely?
    if summaries is None:
        return None

    # The newest summary is the one whose updateDate sorts last
    # lexicographically.
    by_update_date = sorted(summaries['item'], key=lambda entry: entry['updateDate'])
    latest = by_update_date[-1]

    return {
        "date": latest['updateDate'],
        "as": latest['name'],
        "text": strip_tags(latest['text']),
    }
def strip_tags(text):
    """Reduce an HTML fragment to plain text, keeping paragraph breaks."""
    # Closing </p> tags (with any surrounding whitespace) become a blank
    # line, then leading/trailing whitespace is trimmed.
    with_breaks = re.sub("\s*</\s*p\s*>\s*", "\n\n", text).strip()

    # Naive tag removal; good enough in this limited context.
    without_markup = re.sub("<[^>]+>", "", with_breaks)

    # Collapse runs of non-newline whitespace so paragraph breaks survive.
    collapsed = re.sub("[ \t\r\f\v]{2,}", " ", without_markup).strip()

    # Finally turn HTML entities into characters.
    return utils.unescape(collapsed)
def committees_for(committee_list):
    """Flatten the bulk-data committee node into a list of committee dicts.

    Each committee yields one dict (activity, committee, committee_id),
    followed by one dict per subcommittee that additionally carries
    'subcommittee' and 'subcommittee_id' and its own activity list.
    Returns [] when ``committee_list`` is None.
    """
    if committee_list is None:
        return []

    # Known activity labels and the activity codes they translate to.
    activity_codes = {
        "Referred to": ["referral"],
        "Hearings by": ["hearings"],
        "Markup by": ["markup"],
        "Reported by": ["reporting"],
        "Discharged from": ["discharged"],
        "Reported original measure": ["origin", "reporting"],
    }

    def activities_of(entry):
        # Unknown labels are passed through unchanged.
        if not entry['activities']:
            return []
        codes = []
        for activity in entry['activities']['item']:
            label = activity['name']
            codes.extend(activity_codes.get(label, [label]))
        return codes

    def committee_display_name(chamber, name):
        full_name = chamber + ' ' + re.sub(" Committee$", "", name)
        # Preserve backwards compatibility.
        if full_name == "House House Administration":
            return "House Administration"
        return full_name

    def subcommittee_display_name(name):
        # "X Subcommittee" -> "Subcommittee on X"
        return re.sub("(.*) Subcommittee$",
                      lambda found: "Subcommittee on " + found.group(1),
                      name)

    rows = []
    for committee in committee_list['item']:
        parent = {
            'activity': activities_of(committee),
            'committee': committee_display_name(committee['chamber'], committee['name']),
            'committee_id': committee['systemCode'][0:-2].upper(),
        }
        rows.append(parent)

        if committee.get('subcommittees') is None:
            continue
        for sub in committee['subcommittees']['item']:
            # Start from the parent's fields, then override/add the
            # subcommittee-specific ones.
            child = copy.deepcopy(parent)
            child.update({
                'subcommittee': subcommittee_display_name(sub['name']),
                'subcommittee_id': sub['systemCode'][-2:],
                'activity': activities_of(sub),
            })
            rows.append(child)
    return rows
def titles_for(title_list):
    """Parse bulk-data title items and return them in THOMAS-like order.

    Each returned dict has 'title', 'type' (popular/short/official/display/
    nonbillreport), 'as' (lower-cased stage or None) and 'is_for_portion'.
    Raises Exception for an unrecognised title type.
    """
    portion_suffix = " for portions of this bill"
    # (substring to look for, resulting type code), checked in this order.
    type_markers = (
        ("Popular Title", "popular"),
        ("Short Title", "short"),
        ("Official Title", "official"),
        ("Display Title", "display"),
    )

    def parse_title(item):
        full_type = item['titleType']
        for_portion = False
        stage = None

        # "Official Titles as Introduced", "Short Titles on Conference report"
        pieces = re.split(" as | on ", full_type, 1)
        if len(pieces) == 2:
            kind, stage = pieces
            if stage.endswith(portion_suffix):
                for_portion = True
                stage = stage.replace(portion_suffix, "")
            stage = stage.replace(":", "").lower()
        else:
            kind = full_type

        for marker, code in type_markers:
            if marker in kind:
                kind = code
                break
        else:
            if kind != "Non-bill-report":
                raise Exception("Unknown title type: " + kind)
            # TODO: What kind of title is this? Maybe assign
            # a better title_type code once we know.
            kind = "nonbillreport"

        return {
            'title': item['title'],
            'is_for_portion': for_portion,
            'as': stage,
            'type': kind
        }

    parsed = [parse_title(entry) for entry in title_list]

    # THOMAS used to list titles as: short as introduced, short as introduced
    # (for portion), short as later stage, ..., official as introduced,
    # official as later stage, with the "as" stages in chronological order.
    # That made finding the current title easy: take the most recent stage.
    # The bulk data is reverse-chronological, so for backwards compatibility
    # the order is turned around with an explicit sort key rather than a
    # plain reverse, which gets very close to the THOMAS order.
    #
    # Unfortunately this can no longer be relied on because the new bulk
    # data has the "as" stages sometimes in the wrong order: The "reported to
    # senate" status for House bills seems to be consistently out of place.

    def first_position(criteria):
        # Index of the first parsed title whose fields all equal the
        # criteria, in the original (pre-sort) order.
        for index, candidate in enumerate(parsed):
            if all(candidate.get(field) == value for field, value in criteria.items()):
                return index
        return None

    def thomas_order(title):
        return (
            # keep the same 'short', 'official', 'display' order intact
            first_position({'type': title['type']}),
            # within each of those categories, reverse the 'as' order
            -first_position({'type': title['type'], 'as': title.get('as')}),
            # put titles for portions last, within the type/as category
            title['is_for_portion'],
            # and within that, sort alphabetically, case-insensitively (which
            # is what it appears THOMAS used to do)
            title['title'].lower(),
        )

    return sorted(parsed, key=thomas_order)
# The most current title of a given type is the first one in the last 'as'
# subgroup of the titles for the whole bill (that is, if there's no title for
# the whole bill in the last 'as' subgroup, use the previous 'as' subgroup and
# so on) --- we think this logic matches THOMAS/Congress.gov.
def current_title_for(titles, title_type):
    """Return the current title text of ``title_type``, or None if there is none.

    Titles for portions of the bill are ignored. Walking ``titles`` in order,
    the first title of every new 'as' value replaces the previous pick.
    """
    # Sentinel that is not None, cause for popular titles None is a valid 'as'.
    chosen, chosen_as = None, -1
    for entry in titles:
        whole_bill_match = entry['type'] == title_type and not (entry['is_for_portion'] == True)
        # Only the first title seen for each new 'as' is stored.
        if whole_bill_match and entry['as'] != chosen_as:
            chosen, chosen_as = entry['title'], entry['as']
    return chosen
def actions_for(action_list, bill_id, title):
    """Filter and parse raw bulk-data action items into action dicts.

    Items with empty text are dropped, as are Library of Congress (source
    code "9") items that duplicate the immediately preceding item. The rest
    are converted with action_for and parse_bill_action in reverse input
    order, threading the bill status from one action to the next.
    """
    # The bulk XML data has action history information from multiple sources.
    # For major actions, the Library of Congress (code 9) action item often
    # duplicates the information of a House/Senate action item. One of them
    # has to be skipped so the same parsed action info (like a vote) is not
    # recorded twice. THOMAS appears to have suppressed the Library of
    # Congress lines in certain cases - they were not in our older data files.
    def duplicates_previous(item, previous):
        # Date must match the previous action. If both have a time, the times
        # must match. The text must approximately match: the LOC text may have
        # a prefix, different whitespace, or dropped references, so both are
        # passed through action_for (which strips references) and compared
        # with spaces removed.
        return (
            item['sourceSystem']['code'] == "9"
            and item['actionDate'] == previous["actionDate"]
            and (item.get('actionTime') == previous.get("actionTime") or not item.get('actionTime') or not previous.get("actionTime"))
            and action_for(item)['text'].replace(" ", "").endswith(action_for(previous)['text'].replace(" ", ""))
        )

    kept = []
    previous = None
    for item in action_list:
        # Ghost action items with totally empty text are removed.
        # TODO: When removed from upstream data, we can remove that check.
        if item['text'] in (None, ""):
            continue
        if not (previous and duplicates_previous(item, previous)):
            kept.append(item)
        previous = item

    # The actions are in reverse-chronological order in the bulk data XML.
    # Process them in chronological order so that the bill status logic sees
    # the actions in the right order.
    current_status = "INTRODUCED"
    parsed_actions = []
    for item in reversed(kept):
        action_dict = action_for(item)
        extra_action_info, new_status = parse_bill_action(action_dict, current_status, bill_id, title)

        # only change/reflect status change if there was one
        if new_status:
            action_dict['status'] = new_status
            current_status = new_status

        # add additional parsed fields
        if extra_action_info:
            action_dict.update(extra_action_info)

        parsed_actions.append(action_dict)
    return parsed_actions
# Clean the text of one raw action item and pull out its date, references and
# other associated metadata.
def action_for(item):
    """Convert one bulk-data action item into an action dict.

    The result has 'acted_at', 'action_code', 'references', 'type' (always
    'action' here) and 'text', plus 'committees' when the item names a
    committee. A trailing parenthesised reference list is removed from the
    text and returned as a list of {'type', 'reference'} dicts.
    """
    # acted_at
    if item.get('actionTime'):
        # Although we get the action date & time in an ISO-ish format (split
        # across two fields), and although we know it's in local time at the
        # U.S. Capitol (i.e. U.S. Eastern), we don't know the UTC offset which
        # is a part of how we used to serialize the time. So parse and then
        # use pytz (via format_datetime) to re-serialize.
        stamp = item.get('actionDate', '') + " " + item['actionTime']
        acted_at = utils.format_datetime(datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S"))
    else:
        acted_at = item.get('actionDate', '')

    # text & references
    # (amendment actions don't always have text?)
    raw_text = item['text']
    text = '' if raw_text is None else raw_text

    # strip out links
    text = re.sub(r"</?[Aa]( \S.*?)?>", "", text)

    # remove and extract references
    references = []
    trailing = re.search("\s*\(([^)]+)\)\s*$", text)
    if trailing:
        # remove the matched section
        text = text[:trailing.start()] + text[trailing.end():]

        listing = trailing.group(1)
        # fix use of comma or colon instead of a semi colon between reference types
        # have seen some accidental capitalization combined with accidental comma, thus the 'T'
        # e.g. "text of Title VII as reported in House: CR H3075-3077, Text omission from Title VII:" (hr5384-109)
        listing = re.sub("[,:] ([a-zT])", r"; \1", listing)
        # fix "CR:"
        listing = re.sub("CR:", "CR", listing)
        # fix a missing semicolon altogether between references
        # e.g. sres107-112, "consideration: CR S1877-1878 text as"
        listing = re.sub("(\d+) +([a-z])", r"\1; \2", listing)

        for chunk in re.split("; ?", listing):
            if ": " in chunk:
                label, citation = chunk.split(": ", 1)
            else:
                label, citation = None, chunk
            references.append({'type': label, 'reference': citation})

    # form dict
    committee = item['committee']
    action_dict = {
        'acted_at': acted_at,
        'action_code': item.get('actionCode', ''),
        'committees': [committee['systemCode'][0:-2].upper()] if committee else None,
        'references': references,
        'type': 'action',  # replaced by parse_bill_action if a regex matches
        'text': text,
    }
    # remove if empty - not present in how we used to generate the file
    if not action_dict["committees"]:
        del action_dict["committees"]
    return action_dict
def cosponsors_for(cosponsors_list):
    """Return cosponsor dicts sorted by lower-cased name.

    Each bulk-data cosponsor item is parsed with sponsor_for; the 'type' key
    is dropped and 'sponsored_at', 'withdrawn_at' and 'original_cosponsor'
    are added. Returns [] when ``cosponsors_list`` is None.
    """
    if cosponsors_list is None:
        return []

    records = []
    for item in cosponsors_list['item']:
        record = sponsor_for(item)
        del record["type"]  # always 'person'
        record['sponsored_at'] = item['sponsorshipDate']
        record['withdrawn_at'] = item['sponsorshipWithdrawnDate']
        # The flag arrives as the string 'True' or something else.
        record['original_cosponsor'] = item['isOriginalCosponsor'] == 'True'
        records.append(record)

    # TODO: Can remove. Sort like the old THOMAS order to make diffs easier.
    return sorted(records, key=lambda record: record['name'].lower())
def related_bills_for(related_bills_list):
    """Convert the bulk-data related-bills node into a list of dicts.

    Each result has 'reason' (the first relationship detail's type with the
    word "bill" removed, stripped and lower-cased), 'bill_id'
    ("<type><number>-<congress>", with dots removed from the type),
    'type' (always 'bill') and 'identified_by'. Only the first relationship
    detail of each item is used. Returns [] when ``related_bills_list`` is
    None.
    """
    if related_bills_list is None:
        return []

    # Are these THOMAS related bill relation texts gone from the bulk data?
    # The old text -> reason mapping was never consulted by this function, so
    # it is kept here for reference only instead of being rebuilt per call:
    #   "Identical bill identified by (CRS|House|Senate)"           -> identical
    #   "Companion bill"                                            -> identical
    #   "Related bill (as )?identified by (CRS|the House Clerk's
    #    office|House committee|Senate)"                            -> related
    #   "passed in (House|Senate) in lieu of .*"                    -> supersedes
    #   "Rule related to .* in (House|Senate)"                      -> rule
    #   "This bill has text inserted from .*"                       -> includes
    #   "Text from this bill was inserted in .*"                    -> included-in
    #   "Bill related to rule .* in House"                          -> ruled-by
    #   "This bill caused other related action on .*"               -> caused-action
    #   "Other related action happened to this bill because of .*"  -> action-caused-by
    #   "Bill that causes .* to be laid on table in House"          -> caused-action
    #   "Bill laid on table by virtue of .* passage in House"       -> action-caused-by
    #   "Bill that caused the virtual passage of .* in House"       -> caused-action
    #   "Bill passed by virtue of .* passage in House"              -> caused-action-by
    #   "Bill on wich enrollment has been corrected by virtue of
    #    .* passage in House"                                       -> caused-action

    def build_dict(item):
        # Look the first relationship detail up once; both 'reason' and
        # 'identified_by' come from it.
        detail = item['relationshipDetails']['item'][0]
        return {
            'reason': detail['type'].replace('bill', '').strip().lower(),
            'bill_id': '{0}{1}-{2}'.format(item['type'].replace('.', '').lower(), item['number'], item['congress']),
            'type': 'bill',
            'identified_by': detail['identifiedBy']
        }

    return [build_dict(related_bill) for related_bill in related_bills_list['item']]
# Get the public or private law number from the first enacted action, if any.
def slip_law_from(actions):
    """Return {'law_type', 'congress', 'number'} for the first action whose
    type is "enacted", or None when no action is an enactment."""
    enacted = next((action for action in actions if action["type"] == "enacted"), None)
    if enacted is None:
        return None
    return {
        'law_type': enacted["law"],
        'congress': enacted["congress"],
        'number': enacted["number"]
    }
# Find the latest status change in a set of processed actions.
def latest_status(actions, introduced_at):
    """Return (status, status_date) for the last action carrying a status.

    Falls back to ("INTRODUCED", introduced_at) when no action has a truthy
    'status' entry.
    """
    latest = ("INTRODUCED", introduced_at)
    for action in actions:
        if action.get('status', None):
            latest = (action['status'], action['acted_at'])
    return latest
# look at the final set of processed actions and pull out the major historical events
def history_from_actions(actions):
    """Summarise the major events of a bill from its processed actions.

    Returns a dict that always has the boolean keys 'active', 'vetoed',
    'enacted' and 'awaiting_signature', plus, when the matching action
    exists: 'active_at', '<chamber>_passage_result(_at)',
    'senate_cloture_result(_at)', 'vetoed_at',
    '<chamber>_override_result(_at)', 'enacted_at' and
    'awaiting_signature_since'. When several actions match an event, the
    last one in ``actions`` is used.
    """
    def last_where(matches):
        # Last action for which matches(action) is true, or None.
        found = None
        for candidate in actions:
            if matches(candidate):
                found = candidate
        return found

    history = {}

    def record_vote(prefix, vote):
        # Store result and date of a vote under "<prefix>_result[_at]".
        if vote:
            history[prefix + '_result'] = vote['result']
            history[prefix + '_result_at'] = vote['acted_at']

    activation = activation_from(actions)
    if activation:
        history['active'] = True
        history['active_at'] = activation['acted_at']
    else:
        history['active'] = False

    record_vote('house_passage', last_where(
        lambda a: (a['type'] == 'vote') and (a['where'] == 'h') and (a['vote_type'] != "override")))
    record_vote('senate_passage', last_where(
        lambda a: (a['type'] == 'vote') and (a['where'] == 's') and (a['vote_type'] != "override")))
    record_vote('senate_cloture', last_where(
        lambda a: (a['type'] == 'vote-aux') and (a['vote_type'] == 'cloture') and (a['where'] == 's')))

    vetoed = last_where(lambda a: a['type'] == 'vetoed')
    if vetoed:
        history['vetoed'] = True
        history['vetoed_at'] = vetoed['acted_at']
    else:
        history['vetoed'] = False

    record_vote('house_override', last_where(
        lambda a: (a['type'] == 'vote') and (a['where'] == 'h') and (a['vote_type'] == "override")))
    record_vote('senate_override', last_where(
        lambda a: (a['type'] == 'vote') and (a['where'] == 's') and (a['vote_type'] == "override")))

    enacted = last_where(lambda a: a['type'] == 'enacted')
    if enacted:
        history['enacted'] = True
        # Use the enactment's own date, not whatever action happens to be
        # last in the list.
        history['enacted_at'] = enacted['acted_at']
    else:
        history['enacted'] = False

    topresident = last_where(lambda a: a['type'] == 'topresident')
    if topresident and (not history['vetoed']) and (not history['enacted']):
        history['awaiting_signature'] = True
        # Likewise, date this from the presentment action itself.
        history['awaiting_signature_since'] = topresident['acted_at']
    else:
        history['awaiting_signature'] = False

    return history


# find the first action beyond the standard actions every bill gets.
# - if the bill's first action is "referral" then the first action not those
#     most common
#     e.g. hr3590-111 (active), s1-113 (inactive)
# - if the bill's first action is "action", then the next action, if one is present
#     resolutions
#     e.g. sres5-113 (active), sres4-113 (inactive)
# - if the bill's first action is anything else (e.g. "vote"), then that first action
#     bills that skip committee
#     e.g. s227-113 (active)
def activation_from(actions):
    """Return the action that marks the bill as active, or None.

    If the first action's type is "referral", "calendar" or "action", the
    result is the first later action that is neither a referral nor a
    calendar action and whose text does not contain "Sponsor introductory
    remarks"; otherwise it is the first action itself.
    """
    # there's NOT always at least one :(
    # as of 2013-06-10, hr2272-113 has no actions at all
    if len(actions) == 0:
        return None

    first = actions[0]
    if first['type'] not in ["referral", "calendar", "action"]:
        return first

    for action in actions[1:]:
        if (action['type'] != "referral") and (action['type'] != "calendar") and ("Sponsor introductory remarks" not in action['text']):
            return action
    return None
def parse_bill_action(action_dict, prev_status, bill_id, title):
    """Parse a THOMAS bill action line. Returns attributes to be set in the XML file on the action line.

    ``action_dict['text']`` is matched against a series of regular
    expressions (votes, calendar placement, committee actions, presidential
    actions, enactment, referral). Later matches overwrite fields set by
    earlier ones.

    Returns a tuple ``(action, status)``: ``action`` is a dict that always
    has a "type" key plus whatever extra fields the matched patterns add,
    and ``status`` is the new bill status or None when the line does not
    change it. Lines that start with an amendment number return
    ``(None, None)``.

    Raises Exception when a "Public Law" line is seen without a preceding
    enacted/vetoed status and the bill is not in the hard-coded ten-day-rule
    list.
    """
    bill_type, number, congress = utils.split_bill_id(bill_id)
    line = action_dict['text']
    status = None
    action = {
        "type": "action"
    }

    # If a line starts with an amendment number, this action is on the amendment and cannot
    # be parsed yet.
    m = re.search(r"^(H|S)\.Amdt\.(\d+)", line, re.I)
    if m != None:
        # Process actions specific to amendments separately.
        return None, None

    # Otherwise, parse the action line for key actions.

    # VOTES

    # A House Vote.
    line = re.sub(", the Passed", ", Passed", line)
    # 106 h4733 and others

    # Groups: 1 motion, 2 veto-override phrase, 3 amended marker,
    # 4 pass/fail word, 5 how the vote was taken.
    m = re.search("("
                  + "|".join([
                      "On passage",
                      "Passed House",
                      "Two-thirds of the Members present having voted in the affirmative the bill is passed,?",
                      "On motion to suspend the rules and pass the (?:bill|resolution)",
                      "On agreeing to the (?:resolution|conference report)",
                      "On motion to suspend the rules and agree to the (?:resolution|conference report)",
                      "House Agreed to Senate Amendments.*?",
                      "On motion that the House (?:suspend the rules and )?(?:agree(?: with an amendment)? to|concur in) the Senate amendments?(?: to the House amendments?| to the Senate amendments?)*",
                  ])
                  + ")"
                  + "(, the objections of the President to the contrary notwithstanding.?)?"
                  + "(, as amended| \(Amended\))?"
                  + " (Passed|Failed|Agreed to|Rejected)?"
                  + " ?(by voice vote|without objection|by (the Yeas and Nays|Yea-Nay Vote|recorded vote)"
                  + "(:? \(2/3 required\))?: (\d+ - \d+(, \d+ Present)? [ \)]*)?\((Roll no\.|Record Vote No:) \d+\))",
                  line, re.I)
    if m != None:
        motion, is_override, as_amended, pass_fail, how = m.group(1), m.group(2), m.group(3), m.group(4), m.group(5)

        # print line
        # print m.groups()

        if re.search(r"Passed House|House Agreed to", motion, re.I):
            pass_fail = 'pass'
        elif re.search("(ayes|yeas) had prevailed", line, re.I):
            pass_fail = 'pass'
        elif re.search(r"Pass|Agreed", pass_fail, re.I):
            pass_fail = 'pass'
        else:
            pass_fail = 'fail'

        if "Two-thirds of the Members present" in motion:
            is_override = True

        if is_override:
            vote_type = "override"
        elif re.search(r"(agree (with an amendment )?to|concur in) the Senate amendment", line, re.I):
            vote_type = "pingpong"
        elif re.search("conference report", line, re.I):
            vote_type = "conference"
        elif bill_type[0] == "h":
            vote_type = "vote"
        else:
            vote_type = "vote2"

        roll = None
        m = re.search(r"\((Roll no\.|Record Vote No:) (\d+)\)", how, re.I)
        if m != None:
            how = "roll"  # normalize the ugly how
            roll = m.group(2)

        suspension = None
        if roll and "On motion to suspend the rules" in motion:
            suspension = True

        # alternate form of as amended, e.g. hr3979-113
        if "that the House agree with an amendment" in motion:
            as_amended = True

        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = how
        action['where'] = "h"
        action['result'] = pass_fail
        if roll:
            action["roll"] = roll
        action["suspension"] = suspension

        # correct upstream data error
        if bill_id == "s2012-114" and "Roll no. 250" in line: as_amended = True
        if bill_id == "s2943-114" and "On passage Passed without objection" in line: as_amended = True

        # get the new status of the bill after this vote
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", "h", bill_type, suspension, as_amended, title, prev_status)
        if new_status:
            status = new_status

    # Passed House, not necessarily by an actual vote (think "deem")
    m = re.search(r"Passed House pursuant to|House agreed to Senate amendment (with amendment )?pursuant to", line, re.I)
    if m != None:
        vote_type = "vote" if (bill_type[0] == "h") else "vote2"
        if "agreed to Senate amendment" in line: vote_type = "pingpong"
        pass_fail = "pass"
        as_amended = bool(m.group(1))

        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = "by special rule"
        action["where"] = "h"
        action["result"] = pass_fail

        # get the new status of the bill after this vote
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", "h", bill_type, False, as_amended, title, prev_status)
        if new_status:
            status = new_status

    # A Senate Vote
    # (There are some annoying weird cases of double spaces which are taken care of
    # at the end.)
    # Groups: 1 motion, 2 free text between motion and method, 3 how.
    m = re.search("("
                  + "|".join([
                      "Passed Senate",
                      "Failed of passage in Senate",
                      "Disagreed to in Senate",
                      "Resolution agreed to in Senate",
                      "Senate (?:agreed to|concurred in) (?:the )?(?:conference report|House amendment(?: to the Senate amendments?| to the House amendments?)*)",
                      r"Cloture \S*\s?on the motion to proceed .*?not invoked in Senate",
                      r"Cloture(?: motion)? on the motion to proceed to the (?:bill|measure) invoked in Senate",
                      "Cloture invoked in Senate",
                      "Cloture on (?:the motion to proceed to )?the bill (?:not )?invoked in Senate",
                      "(?:Introduced|Received|Submitted) in the Senate, (?:read twice, |considered, |read the third time, )+and (?:passed|agreed to)",
                  ])
                  + ")"
                  + "(,?.*,?) "
                  + "(without objection|by Unanimous Consent|by Voice Vote|(?:by )?Yea-Nay( Vote)?\. \d+\s*-\s*\d+\. Record Vote (No|Number): \d+)",
                  # Collapse runs of spaces so doubled spaces in the source
                  # text do not defeat the single-space pattern above. (A
                  # plain replace of " " by " " would leave the line as-is.)
                  re.sub(r" {2,}", " ", line), re.I)
    if m != None:
        motion, extra, how = m.group(1), m.group(2), m.group(3)
        roll = None

        # put disagreed check first, cause "agreed" is contained inside it
        if re.search("disagreed", motion, re.I):
            pass_fail = "fail"
        elif re.search("passed|agreed|concurred|bill invoked|measure invoked|cloture invoked", motion, re.I):
            pass_fail = "pass"
        else:
            pass_fail = "fail"

        voteaction_type = "vote"
        if re.search("over veto", extra, re.I):
            vote_type = "override"
        elif re.search("conference report", motion, re.I):
            vote_type = "conference"
        elif re.search("cloture", motion, re.I):
            vote_type = "cloture"
            voteaction_type = "vote-aux"  # because it is not a vote on passage
        elif re.search("Senate agreed to (the )?House amendment|Senate concurred in (the )?House amendment", motion, re.I):
            vote_type = "pingpong"
        elif bill_type[0] == "s":
            vote_type = "vote"
        else:
            vote_type = "vote2"

        m = re.search(r"Record Vote (No|Number): (\d+)", how, re.I)
        if m != None:
            roll = m.group(2)
            how = "roll"

        as_amended = False
        if re.search(r"with amendments|with an amendment", extra, re.I):
            as_amended = True

        action["type"] = voteaction_type
        action["vote_type"] = vote_type
        action["how"] = how
        action["result"] = pass_fail
        action["where"] = "s"
        if roll:
            action["roll"] = roll

        # get the new status of the bill after this vote
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", "s", bill_type, False, as_amended, title, prev_status)
        if new_status:
            status = new_status

    # OLD-STYLE VOTES (93rd Congress-ish)

    m = re.search(r"Measure passed (House|Senate)(, amended(?: \(.*?\)|, with an amendment to the title)?)?(?:,? in lieu[^,]*)?(?:, roll call #(\d+) \(\d+-\d+\))?", line, re.I)
    if m != None:
        chamber = m.group(1)[0].lower()  # 'h' or 's'
        as_amended = m.group(2)
        roll_num = m.group(3)
        # GovTrack legacy scraper missed these: if chamber == 's' and (as_amended or roll_num or "lieu" in line): return action, status
        pass_fail = "pass"
        vote_type = "vote" if bill_type[0] == chamber else "vote2"
        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = "(method not recorded)" if not roll_num else "roll"
        if roll_num:
            action["roll"] = roll_num
        action["result"] = pass_fail
        action["where"] = chamber
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", chamber, bill_type, False, as_amended, title, prev_status)
        if new_status:
            status = new_status

    m = re.search(r"(House|Senate) agreed to (?:House|Senate) amendments?( with an amendment)?( under Suspension of the Rules)?(?:, roll call #(\d+) \(\d+-\d+\))?\.", line, re.I)
    if m != None:
        chamber = m.group(1)[0].lower()  # 'h' or 's'
        as_amended = m.group(2)
        suspension = m.group(3)
        roll_num = m.group(4)
        # GovTrack legacy scraper missed these: if (chamber == 'h' and not roll_num) or (chamber == 's' and rull_num): return action, status # REMOVE ME
        pass_fail = "pass"
        vote_type = "pingpong"
        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = "(method not recorded)" if not roll_num else "roll"
        if roll_num:
            action["roll"] = roll_num
        action["result"] = pass_fail
        action["where"] = chamber
        action["suspension"] = (suspension != None)
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", chamber, bill_type, False, as_amended, title, prev_status)
        if new_status:
            status = new_status

    # PSUDO-REPORTING (because GovTrack did this, but should be changed)

    # TODO: Make a new status for this as pre-reported.
    m = re.search(r"Placed on (the )?([\w ]+) Calendar( under ([\w ]+))?[,\.] Calendar No\. (\d+)\.|Committee Agreed to Seek Consideration Under Suspension of the Rules|Ordered to be Reported", line, re.I)
    if m != None:
        # TODO: This makes no sense.
        if prev_status in ("INTRODUCED", "REFERRED"):
            status = "REPORTED"

        action["type"] = "calendar"

        # TODO: Useless. But good for GovTrack compatibility.
        if m.group(2):  # not 'Ordered to be Reported'
            action["calendar"] = m.group(2)
            action["under"] = m.group(4)
            action["number"] = m.group(5)

    # COMMITTEE ACTIONS

    # reported
    m = re.search(r"Committee on (.*)\. Reported by", line, re.I)
    if m != None:
        action["type"] = "reported"
        action["committee"] = m.group(1)
        if prev_status in ("INTRODUCED", "REFERRED"):
            status = "REPORTED"

    m = re.search(r"Reported to Senate from the (.*?)( \(without written report\))?\.", line, re.I)
    if m != None:  # 93rd Congress
        action["type"] = "reported"
        action["committee"] = m.group(1)
        if prev_status in ("INTRODUCED", "REFERRED"):
            status = "REPORTED"

    # hearings held by a committee
    m = re.search(r"(Committee on .*?)\. Hearings held", line, re.I)
    if m != None:
        action["committee"] = m.group(1)
        action["type"] = "hearings"

    m = re.search(r"Committee on (.*)\. Discharged (by Unanimous Consent)?", line, re.I)
    if m != None:
        action["committee"] = m.group(1)
        action["type"] = "discharged"
        if prev_status in ("INTRODUCED", "REFERRED"):
            status = "REPORTED"

    m = re.search("Cleared for White House|Presented to President", line, re.I)
    if m != None:
        action["type"] = "topresident"

    m = re.search("Signed by President", line, re.I)
    if m != None:
        action["type"] = "signed"
        status = "ENACTED:SIGNED"

    m = re.search("Pocket Vetoed by President", line, re.I)
    if m != None:
        action["type"] = "vetoed"
        action["pocket"] = "1"
        status = "VETOED:POCKET"
    else:
        # need to put this in an else, or this regex will match the pocket veto and override it
        m = re.search("Vetoed by President", line, re.I)
        if m != None:
            action["type"] = "vetoed"
            status = "PROV_KILL:VETO"

    m = re.search("Sent to Archivist of the United States unsigned", line, re.I)
    if m != None:
        status = "ENACTED:TENDAYRULE"

    m = re.search("^(?:Became )?(Public|Private) Law(?: No:)? ([\d\-]+)\.", line, re.I)
    if m != None:
        action["law"] = m.group(1).lower()
        pieces = m.group(2).split("-")
        action["congress"] = pieces[0]
        action["number"] = pieces[1]
        action["type"] = "enacted"
        if prev_status in ("ENACTED:SIGNED", "ENACTED:VETO_OVERRIDE", "ENACTED:TENDAYRULE"):
            pass  # this is a final administrative step
        elif prev_status == "PROV_KILL:VETO" or prev_status.startswith("VETOED:"):
            # somehow missed the override steps
            status = "ENACTED:VETO_OVERRIDE"
        elif bill_id in ("s2641-93", "hr1589-94", "s2527-100", "hr1677-101", "hr2978-101", "hr2126-104", "s1322-104"):
            status = "ENACTED:TENDAYRULE"
        else:
            raise Exception("Missing Signed by President action? If this is a case of the 10-day rule, hard code the bill id %s here." % bill_id)

    # Check for referral type
    m = re.search(r"Referred to (?:the )?(House|Senate)?\s?(?:Committee|Subcommittee)?", line, re.I)
    if m != None:
        action["type"] = "referral"
        if prev_status == "INTRODUCED":
            status = "REFERRED"

    # sweep the action line for bill IDs of related bills
    # Built as a list (not with filter()) so that the emptiness check and the
    # stored value behave the same on Python 2 and Python 3.
    bill_ids = [b for b in utils.extract_bills(line, congress) if b != bill_id]
    if bill_ids:
        action['bill_ids'] = bill_ids

    return action, status
def new_status_after_vote(vote_type, passed, chamber, bill_type, suspension, amended, title, prev_status):
    """Return the bill status implied by the outcome of a vote.

    vote_type selects the rule set: "vote" (originating chamber), "vote2"
    (second chamber), "pingpong", "cloture", "override" or "conference".
    passed, suspension and amended are used for their truthiness. A chamber
    of "h" selects the HOUSE statuses; any other value selects the SENATE
    ones. title is only inspected for joint resolutions, and prev_status
    only for a conference vote that passed.

    Returns a status string, or None when the vote does not change the
    status (a successful cloture vote, a failed conference vote, or an
    unrecognized vote_type).
    """
    in_house = chamber == "h"

    def final_passage_status():
        # Both chambers have agreed to the same text; what that means depends
        # on the kind of measure.
        if bill_type in ("hjres", "sjres") and title.startswith("Proposing an amendment to the Constitution of the United States"):
            # joint resolution that looks like an amendment to the constitution
            return 'PASSED:CONSTAMEND'
        if bill_type in ("hconres", "sconres"):
            # end of life for concurrent resolutions
            return 'PASSED:CONCURRENTRES'
        # bills and other joint resolutions now go on to the president
        return 'PASSED:BILL'

    if vote_type == "vote":
        # Vote in the originating chamber.
        if passed:
            if bill_type in ("hres", "sres"):
                # end of life for a simple resolution
                return 'PASSED:SIMPLERES'
            # passed by originating chamber, now in second chamber
            return 'PASS_OVER:HOUSE' if in_house else 'PASS_OVER:SENATE'
        if suspension:
            # provisionally killed by failure to pass under suspension of the rules
            return 'PROV_KILL:SUSPENSIONFAILED'
        # outright failure
        return 'FAIL:ORIGINATING:HOUSE' if in_house else 'FAIL:ORIGINATING:SENATE'

    elif vote_type in ("vote2", "pingpong"):
        # Vote in the second chamber, or a subsequent ping-pong vote.
        if passed:
            if not amended:
                return final_passage_status()
            # The measure passed, but not in identical form, so this chamber
            # sends it back to the other one.
            return 'PASS_BACK:HOUSE' if in_house else 'PASS_BACK:SENATE'
        if vote_type == "pingpong":
            # chamber failed to accept the other chamber's changes, but it can vote again
            return 'PROV_KILL:PINGPONGFAIL'
        if suspension:
            # provisionally killed by failure to pass under suspension of the rules
            return 'PROV_KILL:SUSPENSIONFAILED'
        # outright failure
        return 'FAIL:SECOND:HOUSE' if in_house else 'FAIL:SECOND:SENATE'

    elif vote_type == "cloture":
        # Only a failed cloture vote affects the status.
        return None if passed else "PROV_KILL:CLOTUREFAILED"

    elif vote_type == "override":
        # The first letter of the bill type names the originating chamber.
        in_originating_chamber = bill_type[0] == chamber
        if passed:
            if not in_originating_chamber:
                # The override passed both chambers -- the veto is overridden.
                return "ENACTED:VETO_OVERRIDE"
            return 'VETOED:OVERRIDE_PASS_OVER:HOUSE' if in_house else 'VETOED:OVERRIDE_PASS_OVER:SENATE'
        if in_originating_chamber:
            return 'VETOED:OVERRIDE_FAIL_ORIGINATING:HOUSE' if in_house else 'VETOED:OVERRIDE_FAIL_ORIGINATING:SENATE'
        return 'VETOED:OVERRIDE_FAIL_SECOND:HOUSE' if in_house else 'VETOED:OVERRIDE_FAIL_SECOND:SENATE'

    elif vote_type == "conference":
        # This is tricky to integrate into status because we have to wait for
        # both chambers to pass the conference report.
        if passed:
            if prev_status.startswith("CONFERENCE:PASSED:"):
                # A conference-passed status was already recorded, so this
                # vote completes passage.
                return final_passage_status()
            return 'CONFERENCE:PASSED:HOUSE' if in_house else 'CONFERENCE:PASSED:SENATE'

    return None
def amendments_for(amendment_list):
    """Summarize the amendments listed for a bill.

    amendment_list is either None or a mapping whose 'amendment' key holds
    the amendment entries. Each entry must provide 'type', 'number' and
    'congress'; a missing key raises KeyError.

    Returns a list with one dict per entry, carrying 'amendment_id',
    'amendment_type', 'chamber' and 'number'. None yields an empty list.
    The input entries are read but never modified.
    """
    if amendment_list is None:
        return []

    entries = amendment_list['amendment']
    if isinstance(entries, dict):
        # A lone mapping instead of a list of mappings is treated as a single
        # amendment; iterating it directly would walk its keys.
        entries = [entries]

    def first_value(value):
        # Malformed XML containing duplicate elements causes attributes to
        # parse as a list; only the first occurrence is used.
        return value[0] if isinstance(value, list) else value

    def build_dict(item):
        # Work on local copies of the fields so the caller's data is left
        # exactly as it was passed in.
        raw_type = first_value(item['type'])
        number = first_value(item['number'])
        congress = first_value(item['congress'])
        return {
            'amendment_id': "{0}{1}-{2}".format(raw_type.lower(), number, congress),
            'amendment_type': raw_type.lower(),
            'chamber': raw_type[0].lower(),
            'number': number,
        }

    return [build_dict(amendment) for amendment in entries]
def committee_reports_for(committeeReports):
    """Return the "citation" of every committee report, in order.

    committeeReports may be None or otherwise falsy, in which case the result
    is an empty list. Otherwise its "committeeReport" entry, if present, is
    iterated and each report's "citation" value is collected.
    """
    if not committeeReports:
        return []
    return [entry["citation"] for entry in committeeReports.get("committeeReport", [])]