mirror of
https://github.com/unitedstates/congress.git
synced 2026-03-25 14:00:05 -04:00
Run 2to3 over codebase
This commit is contained in:
@@ -19,7 +19,7 @@ You must include a 'beanstalk' section in config.yml with this structure
|
||||
votes: 'us_votes'
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
import sys
|
||||
import logging
|
||||
@@ -64,7 +64,7 @@ def init_guard(reconnect=False):
|
||||
assert 'bills' in config['beanstalk']['tubes']
|
||||
assert 'amendments' in config['beanstalk']['tubes']
|
||||
assert 'votes' in config['beanstalk']['tubes']
|
||||
tube_names = config['beanstalk']['tubes'].values()
|
||||
tube_names = list(config['beanstalk']['tubes'].values())
|
||||
assert max(Counter(tube_names).values()) == 1, 'Must use unique beanstalk tube names.'
|
||||
_Config = config['beanstalk']
|
||||
if _Connection is None or reconnect is True:
|
||||
@@ -84,13 +84,13 @@ def process_bill_wrapper(process_bill):
|
||||
try:
|
||||
conn.use(config['tubes']['bills'])
|
||||
conn.put(bill)
|
||||
logging.warn(u"Queued {} to beanstalkd.".format(bill))
|
||||
logging.warn("Queued {} to beanstalkd.".format(bill))
|
||||
break
|
||||
except beanstalkc.SocketError:
|
||||
logging.warn(u"Lost connection to beanstalkd. Attempting to reconnect.")
|
||||
logging.warn("Lost connection to beanstalkd. Attempting to reconnect.")
|
||||
(conn, config) = init_guard(reconnect=True)
|
||||
except Exception as e:
|
||||
logging.warn(u"Ignored exception while queueing bill to beanstalkd: {0} {1}".format(unicode(type(e)), unicode(e)))
|
||||
logging.warn("Ignored exception while queueing bill to beanstalkd: {0} {1}".format(str(type(e)), str(e)))
|
||||
traceback.print_exc()
|
||||
break
|
||||
|
||||
@@ -110,13 +110,13 @@ def process_amendment_wrapper(process_amendment):
|
||||
try:
|
||||
conn.use(config['tubes']['amendments'])
|
||||
conn.put(str(amdt))
|
||||
logging.warn(u"Queued {} to beanstalkd.".format(amdt))
|
||||
logging.warn("Queued {} to beanstalkd.".format(amdt))
|
||||
break
|
||||
except beanstalkc.SocketError:
|
||||
logging.warn(u"Lost connection to beanstalkd. Attempting to reconnect.")
|
||||
logging.warn("Lost connection to beanstalkd. Attempting to reconnect.")
|
||||
(conn, config) = init_guard(reconnect=True)
|
||||
except Exception as e:
|
||||
logging.warn(u"Ignored exception while queueing amendment to beanstalkd: {0} {1}".format(unicode(type(e)), unicode(e)))
|
||||
logging.warn("Ignored exception while queueing amendment to beanstalkd: {0} {1}".format(str(type(e)), str(e)))
|
||||
traceback.print_exc()
|
||||
break
|
||||
|
||||
@@ -135,13 +135,13 @@ def output_vote_wrapper(output_vote):
|
||||
try:
|
||||
conn.use(config['tubes']['votes'])
|
||||
conn.put(vote['vote_id'])
|
||||
logging.warn(u'Queued {} to beanstalkd.'.format(vote['vote_id']))
|
||||
logging.warn('Queued {} to beanstalkd.'.format(vote['vote_id']))
|
||||
break
|
||||
except beanstalkc.SocketError:
|
||||
logging.warn(u'Lost connection to beanstalkd. Attempting to reconnect.')
|
||||
logging.warn('Lost connection to beanstalkd. Attempting to reconnect.')
|
||||
(conn, config) = init_guard(reconnect=True)
|
||||
except Exception as e:
|
||||
logging.warn(u'Ignored exception while queueing vote to beanstalkd: {0} {1}'.format(unicode(type(e)), unicode(e)))
|
||||
logging.warn('Ignored exception while queueing vote to beanstalkd: {0} {1}'.format(str(type(e)), str(e)))
|
||||
traceback.print_exc()
|
||||
break
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ def create_govtrack_xml(bill, options):
|
||||
if options.get("govtrack", False):
|
||||
# Rewrite bioguide_id attributes as just id with GovTrack person IDs.
|
||||
attrs2 = {}
|
||||
for k, v in attrs.items():
|
||||
for k, v in list(attrs.items()):
|
||||
if v:
|
||||
if k == "bioguide_id":
|
||||
# remap "bioguide_id" attributes to govtrack "id"
|
||||
@@ -43,7 +43,7 @@ def create_govtrack_xml(bill, options):
|
||||
elif k == "source_url":
|
||||
n.set("url", v)
|
||||
else:
|
||||
n.set(k, unicode(v))
|
||||
n.set(k, str(v))
|
||||
if "original_bill_number" in bill:
|
||||
make_node(root, "bill-number", bill["original_bill_number"])
|
||||
|
||||
@@ -328,7 +328,7 @@ def titles_for(title_list):
|
||||
titles_copy = list(titles) # clone before beginning sort
|
||||
def first_index_of(**kwargs):
|
||||
for i, title in enumerate(titles_copy):
|
||||
for k, v in kwargs.items():
|
||||
for k, v in list(kwargs.items()):
|
||||
k = k.replace("_", "")
|
||||
if title.get(k) != v:
|
||||
break
|
||||
@@ -1097,7 +1097,7 @@ def parse_bill_action(action_dict, prev_status, bill_id, title):
|
||||
|
||||
# sweep the action line for bill IDs of related bills
|
||||
bill_ids = utils.extract_bills(line, congress)
|
||||
bill_ids = filter(lambda b: b != bill_id, bill_ids)
|
||||
bill_ids = [b for b in bill_ids if b != bill_id]
|
||||
if bill_ids and (len(bill_ids) > 0):
|
||||
action['bill_ids'] = bill_ids
|
||||
|
||||
|
||||
@@ -108,7 +108,7 @@ def process_bill(bill_id, options):
|
||||
|
||||
# Convert and write out data.json and data.xml.
|
||||
utils.write(
|
||||
unicode(json.dumps(bill_data, indent=2, sort_keys=True)),
|
||||
str(json.dumps(bill_data, indent=2, sort_keys=True)),
|
||||
os.path.dirname(fdsys_xml_path) + '/data.json')
|
||||
|
||||
from bill_info import create_govtrack_xml
|
||||
@@ -258,7 +258,7 @@ def reparse_actions(bill_id, options):
|
||||
import sys
|
||||
from difflib import unified_diff
|
||||
sys.stdout.writelines(unified_diff(split_lines(source), split_lines(revised), fromfile=fn, tofile=fn))
|
||||
return raw_input("Apply change? (y/n) ").strip() == "y"
|
||||
return input("Apply change? (y/n) ").strip() == "y"
|
||||
|
||||
wrote_any = False
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ import uuid
|
||||
import logging
|
||||
import mechanize
|
||||
import zipfile
|
||||
import StringIO
|
||||
import io
|
||||
import requests
|
||||
import subprocess
|
||||
|
||||
@@ -93,12 +93,12 @@ def fetch_senate_committee_meetings(committees, options):
|
||||
options))
|
||||
|
||||
for node in dom.xpath("meeting"):
|
||||
committee_id = unicode(node.xpath('string(cmte_code)'))
|
||||
committee_id = str(node.xpath('string(cmte_code)'))
|
||||
if committee_id.strip() == "":
|
||||
continue # "No committee hearings scheduled" placeholder
|
||||
occurs_at = unicode(node.xpath('string(date)'))
|
||||
room = unicode(node.xpath('string(room)'))
|
||||
topic = unicode(node.xpath('string(matter)'))
|
||||
occurs_at = str(node.xpath('string(date)'))
|
||||
room = str(node.xpath('string(room)'))
|
||||
topic = str(node.xpath('string(matter)'))
|
||||
|
||||
occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
|
||||
topic = re.sub(r"\s+", " ", topic).strip()
|
||||
@@ -113,7 +113,7 @@ def fetch_senate_committee_meetings(committees, options):
|
||||
if subcommittee_code and subcommittee_code not in committees[committee_code]["subcommittees"]:
|
||||
raise ValueError(subcommittee_code)
|
||||
except:
|
||||
print("Invalid committee code", committee_id)
|
||||
print(("Invalid committee code", committee_id))
|
||||
continue
|
||||
|
||||
# See if this meeting already exists. If so, take its GUID.
|
||||
@@ -122,13 +122,13 @@ def fetch_senate_committee_meetings(committees, options):
|
||||
for mtg in existing_meetings:
|
||||
if mtg["committee"] == committee_code and mtg.get("subcommittee", None) == subcommittee_code and mtg["occurs_at"] == occurs_at.isoformat():
|
||||
if options.get("debug", False):
|
||||
print("[%s] Reusing gUID." % mtg["guid"])
|
||||
print(("[%s] Reusing gUID." % mtg["guid"]))
|
||||
guid = mtg["guid"]
|
||||
break
|
||||
else:
|
||||
# Not found, so create a new ID.
|
||||
# TODO: Can we make this a human-readable ID?
|
||||
guid = unicode(uuid.uuid4())
|
||||
guid = str(uuid.uuid4())
|
||||
|
||||
# Scrape the topic text for mentions of bill numbers.
|
||||
congress = utils.congress_from_legislative_year(utils.current_legislative_year(occurs_at))
|
||||
@@ -139,7 +139,7 @@ def fetch_senate_committee_meetings(committees, options):
|
||||
|
||||
# Create the meeting event.
|
||||
if options.get("debug", False):
|
||||
print("[senate][%s][%s] Found meeting in room %s at %s." % (committee_code, subcommittee_code, room, occurs_at.isoformat()))
|
||||
print(("[senate][%s][%s] Found meeting in room %s at %s." % (committee_code, subcommittee_code, room, occurs_at.isoformat())))
|
||||
|
||||
meetings.append({
|
||||
"chamber": "senate",
|
||||
@@ -153,7 +153,7 @@ def fetch_senate_committee_meetings(committees, options):
|
||||
"bill_ids": bills,
|
||||
})
|
||||
|
||||
print("[senate] Found %i meetings." % len(meetings))
|
||||
print(("[senate] Found %i meetings." % len(meetings)))
|
||||
return meetings
|
||||
|
||||
# House
|
||||
@@ -197,7 +197,7 @@ def fetch_house_committee_meetings(committees, options):
|
||||
# original start to loop
|
||||
for mtg in dom.xpath("channel/item"):
|
||||
|
||||
eventurl = unicode(mtg.xpath("string(link)"))
|
||||
eventurl = str(mtg.xpath("string(link)"))
|
||||
event_id = re.search(r"EventID=(\d+)$", eventurl)
|
||||
if not event_id: continue # weird empty event showed up
|
||||
event_id = event_id.group(1)
|
||||
@@ -217,7 +217,7 @@ def fetch_house_committee_meetings(committees, options):
|
||||
# if bad zipfile
|
||||
if load_xml_from_page == False: continue
|
||||
|
||||
print("[house] Found %i meetings." % len(meetings))
|
||||
print(("[house] Found %i meetings." % len(meetings)))
|
||||
return meetings
|
||||
|
||||
|
||||
@@ -245,7 +245,7 @@ def fetch_meeting_from_event_id(committees, options, load_id):
|
||||
if load_xml_from_page == False: continue
|
||||
current_id += 1
|
||||
|
||||
print("[house] Found %i meetings." % len(meetings))
|
||||
print(("[house] Found %i meetings." % len(meetings)))
|
||||
return meetings
|
||||
|
||||
|
||||
@@ -301,13 +301,13 @@ def extract_meeting_package(eventurl, event_id, options):
|
||||
try:
|
||||
dom = lxml.etree.fromstring(request.read())
|
||||
except lxml.etree.XMLSyntaxError as e:
|
||||
print(event_id, e)
|
||||
print((event_id, e))
|
||||
return False
|
||||
return {"witnesses": None, "uploaded_documents": [], "dom": dom}
|
||||
|
||||
## read zipfile
|
||||
try:
|
||||
request_bytes = StringIO.StringIO(request.read())
|
||||
request_bytes = io.StringIO(request.read())
|
||||
package = zipfile.ZipFile(request_bytes)
|
||||
except:
|
||||
message = "Problem downloading zipfile: %s" % (event_id)
|
||||
@@ -533,13 +533,13 @@ def parse_house_committee_meeting(event_id, dom, existing_meetings, committees,
|
||||
else:
|
||||
# Not found, so create a new ID.
|
||||
# TODO: when does this happen?
|
||||
guid = unicode(uuid.uuid4())
|
||||
guid = str(uuid.uuid4())
|
||||
|
||||
url = "http://docs.house.gov/Committee/Calendar/ByEvent.aspx?EventID=" + event_id
|
||||
|
||||
# return the parsed meeting
|
||||
if options.get("debug", False):
|
||||
print("[house][%s][%s] Found meeting in room %s at %s" % (committee_code, subcommittee_code, room, occurs_at.isoformat()))
|
||||
print(("[house][%s][%s] Found meeting in room %s at %s" % (committee_code, subcommittee_code, room, occurs_at.isoformat())))
|
||||
|
||||
results = {
|
||||
"chamber": "house",
|
||||
@@ -582,7 +582,7 @@ def save_documents(package, event_id):
|
||||
try:
|
||||
bytes = package.read(name)
|
||||
except:
|
||||
print("Did not save to disk: file %s" % (name))
|
||||
print(("Did not save to disk: file %s" % (name)))
|
||||
continue
|
||||
file_name = "%s/%s" % (output_dir, name)
|
||||
|
||||
@@ -651,7 +651,7 @@ def save_file(url, event_id):
|
||||
text_doc = text_from_pdf(file_name)
|
||||
return True
|
||||
except:
|
||||
print("Failed to save- %s" % (url))
|
||||
print(("Failed to save- %s" % (url)))
|
||||
return False
|
||||
else:
|
||||
logging.info("failed to fetch: " + url)
|
||||
|
||||
@@ -473,7 +473,7 @@ def get_output_path(collection, package_name, options):
|
||||
|
||||
|
||||
def unwrap_text_in_html(data):
|
||||
text_content = unicode(html.fromstring(data).text_content())
|
||||
text_content = str(html.fromstring(data).text_content())
|
||||
return text_content.encode("utf8")
|
||||
|
||||
|
||||
|
||||
@@ -77,14 +77,14 @@ def run(options):
|
||||
elif "volumes" in options:
|
||||
start, end = options["volumes"].split("-")
|
||||
to_fetch = []
|
||||
for v in xrange(int(start), int(end) + 1):
|
||||
for v in range(int(start), int(end) + 1):
|
||||
to_fetch.extend(glob.glob(root_dir + "/*/STATUTE-" + str(v)))
|
||||
elif "year" in options:
|
||||
to_fetch = glob.glob(root_dir + "/" + str(int(options["year"])) + "/STATUTE-*")
|
||||
elif "years" in options:
|
||||
start, end = options["years"].split("-")
|
||||
to_fetch = []
|
||||
for y in xrange(int(start), int(end) + 1):
|
||||
for y in range(int(start), int(end) + 1):
|
||||
to_fetch.extend(glob.glob(root_dir + "/" + str(y) + "/STATUTE-*"))
|
||||
else:
|
||||
to_fetch = sorted(glob.glob(root_dir + "/*/STATUTE-*"))
|
||||
|
||||
@@ -6,7 +6,7 @@ import traceback
|
||||
import zipfile
|
||||
import platform
|
||||
import re
|
||||
import htmlentitydefs
|
||||
import html.entities
|
||||
import json
|
||||
from pytz import timezone
|
||||
import datetime
|
||||
@@ -47,7 +47,7 @@ def format_datetime(obj):
|
||||
return eastern_time_zone.localize(obj.replace(microsecond=0)).isoformat()
|
||||
elif isinstance(obj, datetime.date):
|
||||
return obj.isoformat()
|
||||
elif isinstance(obj, (str, unicode)):
|
||||
elif isinstance(obj, str):
|
||||
return obj
|
||||
else:
|
||||
return None
|
||||
@@ -257,7 +257,7 @@ def download(url, destination=None, options={}):
|
||||
# archive.
|
||||
if destination and to_cache:
|
||||
dparts = destination.split(os.sep)
|
||||
for i in xrange(len(dparts) - 1):
|
||||
for i in range(len(dparts) - 1):
|
||||
# form the ZIP file name and test if it exists...
|
||||
zfn = os.path.join(cache, *dparts[:i + 1]) + ".zip"
|
||||
if not os.path.exists(zfn):
|
||||
@@ -317,11 +317,11 @@ def download(url, destination=None, options={}):
|
||||
|
||||
if not is_binary:
|
||||
body = response.text # a subclass of a 'unicode' instance
|
||||
if not isinstance(body, unicode):
|
||||
if not isinstance(body, str):
|
||||
raise ValueError("Content not decoded.")
|
||||
else:
|
||||
body = response.content # a 'str' instance
|
||||
if isinstance(body, unicode):
|
||||
if isinstance(body, str):
|
||||
raise ValueError("Binary content improperly decoded.")
|
||||
except scrapelib.HTTPError as e:
|
||||
logging.error("Error downloading %s:\n\n%s" % (url, format_exception(e)))
|
||||
@@ -438,7 +438,7 @@ def xpath_regex(doc, element, pattern):
|
||||
def unescape(text):
|
||||
|
||||
def remove_unicode_control(str):
|
||||
remove_re = re.compile(u'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]')
|
||||
remove_re = re.compile('[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]')
|
||||
return remove_re.sub('', str)
|
||||
|
||||
def fixup(m):
|
||||
@@ -447,15 +447,15 @@ def unescape(text):
|
||||
# character reference
|
||||
try:
|
||||
if text[:3] == "&#x":
|
||||
return unichr(int(text[3:-1], 16))
|
||||
return chr(int(text[3:-1], 16))
|
||||
else:
|
||||
return unichr(int(text[2:-1]))
|
||||
return chr(int(text[2:-1]))
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
# named entity
|
||||
try:
|
||||
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
|
||||
text = chr(html.entities.name2codepoint[text[1:-1]])
|
||||
except KeyError:
|
||||
pass
|
||||
return text # leave as is
|
||||
@@ -532,7 +532,7 @@ def admin(body):
|
||||
|
||||
except Exception as exception:
|
||||
print("Exception logging message to admin, halting as to avoid loop")
|
||||
print(format_exception(exception))
|
||||
print((format_exception(exception)))
|
||||
|
||||
|
||||
def format_exception(exception):
|
||||
@@ -572,7 +572,7 @@ def make_node(parent, tag, text, **attrs):
|
||||
n = etree.Element(tag)
|
||||
parent.append(n)
|
||||
n.text = text
|
||||
for k, v in attrs.items():
|
||||
for k, v in list(attrs.items()):
|
||||
if v is None:
|
||||
continue
|
||||
if isinstance(v, datetime.datetime):
|
||||
@@ -734,16 +734,16 @@ def lookup_legislator(congress, role_type, name, state, party, when, id_requeste
|
||||
for filename in ("legislators-historical", "legislators-current"):
|
||||
for moc in yaml_load("congress-legislators/%s.yaml" % (filename)):
|
||||
for term in moc["terms"]:
|
||||
for c in xrange(congress_from_legislative_year(int(term['start'][0:4])) - 1,
|
||||
for c in range(congress_from_legislative_year(int(term['start'][0:4])) - 1,
|
||||
congress_from_legislative_year(int(term['end'][0:4])) + 1 + 1):
|
||||
lookup_legislator_cache.setdefault(c, []).append((moc, term))
|
||||
|
||||
def to_ascii(name):
|
||||
name = name.replace("-", " ")
|
||||
if not isinstance(name, unicode):
|
||||
if not isinstance(name, str):
|
||||
return name
|
||||
import unicodedata
|
||||
return u"".join(c for c in unicodedata.normalize('NFKD', name) if not unicodedata.combining(c))
|
||||
return "".join(c for c in unicodedata.normalize('NFKD', name) if not unicodedata.combining(c))
|
||||
|
||||
# Scan all of the terms that cover 'when' for a match.
|
||||
if isinstance(when, datetime.datetime):
|
||||
@@ -833,7 +833,7 @@ def translate_legislator_id(source_id_type, source_id, dest_id_type):
|
||||
_translate_legislator_id_cache = { }
|
||||
for filename in ("legislators-historical", "legislators-current"):
|
||||
for moc in yaml_load("congress-legislators/%s.yaml" % (filename)):
|
||||
for id_type, id_value in moc["id"].items():
|
||||
for id_type, id_value in list(moc["id"].items()):
|
||||
try:
|
||||
_translate_legislator_id_cache[(id_type, id_value)] = moc['id']
|
||||
except TypeError:
|
||||
@@ -871,6 +871,6 @@ class NoInterrupt(object):
|
||||
for sig in self.sigs:
|
||||
signal.signal(sig, self.old_handlers[sig])
|
||||
# Issue the signals caught during the with-block.
|
||||
for sig, args in self.signal_received.items():
|
||||
for sig, args in list(self.signal_received.items()):
|
||||
if self.old_handlers[sig]:
|
||||
self.old_handlers[sig](*args)
|
||||
|
||||
@@ -132,7 +132,7 @@ def output_vote(vote, options, id_type=None):
|
||||
# preferred order of output: ayes, nays, present, then not voting, and similarly for guilty/not-guilty
|
||||
# and handling other options like people's names for votes for the Speaker.
|
||||
option_sort_order = ('Aye', 'Yea', 'Guilty', 'No', 'Nay', 'Not Guilty', 'OTHER', 'Present', 'Not Voting')
|
||||
options_list = sorted(vote["votes"].keys(), key=lambda o: option_sort_order.index(o) if o in option_sort_order else option_sort_order.index("OTHER"))
|
||||
options_list = sorted(list(vote["votes"].keys()), key=lambda o: option_sort_order.index(o) if o in option_sort_order else option_sort_order.index("OTHER"))
|
||||
for option in options_list:
|
||||
if option not in option_keys:
|
||||
option_keys[option] = option
|
||||
@@ -180,18 +180,18 @@ def parse_senate_vote(dom, vote):
|
||||
vote["date"] = parse_date(dom.xpath("string(vote_date)"))
|
||||
if len(dom.xpath("modify_date")) > 0:
|
||||
vote["record_modified"] = parse_date(dom.xpath("string(modify_date)")) # some votes like s1-110.2008 don't have a modify_date
|
||||
vote["question"] = unicode(dom.xpath("string(vote_question_text)"))
|
||||
vote["question"] = str(dom.xpath("string(vote_question_text)"))
|
||||
if vote["question"] == "":
|
||||
vote["question"] = unicode(dom.xpath("string(question)")) # historical votes?
|
||||
vote["type"] = unicode(dom.xpath("string(vote_question)"))
|
||||
vote["question"] = str(dom.xpath("string(question)")) # historical votes?
|
||||
vote["type"] = str(dom.xpath("string(vote_question)"))
|
||||
if vote["type"] == "":
|
||||
vote["type"] = vote["question"]
|
||||
vote["type"] = normalize_vote_type(vote["type"])
|
||||
vote["category"] = get_vote_category(vote["type"])
|
||||
vote["subject"] = unicode(dom.xpath("string(vote_title)"))
|
||||
vote["requires"] = unicode(dom.xpath("string(majority_requirement)"))
|
||||
vote["result_text"] = unicode(dom.xpath("string(vote_result_text)"))
|
||||
vote["result"] = unicode(dom.xpath("string(vote_result)"))
|
||||
vote["subject"] = str(dom.xpath("string(vote_title)"))
|
||||
vote["requires"] = str(dom.xpath("string(majority_requirement)"))
|
||||
vote["result_text"] = str(dom.xpath("string(vote_result_text)"))
|
||||
vote["result"] = str(dom.xpath("string(vote_result)"))
|
||||
|
||||
# Senate cloture votes have consistently bad vote_question_text values: They don't say what the cloture
|
||||
# was about specifically, just what bill was relevant. So cloture on an amendment just appears as
|
||||
@@ -218,23 +218,23 @@ def parse_senate_vote(dom, vote):
|
||||
|
||||
bill_types = {"S.": "s", "S.Con.Res.": "sconres", "S.J.Res.": "sjres", "S.Res.": "sres", "H.R.": "hr", "H.Con.Res.": "hconres", "H.J.Res.": "hjres", "H.Res.": "hres"}
|
||||
|
||||
if unicode(dom.xpath("string(document/document_type)")):
|
||||
if str(dom.xpath("string(document/document_type)")):
|
||||
if dom.xpath("string(document/document_type)") == "PN":
|
||||
vote["nomination"] = {
|
||||
"number": unicode(dom.xpath("string(document/document_number)")),
|
||||
"title": unicode(dom.xpath("string(document/document_title)")),
|
||||
"number": str(dom.xpath("string(document/document_number)")),
|
||||
"title": str(dom.xpath("string(document/document_title)")),
|
||||
}
|
||||
vote["question"] += ": " + vote["nomination"]["title"]
|
||||
elif dom.xpath("string(document/document_type)") == "Treaty Doc.":
|
||||
vote["treaty"] = {
|
||||
"title": unicode(dom.xpath("string(document/document_title)")),
|
||||
"title": str(dom.xpath("string(document/document_title)")),
|
||||
}
|
||||
elif unicode(dom.xpath("string(document/document_type)")) in bill_types:
|
||||
elif str(dom.xpath("string(document/document_type)")) in bill_types:
|
||||
vote["bill"] = {
|
||||
"congress": int(dom.xpath("number(document/document_congress|congress)")), # some historical files don't have document/document_congress so take the first of document/document_congress or the top-level congress element as a fall-back
|
||||
"type": bill_types[unicode(dom.xpath("string(document/document_type)"))],
|
||||
"type": bill_types[str(dom.xpath("string(document/document_type)"))],
|
||||
"number": int(dom.xpath("number(document/document_number)")),
|
||||
"title": unicode(dom.xpath("string(document/document_title)")),
|
||||
"title": str(dom.xpath("string(document/document_title)")),
|
||||
}
|
||||
else:
|
||||
# s294-115.2017 through s302-115.2017 have S.Amdt. in document_type,
|
||||
@@ -242,16 +242,16 @@ def parse_senate_vote(dom, vote):
|
||||
# the rest of <document> is blank.
|
||||
pass
|
||||
|
||||
if unicode(dom.xpath("string(amendment/amendment_number)")):
|
||||
m = re.match(r"^S.Amdt. (\d+)", unicode(dom.xpath("string(amendment/amendment_number)")))
|
||||
if str(dom.xpath("string(amendment/amendment_number)")):
|
||||
m = re.match(r"^S.Amdt. (\d+)", str(dom.xpath("string(amendment/amendment_number)")))
|
||||
if m:
|
||||
vote["amendment"] = {
|
||||
"type": "s",
|
||||
"number": int(m.group(1)),
|
||||
"purpose": unicode(dom.xpath("string(amendment/amendment_purpose)")),
|
||||
"purpose": str(dom.xpath("string(amendment/amendment_purpose)")),
|
||||
}
|
||||
|
||||
amendment_to = unicode(dom.xpath("string(amendment/amendment_to_document_number)"))
|
||||
amendment_to = str(dom.xpath("string(amendment/amendment_to_document_number)"))
|
||||
if "Treaty" in amendment_to:
|
||||
treaty, number = amendment_to.split("-")
|
||||
vote["treaty"] = {
|
||||
@@ -264,7 +264,7 @@ def parse_senate_vote(dom, vote):
|
||||
"congress": vote["congress"],
|
||||
"type": bill_types[bill_type],
|
||||
"number": int(bill_number),
|
||||
"title": unicode(dom.xpath("string(amendment/amendment_to_document_short_title)")),
|
||||
"title": str(dom.xpath("string(amendment/amendment_to_document_short_title)")),
|
||||
}
|
||||
else:
|
||||
# Senate votes:
|
||||
@@ -289,7 +289,7 @@ def parse_senate_vote(dom, vote):
|
||||
logging.info("[%s] Missing lis_member_id, falling back to name lookup for %s" % (vote["vote_id"], voter["last_name"]))
|
||||
|
||||
# Ensure the options are noted, even if no one votes that way.
|
||||
if unicode(dom.xpath("string(question)")) == "Guilty or Not Guilty":
|
||||
if str(dom.xpath("string(question)")) == "Guilty or Not Guilty":
|
||||
vote["votes"]['Guilty'] = []
|
||||
vote["votes"]['Not Guilty'] = []
|
||||
else:
|
||||
@@ -307,7 +307,7 @@ def parse_senate_vote(dom, vote):
|
||||
"id": str(member.xpath("string(lis_member_id)")),
|
||||
"state": str(member.xpath("string(state)")),
|
||||
"party": str(member.xpath("string(party)")),
|
||||
"display_name": unicode(member.xpath("string(member_full)")),
|
||||
"display_name": str(member.xpath("string(member_full)")),
|
||||
"first_name": str(member.xpath("string(first_name)")),
|
||||
"last_name": str(member.xpath("string(last_name)")),
|
||||
})
|
||||
@@ -323,14 +323,14 @@ def parse_house_vote(dom, vote):
|
||||
return datetime.datetime.strptime(d, "%d-%b-%Y")
|
||||
|
||||
vote["date"] = parse_date(str(dom.xpath("string(vote-metadata/action-date)")) + " " + str(dom.xpath("string(vote-metadata/action-time)")))
|
||||
vote["question"] = unicode(dom.xpath("string(vote-metadata/vote-question)"))
|
||||
vote["type"] = unicode(dom.xpath("string(vote-metadata/vote-question)"))
|
||||
vote["question"] = str(dom.xpath("string(vote-metadata/vote-question)"))
|
||||
vote["type"] = str(dom.xpath("string(vote-metadata/vote-question)"))
|
||||
vote["type"] = normalize_vote_type(vote["type"])
|
||||
if unicode(dom.xpath("string(vote-metadata/vote-desc)")).startswith("Impeaching "):
|
||||
if str(dom.xpath("string(vote-metadata/vote-desc)")).startswith("Impeaching "):
|
||||
vote["category"] = "impeachment"
|
||||
else:
|
||||
vote["category"] = get_vote_category(vote["question"])
|
||||
vote["subject"] = unicode(dom.xpath("string(vote-metadata/vote-desc)"))
|
||||
vote["subject"] = str(dom.xpath("string(vote-metadata/vote-desc)"))
|
||||
if not vote["subject"]:
|
||||
del vote["subject"]
|
||||
|
||||
@@ -338,10 +338,10 @@ def parse_house_vote(dom, vote):
|
||||
vote_types = {"YEA-AND-NAY": "1/2", "2/3 YEA-AND-NAY": "2/3", "3/5 YEA-AND-NAY": "3/5", "1/2": "1/2", "2/3": "2/3", "QUORUM": "QUORUM", "RECORDED VOTE": "1/2", "2/3 RECORDED VOTE": "2/3", "3/5 RECORDED VOTE": "3/5"}
|
||||
vote["requires"] = vote_types.get(str(dom.xpath("string(vote-metadata/vote-type)")), "unknown")
|
||||
|
||||
vote["result_text"] = unicode(dom.xpath("string(vote-metadata/vote-result)"))
|
||||
vote["result"] = unicode(dom.xpath("string(vote-metadata/vote-result)"))
|
||||
vote["result_text"] = str(dom.xpath("string(vote-metadata/vote-result)"))
|
||||
vote["result"] = str(dom.xpath("string(vote-metadata/vote-result)"))
|
||||
|
||||
bill_num = unicode(dom.xpath("string(vote-metadata/legis-num)"))
|
||||
bill_num = str(dom.xpath("string(vote-metadata/legis-num)"))
|
||||
if bill_num not in ("", "QUORUM", "JOURNAL", "MOTION", "ADJOURN") and not re.match(r"QUORUM \d+$", bill_num):
|
||||
bill_types = {"S": "s", "S CON RES": "sconres", "S J RES": "sjres", "S RES": "sres", "H R": "hr", "H CON RES": "hconres", "H J RES": "hjres", "H RES": "hres"}
|
||||
try:
|
||||
@@ -358,16 +358,16 @@ def parse_house_vote(dom, vote):
|
||||
vote["amendment"] = {
|
||||
"type": "h-bill",
|
||||
"number": int(str(dom.xpath("string(vote-metadata/amendment-num)"))),
|
||||
"author": unicode(dom.xpath("string(vote-metadata/amendment-author)")),
|
||||
"author": str(dom.xpath("string(vote-metadata/amendment-author)")),
|
||||
}
|
||||
|
||||
# Assemble a complete question from the vote type, amendment, and bill number.
|
||||
if "amendment" in vote and "bill" in vote:
|
||||
vote["question"] += ": Amendment %s to %s" % (vote["amendment"]["number"], unicode(dom.xpath("string(vote-metadata/legis-num)")))
|
||||
vote["question"] += ": Amendment %s to %s" % (vote["amendment"]["number"], str(dom.xpath("string(vote-metadata/legis-num)")))
|
||||
elif "amendment" in vote:
|
||||
vote["question"] += ": Amendment %s to [unknown bill]" % vote["amendment"]["number"]
|
||||
elif "bill" in vote:
|
||||
vote["question"] += ": " + unicode(dom.xpath("string(vote-metadata/legis-num)"))
|
||||
vote["question"] += ": " + str(dom.xpath("string(vote-metadata/legis-num)"))
|
||||
if "subject" in vote:
|
||||
vote["question"] += " " + vote["subject"]
|
||||
elif "subject" in vote:
|
||||
@@ -380,10 +380,10 @@ def parse_house_vote(dom, vote):
|
||||
vote["votes"].setdefault(vote_option, []).append(voter)
|
||||
|
||||
# Ensure the options are noted, even if no one votes that way.
|
||||
if unicode(dom.xpath("string(vote-metadata/vote-question)")) == "Election of the Speaker":
|
||||
if str(dom.xpath("string(vote-metadata/vote-question)")) == "Election of the Speaker":
|
||||
for n in dom.xpath('vote-metadata/vote-totals/totals-by-candidate/candidate'):
|
||||
vote["votes"][n.text] = []
|
||||
elif unicode(dom.xpath("string(vote-metadata/vote-question)")) == "Call of the House":
|
||||
elif str(dom.xpath("string(vote-metadata/vote-question)")) == "Call of the House":
|
||||
for n in dom.xpath('vote-metadata/vote-totals/totals-by-candidate/candidate'):
|
||||
vote["votes"][n.text] = []
|
||||
elif "YEA-AND-NAY" in dom.xpath('string(vote-metadata/vote-type)'):
|
||||
@@ -398,7 +398,7 @@ def parse_house_vote(dom, vote):
|
||||
vote["votes"]['Not Voting'] = []
|
||||
|
||||
for member in dom.xpath("vote-data/recorded-vote"):
|
||||
display_name = unicode(member.xpath("string(legislator)"))
|
||||
display_name = str(member.xpath("string(legislator)"))
|
||||
state = str(member.xpath("string(legislator/@state)"))
|
||||
party = str(member.xpath("string(legislator/@party)"))
|
||||
vote_cast = str(member.xpath("string(vote)"))
|
||||
@@ -424,7 +424,7 @@ def parse_house_vote(dom, vote):
|
||||
# See https://github.com/unitedstates/congress/issues/46.
|
||||
|
||||
seen_ids = set()
|
||||
all_voters = sum(vote["votes"].values(), [])
|
||||
all_voters = sum(list(vote["votes"].values()), [])
|
||||
all_voters.sort(key=lambda v: len(v["display_name"]), reverse=True) # process longer names first
|
||||
for v in all_voters:
|
||||
if v["id"] not in ("", "0000000"):
|
||||
|
||||
@@ -5,7 +5,7 @@ import datetime
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
import urlparse
|
||||
import urllib.parse
|
||||
import time
|
||||
import datetime
|
||||
from lxml import html, etree
|
||||
@@ -90,7 +90,7 @@ def vote_ids_for_house(congress, session_year, options):
|
||||
|
||||
# download inside page, find the matching links
|
||||
page = utils.download(
|
||||
urlparse.urljoin(index_page, link.get("href")),
|
||||
urllib.parse.urljoin(index_page, link.get("href")),
|
||||
"%s/votes/%s/pages/house_%s.html" % (congress, session_year, grp),
|
||||
options)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import re
|
||||
import StringIO
|
||||
import io
|
||||
import csv
|
||||
import datetime
|
||||
import time
|
||||
@@ -22,7 +22,7 @@ def run(options):
|
||||
chamber = options.get('chamber', None)
|
||||
|
||||
# we're going to need to map votes to sessions because in modern history the numbering resets by session
|
||||
session_dates = list(csv.DictReader(StringIO.StringIO(utils.download("http://www.govtrack.us/data/us/sessions.tsv").encode("utf8")), delimiter="\t"))
|
||||
session_dates = list(csv.DictReader(io.StringIO(utils.download("http://www.govtrack.us/data/us/sessions.tsv").encode("utf8")), delimiter="\t"))
|
||||
|
||||
# download the vote data now
|
||||
if chamber and chamber in [ "h", "s" ]:
|
||||
@@ -462,7 +462,7 @@ def build_votes(vote_list):
|
||||
})
|
||||
|
||||
# sort for output
|
||||
for voters in votes.values():
|
||||
for voters in list(votes.values()):
|
||||
voters.sort(key=lambda v: v['display_name'])
|
||||
|
||||
return (votes, presidents_positions)
|
||||
|
||||
@@ -47,7 +47,7 @@ class HearingInfo(unittest.TestCase):
|
||||
self.assertEqual(test_output['room'], 'CAPITOL H-313')
|
||||
self.assertEqual(test_output['subcommittee'], None)
|
||||
self.assertEqual(test_output[
|
||||
'topic'], u'H.R. 4435\u2014National Defense Authorization Act for Fiscal Year 2015 [General Debate]; H.R. 4660\u2014Commerce, Justice, Science, and Related Agencies Appropriations Act, 2015')
|
||||
'topic'], 'H.R. 4435\u2014National Defense Authorization Act for Fiscal Year 2015 [General Debate]; H.R. 4660\u2014Commerce, Justice, Science, and Related Agencies Appropriations Act, 2015')
|
||||
self.assertEqual(test_output[
|
||||
'url'], 'http://docs.house.gov/Committee/Calendar/ByEvent.aspx?EventID=102252')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user