diff --git a/contrib/beanstalkd.py b/contrib/beanstalkd.py index d3a5892..12a872b 100644 --- a/contrib/beanstalkd.py +++ b/contrib/beanstalkd.py @@ -19,7 +19,7 @@ You must include a 'beakstalk' section in config.yml with this structure votes: 'us_votes' """ -from __future__ import print_function + import sys import logging @@ -64,7 +64,7 @@ def init_guard(reconnect=False): assert 'bills' in config['beanstalk']['tubes'] assert 'amendments' in config['beanstalk']['tubes'] assert 'votes' in config['beanstalk']['tubes'] - tube_names = config['beanstalk']['tubes'].values() + tube_names = list(config['beanstalk']['tubes'].values()) assert max(Counter(tube_names).values()) == 1, 'Must use unique beanstalk tube names.' _Config = config['beanstalk'] if _Connection is None or reconnect is True: @@ -84,13 +84,13 @@ def process_bill_wrapper(process_bill): try: conn.use(config['tubes']['bills']) conn.put(bill) - logging.warn(u"Queued {} to beanstalkd.".format(bill)) + logging.warn("Queued {} to beanstalkd.".format(bill)) break except beanstalkc.SocketError: - logging.warn(u"Lost connection to beanstalkd. Attempting to reconnect.") + logging.warn("Lost connection to beanstalkd. Attempting to reconnect.") (conn, config) = init_guard(reconnect=True) except Exception as e: - logging.warn(u"Ignored exception while queueing bill to beanstalkd: {0} {1}".format(unicode(type(e)), unicode(e))) + logging.warn("Ignored exception while queueing bill to beanstalkd: {0} {1}".format(str(type(e)), str(e))) traceback.print_exc() break @@ -110,13 +110,13 @@ def process_amendment_wrapper(process_amendment): try: conn.use(config['tubes']['amendments']) conn.put(str(amdt)) - logging.warn(u"Queued {} to beanstalkd.".format(amdt)) + logging.warn("Queued {} to beanstalkd.".format(amdt)) break except beanstalkc.SocketError: - logging.warn(u"Lost connection to beanstalkd. Attempting to reconnect.") + logging.warn("Lost connection to beanstalkd. Attempting to reconnect.") (conn, config) = init_guard(reconnect=True) except Exception as e: - logging.warn(u"Ignored exception while queueing amendment to beanstalkd: {0} {1}".format(unicode(type(e)), unicode(e))) + logging.warn("Ignored exception while queueing amendment to beanstalkd: {0} {1}".format(str(type(e)), str(e))) traceback.print_exc() break @@ -135,13 +135,13 @@ def output_vote_wrapper(output_vote): try: conn.use(config['tubes']['votes']) conn.put(vote['vote_id']) - logging.warn(u'Queued {} to beanstalkd.'.format(vote['vote_id'])) + logging.warn('Queued {} to beanstalkd.'.format(vote['vote_id'])) break except beanstalkc.SocketError: - logging.warn(u'Lost connection to beanstalkd. Attempting to reconnect.') + logging.warn('Lost connection to beanstalkd. Attempting to reconnect.') (conn, config) = init_guard(reconnect=True) except Exception as e: - logging.warn(u'Ignored exception while queueing vote to beanstalkd: {0} {1}'.format(unicode(type(e)), unicode(e))) + logging.warn('Ignored exception while queueing vote to beanstalkd: {0} {1}'.format(str(type(e)), str(e))) traceback.print_exc() break diff --git a/tasks/bill_info.py b/tasks/bill_info.py index febb1a8..58a672c 100644 --- a/tasks/bill_info.py +++ b/tasks/bill_info.py @@ -19,7 +19,7 @@ def create_govtrack_xml(bill, options): if options.get("govtrack", False): # Rewrite bioguide_id attributes as just id with GovTrack person IDs. attrs2 = {} - for k, v in attrs.items(): + for k, v in list(attrs.items()): if v: if k == "bioguide_id": # remap "bioguide_id" attributes to govtrack "id" @@ -43,7 +43,7 @@ def create_govtrack_xml(bill, options): elif k == "source_url": n.set("url", v) else: - n.set(k, unicode(v)) + n.set(k, str(v)) if "original_bill_number" in bill: make_node(root, "bill-number", bill["original_bill_number"]) @@ -328,7 +328,7 @@ def titles_for(title_list): titles_copy = list(titles) # clone before beginning sort def first_index_of(**kwargs): for i, title in enumerate(titles_copy): - for k, v in kwargs.items(): + for k, v in list(kwargs.items()): k = k.replace("_", "") if title.get(k) != v: break @@ -1097,7 +1097,7 @@ def parse_bill_action(action_dict, prev_status, bill_id, title): # sweep the action line for bill IDs of related bills bill_ids = utils.extract_bills(line, congress) - bill_ids = filter(lambda b: b != bill_id, bill_ids) + bill_ids = [b for b in bill_ids if b != bill_id] if bill_ids and (len(bill_ids) > 0): action['bill_ids'] = bill_ids diff --git a/tasks/bills.py b/tasks/bills.py index 76e28b6..c0e4d30 100644 --- a/tasks/bills.py +++ b/tasks/bills.py @@ -108,7 +108,7 @@ def process_bill(bill_id, options): # Convert and write out data.json and data.xml. utils.write( - unicode(json.dumps(bill_data, indent=2, sort_keys=True)), + str(json.dumps(bill_data, indent=2, sort_keys=True)), os.path.dirname(fdsys_xml_path) + '/data.json') from bill_info import create_govtrack_xml @@ -258,7 +258,7 @@ def reparse_actions(bill_id, options): import sys from difflib import unified_diff sys.stdout.writelines(unified_diff(split_lines(source), split_lines(revised), fromfile=fn, tofile=fn)) - return raw_input("Apply change? (y/n) ").strip() == "y" + return input("Apply change? (y/n) ").strip() == "y" wrote_any = False diff --git a/tasks/committee_meetings.py b/tasks/committee_meetings.py index cd125fe..b23c371 100644 --- a/tasks/committee_meetings.py +++ b/tasks/committee_meetings.py @@ -9,7 +9,7 @@ import uuid import logging import mechanize import zipfile -import StringIO +import io import requests import subprocess @@ -93,12 +93,12 @@ def fetch_senate_committee_meetings(committees, options): options)) for node in dom.xpath("meeting"): - committee_id = unicode(node.xpath('string(cmte_code)')) + committee_id = str(node.xpath('string(cmte_code)')) if committee_id.strip() == "": continue # "No committee hearings scheduled" placeholder - occurs_at = unicode(node.xpath('string(date)')) - room = unicode(node.xpath('string(room)')) - topic = unicode(node.xpath('string(matter)')) + occurs_at = str(node.xpath('string(date)')) + room = str(node.xpath('string(room)')) + topic = str(node.xpath('string(matter)')) occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p") topic = re.sub(r"\s+", " ", topic).strip() @@ -113,7 +113,7 @@ def fetch_senate_committee_meetings(committees, options): if subcommittee_code and subcommittee_code not in committees[committee_code]["subcommittees"]: raise ValueError(subcommittee_code) except: - print("Invalid committee code", committee_id) + print(("Invalid committee code", committee_id)) continue # See if this meeting already exists. If so, take its GUID. @@ -122,13 +122,13 @@ def fetch_senate_committee_meetings(committees, options): for mtg in existing_meetings: if mtg["committee"] == committee_code and mtg.get("subcommittee", None) == subcommittee_code and mtg["occurs_at"] == occurs_at.isoformat(): if options.get("debug", False): - print("[%s] Reusing gUID." % mtg["guid"]) + print(("[%s] Reusing gUID." % mtg["guid"])) guid = mtg["guid"] break else: # Not found, so create a new ID. # TODO: Can we make this a human-readable ID? - guid = unicode(uuid.uuid4()) + guid = str(uuid.uuid4()) # Scrape the topic text for mentions of bill numbers. congress = utils.congress_from_legislative_year(utils.current_legislative_year(occurs_at)) @@ -139,7 +139,7 @@ def fetch_senate_committee_meetings(committees, options): # Create the meeting event. if options.get("debug", False): - print("[senate][%s][%s] Found meeting in room %s at %s." % (committee_code, subcommittee_code, room, occurs_at.isoformat())) + print(("[senate][%s][%s] Found meeting in room %s at %s." % (committee_code, subcommittee_code, room, occurs_at.isoformat()))) meetings.append({ "chamber": "senate", @@ -153,7 +153,7 @@ def fetch_senate_committee_meetings(committees, options): "bill_ids": bills, }) - print("[senate] Found %i meetings." % len(meetings)) + print(("[senate] Found %i meetings." % len(meetings))) return meetings # House @@ -197,7 +197,7 @@ def fetch_house_committee_meetings(committees, options): # original start to loop for mtg in dom.xpath("channel/item"): - eventurl = unicode(mtg.xpath("string(link)")) + eventurl = str(mtg.xpath("string(link)")) event_id = re.search(r"EventID=(\d+)$", eventurl) if not event_id: continue # weird empty event showed up event_id = event_id.group(1) @@ -217,7 +217,7 @@ def fetch_house_committee_meetings(committees, options): # if bad zipfile if load_xml_from_page == False: continue - print("[house] Found %i meetings." % len(meetings)) + print(("[house] Found %i meetings." % len(meetings))) return meetings @@ -245,7 +245,7 @@ def fetch_meeting_from_event_id(committees, options, load_id): if load_xml_from_page == False: continue current_id += 1 - print("[house] Found %i meetings." % len(meetings)) + print(("[house] Found %i meetings." % len(meetings))) return meetings @@ -301,13 +301,13 @@ def extract_meeting_package(eventurl, event_id, options): try: dom = lxml.etree.fromstring(request.read()) except lxml.etree.XMLSyntaxError as e: - print(event_id, e) + print((event_id, e)) return False return {"witnesses": None, "uploaded_documents": [], "dom": dom} ## read zipfile try: - request_bytes = StringIO.StringIO(request.read()) + request_bytes = io.StringIO(request.read()) package = zipfile.ZipFile(request_bytes) except: message = "Problem downloading zipfile: %s" % (event_id) @@ -533,13 +533,13 @@ def parse_house_committee_meeting(event_id, dom, existing_meetings, committees, else: # Not found, so create a new ID. # TODO: when does this happen? - guid = unicode(uuid.uuid4()) + guid = str(uuid.uuid4()) url = "http://docs.house.gov/Committee/Calendar/ByEvent.aspx?EventID=" + event_id # return the parsed meeting if options.get("debug", False): - print("[house][%s][%s] Found meeting in room %s at %s" % (committee_code, subcommittee_code, room, occurs_at.isoformat())) + print(("[house][%s][%s] Found meeting in room %s at %s" % (committee_code, subcommittee_code, room, occurs_at.isoformat()))) results = { "chamber": "house", @@ -582,7 +582,7 @@ def save_documents(package, event_id): try: bytes = package.read(name) except: - print("Did not save to disk: file %s" % (name)) + print(("Did not save to disk: file %s" % (name))) continue file_name = "%s/%s" % (output_dir, name) @@ -651,7 +651,7 @@ def save_file(url, event_id): text_doc = text_from_pdf(file_name) return True except: - print("Failed to save- %s" % (url)) + print(("Failed to save- %s" % (url))) return False else: logging.info("failed to fetch: " + url) diff --git a/tasks/govinfo.py b/tasks/govinfo.py index 6d88a0d..f1cc750 100644 --- a/tasks/govinfo.py +++ b/tasks/govinfo.py @@ -473,7 +473,7 @@ def get_output_path(collection, package_name, options): def unwrap_text_in_html(data): - text_content = unicode(html.fromstring(data).text_content()) + text_content = str(html.fromstring(data).text_content()) return text_content.encode("utf8") diff --git a/tasks/statutes.py b/tasks/statutes.py index c45cad5..a41fa9e 100755 --- a/tasks/statutes.py +++ b/tasks/statutes.py @@ -77,14 +77,14 @@ def run(options): elif "volumes" in options: start, end = options["volumes"].split("-") to_fetch = [] - for v in xrange(int(start), int(end) + 1): + for v in range(int(start), int(end) + 1): to_fetch.extend(glob.glob(root_dir + "/*/STATUTE-" + str(v))) elif "year" in options: to_fetch = glob.glob(root_dir + "/" + str(int(options["year"])) + "/STATUTE-*") elif "years" in options: start, end = options["years"].split("-") to_fetch = [] - for y in xrange(int(start), int(end) + 1): + for y in range(int(start), int(end) + 1): to_fetch.extend(glob.glob(root_dir + "/" + str(y) + "/STATUTE-*")) else: to_fetch = sorted(glob.glob(root_dir + "/*/STATUTE-*")) diff --git a/tasks/utils.py b/tasks/utils.py index 503238a..b75c975 100644 --- a/tasks/utils.py +++ b/tasks/utils.py @@ -6,7 +6,7 @@ import traceback import zipfile import platform import re -import htmlentitydefs +import html.entities import json from pytz import timezone import datetime @@ -47,7 +47,7 @@ def format_datetime(obj): return eastern_time_zone.localize(obj.replace(microsecond=0)).isoformat() elif isinstance(obj, datetime.date): return obj.isoformat() - elif isinstance(obj, (str, unicode)): + elif isinstance(obj, str): return obj else: return None @@ -257,7 +257,7 @@ def download(url, destination=None, options={}): # archive. if destination and to_cache: dparts = destination.split(os.sep) - for i in xrange(len(dparts) - 1): + for i in range(len(dparts) - 1): # form the ZIP file name and test if it exists... zfn = os.path.join(cache, *dparts[:i + 1]) + ".zip" if not os.path.exists(zfn): @@ -317,11 +317,11 @@ def download(url, destination=None, options={}): if not is_binary: body = response.text # a subclass of a 'unicode' instance - if not isinstance(body, unicode): + if not isinstance(body, str): raise ValueError("Content not decoded.") else: body = response.content # a 'str' instance - if isinstance(body, unicode): + if isinstance(body, str): raise ValueError("Binary content improperly decoded.") except scrapelib.HTTPError as e: logging.error("Error downloading %s:\n\n%s" % (url, format_exception(e))) @@ -438,7 +438,7 @@ def xpath_regex(doc, element, pattern): def unescape(text): def remove_unicode_control(str): - remove_re = re.compile(u'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]') + remove_re = re.compile('[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]') return remove_re.sub('', str) def fixup(m): @@ -447,15 +447,15 @@ def unescape(text): # character reference try: if text[:3] == "&#x": - return unichr(int(text[3:-1], 16)) + return chr(int(text[3:-1], 16)) else: - return unichr(int(text[2:-1])) + return chr(int(text[2:-1])) except ValueError: pass else: # named entity try: - text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) + text = chr(html.entities.name2codepoint[text[1:-1]]) except KeyError: pass return text # leave as is @@ -532,7 +532,7 @@ def admin(body): except Exception as exception: print("Exception logging message to admin, halting as to avoid loop") - print(format_exception(exception)) + print((format_exception(exception))) def format_exception(exception): @@ -572,7 +572,7 @@ def make_node(parent, tag, text, **attrs): n = etree.Element(tag) parent.append(n) n.text = text - for k, v in attrs.items(): + for k, v in list(attrs.items()): if v is None: continue if isinstance(v, datetime.datetime): @@ -734,16 +734,16 @@ def lookup_legislator(congress, role_type, name, state, party, when, id_requeste for filename in ("legislators-historical", "legislators-current"): for moc in yaml_load("congress-legislators/%s.yaml" % (filename)): for term in moc["terms"]: - for c in xrange(congress_from_legislative_year(int(term['start'][0:4])) - 1, + for c in range(congress_from_legislative_year(int(term['start'][0:4])) - 1, congress_from_legislative_year(int(term['end'][0:4])) + 1 + 1): lookup_legislator_cache.setdefault(c, []).append((moc, term)) def to_ascii(name): name = name.replace("-", " ") - if not isinstance(name, unicode): + if not isinstance(name, str): return name import unicodedata - return u"".join(c for c in unicodedata.normalize('NFKD', name) if not unicodedata.combining(c)) + return "".join(c for c in unicodedata.normalize('NFKD', name) if not unicodedata.combining(c)) # Scan all of the terms that cover 'when' for a match. if isinstance(when, datetime.datetime): @@ -833,7 +833,7 @@ def translate_legislator_id(source_id_type, source_id, dest_id_type): _translate_legislator_id_cache = { } for filename in ("legislators-historical", "legislators-current"): for moc in yaml_load("congress-legislators/%s.yaml" % (filename)): - for id_type, id_value in moc["id"].items(): + for id_type, id_value in list(moc["id"].items()): try: _translate_legislator_id_cache[(id_type, id_value)] = moc['id'] except TypeError: @@ -871,6 +871,6 @@ class NoInterrupt(object): for sig in self.sigs: signal.signal(sig, self.old_handlers[sig]) # Issue the signals caught during the with-block. - for sig, args in self.signal_received.items(): + for sig, args in list(self.signal_received.items()): if self.old_handlers[sig]: self.old_handlers[sig](*args) diff --git a/tasks/vote_info.py b/tasks/vote_info.py index d55c060..6177b20 100644 --- a/tasks/vote_info.py +++ b/tasks/vote_info.py @@ -132,7 +132,7 @@ def output_vote(vote, options, id_type=None): # preferred order of output: ayes, nays, present, then not voting, and similarly for guilty/not-guilty # and handling other options like people's names for votes for the Speaker. option_sort_order = ('Aye', 'Yea', 'Guilty', 'No', 'Nay', 'Not Guilty', 'OTHER', 'Present', 'Not Voting') - options_list = sorted(vote["votes"].keys(), key=lambda o: option_sort_order.index(o) if o in option_sort_order else option_sort_order.index("OTHER")) + options_list = sorted(list(vote["votes"].keys()), key=lambda o: option_sort_order.index(o) if o in option_sort_order else option_sort_order.index("OTHER")) for option in options_list: if option not in option_keys: option_keys[option] = option @@ -180,18 +180,18 @@ def parse_senate_vote(dom, vote): vote["date"] = parse_date(dom.xpath("string(vote_date)")) if len(dom.xpath("modify_date")) > 0: vote["record_modified"] = parse_date(dom.xpath("string(modify_date)")) # some votes like s1-110.2008 don't have a modify_date - vote["question"] = unicode(dom.xpath("string(vote_question_text)")) + vote["question"] = str(dom.xpath("string(vote_question_text)")) if vote["question"] == "": - vote["question"] = unicode(dom.xpath("string(question)")) # historical votes? - vote["type"] = unicode(dom.xpath("string(vote_question)")) + vote["question"] = str(dom.xpath("string(question)")) # historical votes? + vote["type"] = str(dom.xpath("string(vote_question)")) if vote["type"] == "": vote["type"] = vote["question"] vote["type"] = normalize_vote_type(vote["type"]) vote["category"] = get_vote_category(vote["type"]) - vote["subject"] = unicode(dom.xpath("string(vote_title)")) - vote["requires"] = unicode(dom.xpath("string(majority_requirement)")) - vote["result_text"] = unicode(dom.xpath("string(vote_result_text)")) - vote["result"] = unicode(dom.xpath("string(vote_result)")) + vote["subject"] = str(dom.xpath("string(vote_title)")) + vote["requires"] = str(dom.xpath("string(majority_requirement)")) + vote["result_text"] = str(dom.xpath("string(vote_result_text)")) + vote["result"] = str(dom.xpath("string(vote_result)")) # Senate cloture votes have consistently bad vote_question_text values: They don't say what the cloture # was about specifically, just what bill was relevant. So cloture on an amendment just appears as @@ -218,23 +218,23 @@ def parse_senate_vote(dom, vote): bill_types = {"S.": "s", "S.Con.Res.": "sconres", "S.J.Res.": "sjres", "S.Res.": "sres", "H.R.": "hr", "H.Con.Res.": "hconres", "H.J.Res.": "hjres", "H.Res.": "hres"} - if unicode(dom.xpath("string(document/document_type)")): + if str(dom.xpath("string(document/document_type)")): if dom.xpath("string(document/document_type)") == "PN": vote["nomination"] = { - "number": unicode(dom.xpath("string(document/document_number)")), - "title": unicode(dom.xpath("string(document/document_title)")), + "number": str(dom.xpath("string(document/document_number)")), + "title": str(dom.xpath("string(document/document_title)")), } vote["question"] += ": " + vote["nomination"]["title"] elif dom.xpath("string(document/document_type)") == "Treaty Doc.": vote["treaty"] = { - "title": unicode(dom.xpath("string(document/document_title)")), + "title": str(dom.xpath("string(document/document_title)")), } - elif unicode(dom.xpath("string(document/document_type)")) in bill_types: + elif str(dom.xpath("string(document/document_type)")) in bill_types: vote["bill"] = { "congress": int(dom.xpath("number(document/document_congress|congress)")), # some historical files don't have document/document_congress so take the first of document/document_congress or the top-level congress element as a fall-back - "type": bill_types[unicode(dom.xpath("string(document/document_type)"))], + "type": bill_types[str(dom.xpath("string(document/document_type)"))], "number": int(dom.xpath("number(document/document_number)")), - "title": unicode(dom.xpath("string(document/document_title)")), + "title": str(dom.xpath("string(document/document_title)")), } else: # s294-115.2017 through s302-115.2017 have S.Amdt. in document_type, @@ -242,16 +242,16 @@ def parse_senate_vote(dom, vote): # the rest of is blank. pass - if unicode(dom.xpath("string(amendment/amendment_number)")): - m = re.match(r"^S.Amdt. (\d+)", unicode(dom.xpath("string(amendment/amendment_number)"))) + if str(dom.xpath("string(amendment/amendment_number)")): + m = re.match(r"^S.Amdt. (\d+)", str(dom.xpath("string(amendment/amendment_number)"))) if m: vote["amendment"] = { "type": "s", "number": int(m.group(1)), - "purpose": unicode(dom.xpath("string(amendment/amendment_purpose)")), + "purpose": str(dom.xpath("string(amendment/amendment_purpose)")), } - amendment_to = unicode(dom.xpath("string(amendment/amendment_to_document_number)")) + amendment_to = str(dom.xpath("string(amendment/amendment_to_document_number)")) if "Treaty" in amendment_to: treaty, number = amendment_to.split("-") vote["treaty"] = { @@ -264,7 +264,7 @@ def parse_senate_vote(dom, vote): "congress": vote["congress"], "type": bill_types[bill_type], "number": int(bill_number), - "title": unicode(dom.xpath("string(amendment/amendment_to_document_short_title)")), + "title": str(dom.xpath("string(amendment/amendment_to_document_short_title)")), } else: # Senate votes: @@ -289,7 +289,7 @@ def parse_senate_vote(dom, vote): logging.info("[%s] Missing lis_member_id, falling back to name lookup for %s" % (vote["vote_id"], voter["last_name"])) # Ensure the options are noted, even if no one votes that way. - if unicode(dom.xpath("string(question)")) == "Guilty or Not Guilty": + if str(dom.xpath("string(question)")) == "Guilty or Not Guilty": vote["votes"]['Guilty'] = [] vote["votes"]['Not Guilty'] = [] else: @@ -307,7 +307,7 @@ def parse_senate_vote(dom, vote): "id": str(member.xpath("string(lis_member_id)")), "state": str(member.xpath("string(state)")), "party": str(member.xpath("string(party)")), - "display_name": unicode(member.xpath("string(member_full)")), + "display_name": str(member.xpath("string(member_full)")), "first_name": str(member.xpath("string(first_name)")), "last_name": str(member.xpath("string(last_name)")), }) @@ -323,14 +323,14 @@ def parse_house_vote(dom, vote): return datetime.datetime.strptime(d, "%d-%b-%Y") vote["date"] = parse_date(str(dom.xpath("string(vote-metadata/action-date)")) + " " + str(dom.xpath("string(vote-metadata/action-time)"))) - vote["question"] = unicode(dom.xpath("string(vote-metadata/vote-question)")) - vote["type"] = unicode(dom.xpath("string(vote-metadata/vote-question)")) + vote["question"] = str(dom.xpath("string(vote-metadata/vote-question)")) + vote["type"] = str(dom.xpath("string(vote-metadata/vote-question)")) vote["type"] = normalize_vote_type(vote["type"]) - if unicode(dom.xpath("string(vote-metadata/vote-desc)")).startswith("Impeaching "): + if str(dom.xpath("string(vote-metadata/vote-desc)")).startswith("Impeaching "): vote["category"] = "impeachment" else: vote["category"] = get_vote_category(vote["question"]) - vote["subject"] = unicode(dom.xpath("string(vote-metadata/vote-desc)")) + vote["subject"] = str(dom.xpath("string(vote-metadata/vote-desc)")) if not vote["subject"]: del vote["subject"] @@ -338,10 +338,10 @@ def parse_house_vote(dom, vote): vote_types = {"YEA-AND-NAY": "1/2", "2/3 YEA-AND-NAY": "2/3", "3/5 YEA-AND-NAY": "3/5", "1/2": "1/2", "2/3": "2/3", "QUORUM": "QUORUM", "RECORDED VOTE": "1/2", "2/3 RECORDED VOTE": "2/3", "3/5 RECORDED VOTE": "3/5"} vote["requires"] = vote_types.get(str(dom.xpath("string(vote-metadata/vote-type)")), "unknown") - vote["result_text"] = unicode(dom.xpath("string(vote-metadata/vote-result)")) - vote["result"] = unicode(dom.xpath("string(vote-metadata/vote-result)")) + vote["result_text"] = str(dom.xpath("string(vote-metadata/vote-result)")) + vote["result"] = str(dom.xpath("string(vote-metadata/vote-result)")) - bill_num = unicode(dom.xpath("string(vote-metadata/legis-num)")) + bill_num = str(dom.xpath("string(vote-metadata/legis-num)")) if bill_num not in ("", "QUORUM", "JOURNAL", "MOTION", "ADJOURN") and not re.match(r"QUORUM \d+$", bill_num): bill_types = {"S": "s", "S CON RES": "sconres", "S J RES": "sjres", "S RES": "sres", "H R": "hr", "H CON RES": "hconres", "H J RES": "hjres", "H RES": "hres"} try: @@ -358,16 +358,16 @@ def parse_house_vote(dom, vote): vote["amendment"] = { "type": "h-bill", "number": int(str(dom.xpath("string(vote-metadata/amendment-num)"))), - "author": unicode(dom.xpath("string(vote-metadata/amendment-author)")), + "author": str(dom.xpath("string(vote-metadata/amendment-author)")), } # Assemble a complete question from the vote type, amendment, and bill number. if "amendment" in vote and "bill" in vote: - vote["question"] += ": Amendment %s to %s" % (vote["amendment"]["number"], unicode(dom.xpath("string(vote-metadata/legis-num)"))) + vote["question"] += ": Amendment %s to %s" % (vote["amendment"]["number"], str(dom.xpath("string(vote-metadata/legis-num)"))) elif "amendment" in vote: vote["question"] += ": Amendment %s to [unknown bill]" % vote["amendment"]["number"] elif "bill" in vote: - vote["question"] += ": " + unicode(dom.xpath("string(vote-metadata/legis-num)")) + vote["question"] += ": " + str(dom.xpath("string(vote-metadata/legis-num)")) if "subject" in vote: vote["question"] += " " + vote["subject"] elif "subject" in vote: @@ -380,10 +380,10 @@ def parse_house_vote(dom, vote): vote["votes"].setdefault(vote_option, []).append(voter) # Ensure the options are noted, even if no one votes that way. - if unicode(dom.xpath("string(vote-metadata/vote-question)")) == "Election of the Speaker": + if str(dom.xpath("string(vote-metadata/vote-question)")) == "Election of the Speaker": for n in dom.xpath('vote-metadata/vote-totals/totals-by-candidate/candidate'): vote["votes"][n.text] = [] - elif unicode(dom.xpath("string(vote-metadata/vote-question)")) == "Call of the House": + elif str(dom.xpath("string(vote-metadata/vote-question)")) == "Call of the House": for n in dom.xpath('vote-metadata/vote-totals/totals-by-candidate/candidate'): vote["votes"][n.text] = [] elif "YEA-AND-NAY" in dom.xpath('string(vote-metadata/vote-type)'): @@ -398,7 +398,7 @@ def parse_house_vote(dom, vote): vote["votes"]['Not Voting'] = [] for member in dom.xpath("vote-data/recorded-vote"): - display_name = unicode(member.xpath("string(legislator)")) + display_name = str(member.xpath("string(legislator)")) state = str(member.xpath("string(legislator/@state)")) party = str(member.xpath("string(legislator/@party)")) vote_cast = str(member.xpath("string(vote)")) @@ -424,7 +424,7 @@ def parse_house_vote(dom, vote): # See https://github.com/unitedstates/congress/issues/46. seen_ids = set() - all_voters = sum(vote["votes"].values(), []) + all_voters = sum(list(vote["votes"].values()), []) all_voters.sort(key=lambda v: len(v["display_name"]), reverse=True) # process longer names first for v in all_voters: if v["id"] not in ("", "0000000"): diff --git a/tasks/votes.py b/tasks/votes.py index 4f60485..c073d22 100644 --- a/tasks/votes.py +++ b/tasks/votes.py @@ -5,7 +5,7 @@ import datetime import os import os.path import re -import urlparse +import urllib.parse import time import datetime from lxml import html, etree @@ -90,7 +90,7 @@ def vote_ids_for_house(congress, session_year, options): # download inside page, find the matching links page = utils.download( - urlparse.urljoin(index_page, link.get("href")), + urllib.parse.urljoin(index_page, link.get("href")), "%s/votes/%s/pages/house_%s.html" % (congress, session_year, grp), options) diff --git a/tasks/voteview.py b/tasks/voteview.py index 36a2ccb..1252068 100644 --- a/tasks/voteview.py +++ b/tasks/voteview.py @@ -1,5 +1,5 @@ import re -import StringIO +import io import csv import datetime import time @@ -22,7 +22,7 @@ def run(options): chamber = options.get('chamber', None) # we're going to need to map votes to sessions because in modern history the numbering resets by session - session_dates = list(csv.DictReader(StringIO.StringIO(utils.download("http://www.govtrack.us/data/us/sessions.tsv").encode("utf8")), delimiter="\t")) + session_dates = list(csv.DictReader(io.StringIO(utils.download("http://www.govtrack.us/data/us/sessions.tsv").encode("utf8")), delimiter="\t")) # download the vote data now if chamber and chamber in [ "h", "s" ]: @@ -462,7 +462,7 @@ def build_votes(vote_list): }) # sort for output - for voters in votes.values(): + for voters in list(votes.values()): voters.sort(key=lambda v: v['display_name']) return (votes, presidents_positions) diff --git a/test/test_house_hearing.py b/test/test_house_hearing.py index b61c9ac..8e9b35d 100644 --- a/test/test_house_hearing.py +++ b/test/test_house_hearing.py @@ -47,7 +47,7 @@ class HearingInfo(unittest.TestCase): self.assertEqual(test_output['room'], 'CAPITOL H-313') self.assertEqual(test_output['subcommittee'], None) self.assertEqual(test_output[ - 'topic'], u'H.R. 4435\u2014National Defense Authorization Act for Fiscal Year 2015 [General Debate]; H.R. 4660\u2014Commerce, Justice, Science, and Related Agencies Appropriations Act, 2015') + 'topic'], 'H.R. 4435\u2014National Defense Authorization Act for Fiscal Year 2015 [General Debate]; H.R. 4660\u2014Commerce, Justice, Science, and Related Agencies Appropriations Act, 2015') self.assertEqual(test_output[ 'url'], 'http://docs.house.gov/Committee/Calendar/ByEvent.aspx?EventID=102252')