congress/congress/tasks/voteview.py

import re
import io
import csv
import datetime
import time
import logging

from congress.tasks import utils
from congress.tasks.vote_info import output_vote

# load some hard-coded codes
def _load_special_vote_options(path="tasks/voteview_codedoptions.csv"):
    """Read the manually coded vote options from a CSV file.

    Each data row is expected to carry, by column index:
      1: the key under which the row is stored,
      2: a replacement string stored alongside the codes,
      3: ';'-separated "code:label" pairs, where code is an integer.
    A row whose first column is "vote date" is the header and is skipped.

    Returns a dict mapping column 1 to a tuple of
    (column 2, {int code: label}).
    """
    loaded = {}
    # Use a context manager so the file handle is closed once the rows
    # have been consumed.
    with open(path) as codes_file:
        for rec in csv.reader(codes_file):
            if not rec:
                continue  # csv.reader yields [] for blank lines
            if rec[0] == "vote date":
                continue  # header
            codes = {}
            for pair in rec[3].split(';'):
                parts = pair.split(':', 1)
                codes[int(parts[0])] = parts[1]
            loaded[rec[1]] = (rec[2], codes)
    return loaded


special_vote_options = _load_special_vote_options()


def run(options):
    """Fetch voteview votes for one Congress and hand them to put_vote.

    Reads from ``options``:
      "congress": Congress number; when missing or falsy,
                  utils.current_congress() is used instead.
      "chamber":  "h" or "s" to restrict to one chamber; any other value
                  (or none) processes both, House first.

    Raises Exception if the sessions file cannot be downloaded.
    """
    congress = options.get("congress", None)
    congress = int(congress) if congress else utils.current_congress()

    chamber = options.get('chamber', None)

    # we're going to need to map votes to sessions because in modern history the numbering resets by session
    # NOTE(review): assumes utils.download returns text (str) -- confirm.
    # io.StringIO only accepts text, so the body must not be encoded to bytes.
    sessions_tsv = utils.download("http://www.govtrack.us/data/us/sessions.tsv")
    if sessions_tsv is None:
        raise Exception("Could not download sessions.tsv.")
    session_dates = list(csv.DictReader(io.StringIO(sessions_tsv), delimiter="\t"))

    # download the vote data now
    chambers = [chamber] if chamber in ("h", "s") else ["h", "s"]
    votes = []
    for chamber_code in chambers:
        # get_votes returns None when a download fails; treat that as
        # "no votes" so the other chamber can still be processed.
        votes.extend(get_votes(chamber_code, congress, options, session_dates) or [])

    utils.process_set(votes, put_vote, options)


def vote_list_source_urls_for(congress, chamber, options):
    """Find the ORD and DTL file URLs on voteview's index page for a chamber.

    Downloads the index page for the given Congress ("house" when chamber
    is "h", otherwise "senate") and extracts the two ftp:// links.

    Returns a tuple (ord_url, dtl_url).

    Raises:
      Exception: the index page download returned None.
      ValueError: a link pattern matched zero times or more than once.
    """
    url = "http://www.voteview.com/%s%02d.htm" % (("house" if chamber == "h" else "senate"), congress)
    index_page = utils.download(url, cache_file_for(congress, chamber, "html"), options)
    if index_page is None:
        raise Exception("No data.")  # should only happen on a 404

    def match(pattern):
        # Require exactly one hit so an ambiguous page is never silently accepted.
        matches = re.findall(pattern, index_page, re.I)
        if len(matches) != 1:
            raise ValueError("Index page %s did not match one value for pattern %s." % (url, pattern))
        return matches[0]

    # Raw strings: "\." and "\s" are regex escapes, not string escapes.
    return match(r"ftp://voteview.com/[^\.\s]+\.ord"), match(r"ftp://voteview.com/dtl/[^\.\s]+\.dtl")


def cache_file_for(congress, chamber, file_type):
    """Return the cache path "voteview/<congress>-<chamber>.<file_type>"."""
    stem = str(congress) + "-" + str(chamber)
    return "voteview/" + stem + "." + str(file_type)


def get_state_from_icpsr_state_code(icpsr_state_code):
    """Translate an ICPSR numeric state code into a two-letter abbreviation.

    Code 99 maps to None (used by presidents). Any code not in the table
    raises KeyError.
    """
    # Each entry is (first code of a run, abbreviations for consecutive codes).
    code_runs = (
        (1, ("CT", "ME", "MA", "NH", "RI", "VT")),
        (11, ("DE", "NJ", "NY", "PA")),
        (21, ("IL", "IN", "MI", "OH", "WI")),
        (31, ("IA", "KS", "MN", "MO", "NE", "ND", "SD")),
        (40, ("VA", "AL", "AR", "FL", "GA", "LA", "MS", "NC", "SC", "TX")),
        (51, ("KY", "MD", "OK", "TN", "DC", "WV")),
        (61, ("AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY")),
        (71, ("CA", "OR", "WA")),
        (81, ("AK", "HI")),
        (99, (None,)),  # Used by presidents
    )

    abbreviation_by_code = {}
    for first_code, abbreviations in code_runs:
        for offset, abbreviation in enumerate(abbreviations):
            abbreviation_by_code[first_code + offset] = abbreviation

    return abbreviation_by_code[icpsr_state_code]


def get_party_from_icpsr_party_code(icpsr_party_code):
    """Translate an ICPSR numeric party code into a party name.

    Returns None when the code is not in the table.
    """
    party_names = dict((
        (1, "Federalist"), (9, "Jefferson Republican"), (10, "Anti-Federalist"),
        (11, "Jefferson Democrat"), (13, "Democrat-Republican"), (22, "Adams"),
        (25, "National Republican"), (26, "Anti Masonic"), (29, "Whig"),
        (34, "Whig and Democrat"), (37, "Constitutional Unionist"),
        (40, "Anti-Democrat and States Rights"), (41, "Anti-Jackson Democrat"),
        (43, "Calhoun Nullifier"), (44, "Nullifier"), (46, "States Rights"),
        (48, "States Rights Whig"),
        (100, "Democrat"), (101, "Jackson Democrat"), (103, "Democrat and Anti-Mason"),
        (104, "Van Buren Democrat"), (105, "Conservative Democrat"),
        (108, "Anti-Lecompton Democrat"), (110, "Popular Sovereignty Democrat"),
        (112, "Conservative"), (114, "Readjuster"), (117, "Readjuster Democrat"),
        (118, "Tariff for Revenue Democrat"), (119, "United Democrat"),
        (200, "Republican"), (202, "Union Conservative"), (203, "Unconditional Unionist"),
        (206, "Unionist"), (208, "Liberal Republican"), (212, "United Republican"),
        (213, "Progressive Republican"), (214, "Non-Partisan and Republican"),
        (215, "War Democrat"),
        (300, "Free Soil"), (301, "Free Soil Democrat"), (302, "Free Soil Whig"),
        (304, "Anti-Slavery"), (308, "Free Soil American and Democrat"),
        (310, "American"), (326, "National Greenbacker"), (328, "Independent"),
        (329, "Ind. Democrat"), (331, "Ind. Republican"), (333, "Ind. Republican-Democrat"),
        (336, "Anti-Monopolist"), (337, "Anti-Monopoly Democrat"), (340, "Populist"),
        (341, "People's"), (347, "Prohibitionist"), (353, "Ind. Silver Republican"),
        (354, "Silver Republican"), (355, "Union"), (356, "Union Labor"),
        (370, "Progressive"), (380, "Socialist"),
        (401, "Fusionist"), (402, "Liberal"), (403, "Law and Order"),
        (522, "American Labor"), (537, "Farmer-Labor"), (555, "Jackson"),
        (603, "Ind. Whig"),
        (1060, "Silver"), (1061, "Emancipationist"), (1111, "Liberty"),
        (1116, "Conservative Republican"), (1275, "Anti-Jackson"),
        (1346, "Jackson Republican"),
        (3333, "Opposition"), (4000, "Anti-Administration"), (4444, "Union"),
        (5000, "Pro-Administration"), (6000, "Crawford Federalist"),
        (6666, "Crawford Republican"), (7000, "Jackson Federalist"),
        (7777, "Crawford Republican"), (8000, "Adams-Clay Federalist"),
        (8888, "Adams-Clay Republican"), (9000, "Unknown"), (9999, "Unknown"),
    ))

    try:
        return party_names[icpsr_party_code]
    except KeyError:
        # Unlisted codes are reported as None rather than as an error.
        return None


def parse_voteview_vote_code(vote_code):
    """Map a voteview integer vote code (0-9) to (option, extra).

    ``option`` is one of the standard vote options "Yea", "Nay",
    "Not Voting" or "Present" (or None for code 0); ``extra`` is an
    additional string kept so that no information provided by voteview
    is lost. Any other code raises KeyError.

    Background:
      * The House probably used Aye and No in some votes, but we don't
        know which. "Yea" and "Nay" are always used by the Senate, and
        always in the House on the passage of bills.
      * A paired vote is when two members plan to be absent in a way that
        does not affect the vote outcome. The Congressional Record shows
        who is paired with whom. Some pairings are for a particular vote,
        others are "until further notice." Paired members are recorded as
        not voting.
      * A live pair is when one half of the pair is present, withdraws
        their vote and votes present because the other half isn't there.
        Live pairs aren't recorded in this data and are treated simply as
        pairs (and thus for us, not voting). Some paired members are
        recorded simply as present in this data --- not clear why.
      * See the House vote on the Civil Rights Act of 1957 (85th Congress,
        Jun 18, 1957, what this data calls #42, volume 103 page 9518 of
        the Congressional Record) for an example of paired votes.
    """
    # Position in this tuple is the voteview code.
    meanings = (
        (None, None),  # 0: not a member at the time of the vote (but sometimes recorded as Not Voting)
        ("Yea", None),  # 1
        ("Not Voting", "paired-yea"),  # 2
        ("Not Voting", "announced-yea"),  # 3
        ("Not Voting", "announced-nay"),  # 4
        ("Not Voting", "paired-nay"),  # 5
        ("Nay", None),  # 6
        ("Present", "type-seven"),  # 7
        ("Present", "type-eight"),  # 8
        ("Not Voting", None),  # 9
    )
    meaning_by_code = dict(enumerate(meanings))
    return meaning_by_code[vote_code]


def parse_vote_list_line(vote_list_line):
    """Split one line of the vote list (ORD) file into its ten raw fields.

    Returns the regex groups as a tuple of strings, untrimmed. Raises
    AttributeError when the line does not match the expected layout.
    """
    layout = r"^([\s\d]{2}\d)([\s\d]{4}\d)([\s\d]\d)([\s\d]{2})([^\d]+?)([\s\d]{3}\d)([\s\d])([\s\d])([^\s\d][^\d]+?(?:\d\s+)?)(\d+)$"
    matched = re.match(layout, vote_list_line)
    return matched.groups()


def parse_rollcall_dtl_list_line(rollcall_list_line):
    """Split one line of the roll call detail (DTL) file into raw fields.

    Returns a 4-tuple of regex groups; the second group is optional and
    is None when absent, and the last group is the text with trailing
    whitespace removed. Raises AttributeError when the line does not
    match the expected layout.
    """
    layout = r"^([\s\d]{3}\d)([\s\d]{4}\d)?([\s\d]\d)\s(.*?)\s*$"
    matched = re.match(layout, rollcall_list_line)
    return matched.groups()


def parse_rollcall_dtl_list_first_line(rollcall_dtl_first_line):
    """Split the text of a roll call's first DTL line into fixed-width parts.

    Returns a 4-tuple of regex groups: a 14-character field, a
    15-character field, an optional 10-character field and the remaining
    text, with a trailing run of 3+ spaces followed by a 2-3 digit number
    excluded. Raises AttributeError when the text does not match.
    """
    layout = r"^(.{14})(.{15})(.{10})?(.+?)(?:\s{3,}\d{2,3})?$"
    matched = re.match(layout, rollcall_dtl_first_line)
    return matched.groups()


def parse_rollcall_dtl_date(rollcall_dtl_date):
    """Parse a date string from the DTL file into "YYYY-MM-DD" text.

    The string is normalized (periods after month abbreviations removed,
    a space inserted after any comma directly followed by a digit,
    "SEPT" turned into "SEP") and then tried against several formats.

    Returns the first 10 characters of utils.format_datetime()'s result,
    or that result unchanged when it is None.
    """
    from datetime import datetime

    potential_date_formats = [
        "%b %d, %Y",  # JAN 1, 1900
        "%B %d, %Y",  # JANUARY 1, 1900
        "%b, %d, %Y",  # JAN, 1, 1900
        "%B, %d, %Y",  # JANUARY, 1, 1900
        "%b.%d, %Y",  # JAN.1, 1900
    ]

    # Make things easier by removing periods after month abbreviations.
    rollcall_dtl_date = rollcall_dtl_date.replace(". ", " ")

    # Make things easier by inserting spaces after commas where they are
    # missing. Handle any following digit, not just "1", so that days and
    # years beginning with other digits are repaired too.
    rollcall_dtl_date = re.sub(r",(?=\d)", ", ", rollcall_dtl_date)

    # Python doesn't consider "SEPT" a valid abbreviation for September.
    rollcall_dtl_date = rollcall_dtl_date.replace("SEPT ", "SEP ")

    parsed_date = None

    for potential_date_format in potential_date_formats:
        try:
            parsed_date = datetime.strptime(rollcall_dtl_date, potential_date_format)
        except ValueError:
            continue  # try the next format
        break

    # parsed_date is still None here when no format matched.
    # NOTE(review): relies on utils.format_datetime(None) returning None -- confirm.
    formatted_date = utils.format_datetime(parsed_date)

    return formatted_date[:10] if formatted_date is not None else formatted_date


def extract_vote_info_from_parsed_vote_list_line(parsed_vote_list_line):
    """Convert the raw fields of a vote list line into a typed dict.

    Numeric fields become ints, or None when the field is blank; name
    fields are stripped; the final field becomes a list with one int per
    character.
    """
    def as_optional_int(raw):
        return int(raw) if raw.strip() else None

    def as_trimmed(raw):
        return raw.strip()

    def as_vote_codes(raw):
        return [int(character) for character in raw]

    # (output key, index into the parsed line, converter), in output order.
    field_layout = (
        ("congress", 0, as_optional_int),
        ("icpsr_id", 1, as_optional_int),
        ("icpsr_state", 2, as_optional_int),
        ("district", 3, as_optional_int),
        ("state_name", 4, as_trimmed),  # partial state name
        ("icpsr_party", 5, as_optional_int),
        ("occupancy", 6, as_optional_int),
        ("means", 7, as_optional_int),
        ("member_name", 8, as_trimmed),  # partial member name
        ("votes", 9, as_vote_codes),
    )

    vote_info = {}
    for key, index, convert in field_layout:
        vote_info[key] = convert(parsed_vote_list_line[index])
    return vote_info


def extract_rollcall_info_from_parsed_rollcall_dtl_list_line(parsed_rollcall_dtl_list_line):
    """Pick the vote number, line number and text out of a parsed DTL line.

    Fields 0 and 2 are converted to int; field 3 is passed through as the
    text. Field 1 is not used.
    """
    vote_number = int(parsed_rollcall_dtl_list_line[0])
    line_number = int(parsed_rollcall_dtl_list_line[2])
    text = parsed_rollcall_dtl_list_line[3]
    return dict(vote=vote_number, line=line_number, text=text)


def parse_vote_list_file(vote_list_file):
    """Parse the text of a vote list (ORD) file into one dict per line.

    Each line in the vote list file is for a Member of Congress, with
    identifying data in the left column followed by one character per
    vote (1=aye, etc.).

    Besides the fields produced by
    extract_vote_info_from_parsed_vote_list_line, each dict gets:
      "state":        abbreviation for the ICPSR state code (or None),
      "party":        name for the ICPSR party code (or None),
      "bioguide_id":  id from utils.get_person_id, or None when the
                      lookup raised KeyError,
      "is_president": True when the ICPSR state code is 99.

    Lines are separated by "\\r\\n"; blank lines are skipped.
    """
    logging.info("Parsing vote list file...")

    # I think these are mistakes? Don't know if the 9- codes something special.
    icpsr_id_corrections = {
        91449: 1449,
        92484: 2484,
        94804: 4804,
        94891: 4891,
        96738: 6738,
        98500: 8500,
        99369: 9369,
        90618: 10618,
        90634: 10634,
        91043: 11043,
        93033: 13033,
        94428: 14428,
        94454: 14454,
        94602: 14602,
        94628: 14628,
        95122: 15122,
        95415: 15415,
        3769: 15101,  # guy was given two ids
        14240: 94240,  # per our id
    }

    # Some guys named Poe (99999) and Chambers (10509) that don't seem to
    # have existed and didn't cast actual votes, and Jack Swigert (15067)
    # who died before being sworn in.
    known_missing_ids = (99999, 10509, 15067)

    vote_list_info = []

    for vote_list_line in vote_list_file.split("\r\n"):
        if not vote_list_line.strip():
            continue

        vote_info = extract_vote_info_from_parsed_vote_list_line(parse_vote_list_line(vote_list_line))

        vote_info["state"] = get_state_from_icpsr_state_code(vote_info["icpsr_state"]) if vote_info["icpsr_state"] is not None else None
        vote_info["party"] = get_party_from_icpsr_party_code(vote_info["icpsr_party"]) if vote_info["icpsr_party"] is not None else None

        # No corrected id is itself a key of the table, so a single lookup
        # gives the same result as checking the corrections one by one.
        icpsr_id = icpsr_id_corrections.get(vote_info["icpsr_id"], vote_info["icpsr_id"])

        is_usa_record = vote_info["state_name"] == "USA"

        try:
            bioguide_id = utils.get_person_id("icpsr_prez" if is_usa_record else "icpsr", icpsr_id, "bioguide")
        except KeyError as e:
            # Stay quiet about the known-missing ids above, and about "USA"
            # records: presidents may not have bioguide IDs.
            if icpsr_id not in known_missing_ids and not is_usa_record:
                # Format the exception itself: exceptions have no .message
                # attribute on Python 3.
                logging.error("Problem with member %s ([%d] %s) of %s %s: %s" % (vote_info["member_name"], vote_info["icpsr_party"], vote_info["party"],
                                                                             vote_info["state_name"], vote_info["district"], e))
            bioguide_id = None
        else:
            logging.debug("Parsed member %s ([%d] %s) of %s %s..." % (vote_info["member_name"], vote_info["icpsr_party"], vote_info["party"],
                                                                      vote_info["state_name"], vote_info["district"]))
        vote_info["bioguide_id"] = bioguide_id

        # This is used to record the President's position, or something.
        # Mark this record so build_votes can separate it out from Member votes.
        vote_info["is_president"] = vote_info["icpsr_state"] == 99

        vote_list_info.append(vote_info)

    return vote_list_info


def parse_rollcall_dtl_list_file(rollcall_dtl_list_file, congress):
    """Parse the text of a roll call detail (DTL) file.

    The file holds several numbered lines per roll call. Line 1 carries
    the record id, journal id, bill and date; line 2 is ignored; line 3
    starts the description and later lines are appended to it, separated
    by a space.

    Args:
      rollcall_dtl_list_file: file contents, lines separated by "\\r\\n".
      congress: stored in the "bill" entry of each roll call that names a
        recognized bill.

    Returns a dict mapping vote number to a dict with "record_id",
    "journal_id", "date_unparsed", "date", "bill_unparsed" and, when
    available, "bill" and "description".

    Raises ValueError if a non-header line appears before any line 1.
    """
    # Built once rather than on every header line.
    bill_type_map = {
       'HR': 'hr', 'H': 'hr',
       'S': 's',
       'HJR': 'hjres', 'HJ': 'hjres', 'HJRE': 'hjres', 'HJRES': 'hjres',
       'SJR': 'sjres', 'SJ': 'sjres', 'SJRE': 'sjres', 'SJRES': 'sjres',
       'HCR': 'hconres', 'HCRE': 'hconres', 'HCRES': 'hconres', 'HCONR': 'hconres', 'HCON': 'hconres',
       'SCR': 'sconres', 'SCRE': 'sconres', 'SCRES': 'sconres', 'SCONRES': 'sconres', 'SCONR': 'sconres', 'SCON': 'sconres',
       'HRE': 'hres', 'HRES': 'hres',
       'SRE': 'sres', 'SR': 'sres', 'SRES': 'sres' }

    rollcall_dtl_list_info = {}
    rollcall_info = None  # the roll call currently being assembled

    for rollcall_dtl_list_line in rollcall_dtl_list_file.split("\r\n"):
        if not rollcall_dtl_list_line.strip():
            continue

        rollcall_dtl_list_line_info = extract_rollcall_info_from_parsed_rollcall_dtl_list_line(parse_rollcall_dtl_list_line(rollcall_dtl_list_line))
        line_number = rollcall_dtl_list_line_info["line"]
        line_text = rollcall_dtl_list_line_info["text"]

        if line_number == 1:
            # Header line: start a fresh record.
            rollcall_info = {}

            rollcall_dtl_list_first_line_parts = parse_rollcall_dtl_list_first_line(line_text)
            rollcall_info["record_id"] = rollcall_dtl_list_first_line_parts[0].strip()
            rollcall_info["journal_id"] = rollcall_dtl_list_first_line_parts[1].strip()

            rollcall_info["date_unparsed"] = rollcall_dtl_list_first_line_parts[3].strip()
            rollcall_info["date"] = parse_rollcall_dtl_date(rollcall_info["date_unparsed"])

            rollcall_info["bill_unparsed"] = rollcall_dtl_list_first_line_parts[2].strip()
            m = re.match(r"([A-Z]+)([0-9]+)$", rollcall_info["bill_unparsed"])
            if m:
                if m.group(1) not in bill_type_map:
                    logging.error('Could not parse bill: %s' % rollcall_info["bill_unparsed"])
                else:
                    rollcall_info["bill"] = { 'congress': congress, 'type': bill_type_map[m.group(1)], 'number': int(m.group(2)) }

        elif rollcall_info is None:
            # Fail with a clear message instead of an unbound-variable error.
            raise ValueError("Roll call detail line appears before any header (line 1) record: %r" % rollcall_dtl_list_line)
        elif line_number == 2:
            pass
        elif line_number == 3:
            rollcall_info["description"] = line_text
        elif "description" in rollcall_info:
            rollcall_info["description"] += " " + line_text
        else:
            # Continuation line without a preceding line 3: start the
            # description here rather than failing with KeyError.
            rollcall_info["description"] = line_text

        rollcall_dtl_list_info[rollcall_dtl_list_line_info["vote"]] = rollcall_info

    return rollcall_dtl_list_info


def build_votes(vote_list):
    """Pivot per-person vote records into per-vote lists of voters.

    Takes the list of individuals (each with a "votes" list holding one
    code per vote) and returns a tuple (votes, presidents_positions):
      votes: {vote index: [voter records sorted by display name]}, where
        each record holds the voter's id, display name, party, state and
        the raw code cast in that vote.
      presidents_positions: {vote index: raw code} taken from records
        flagged "is_president".
    Vote indexes are 0-based positions in the "votes" lists.
    """
    logging.info("Building votes...")

    by_vote_index = {}
    president_codes = {}

    for person in vote_list:
        codes = person["votes"]
        if not codes:
            continue

        # Keep the president's position apart from Member votes.
        if person["is_president"]:
            president_codes.update(enumerate(codes))
            continue

        # Skip anyone without a bioguide id. Warnings were issued at lookup
        # time; any remaining cases are individuals who didn't actually
        # take office and didn't actually vote. Presidents may not have
        # bioguide IDs, which is why they are handled first.
        if person["bioguide_id"] is None:
            continue

        for vote_index, code in enumerate(codes):
            record = {
                "id": person["bioguide_id"],
                "display_name": person["member_name"],
                "party": person["party"],
                "state": person["state"],
                "vote": code,
            }
            by_vote_index.setdefault(vote_index, []).append(record)

    # sort for output
    for vote_index in by_vote_index:
        by_vote_index[vote_index].sort(key=lambda record: record['display_name'])

    return (by_vote_index, president_codes)


def session_from_date(date, session_dates):
    """Find the session whose start..end range (inclusive) contains date.

    Returns (congress as int, session) for the first matching entry of
    session_dates, or (None, None) when no entry matches.
    """
    containing = next(
        (entry for entry in session_dates if entry["start"] <= date <= entry["end"]),
        None,
    )
    if containing is None:
        return None, None
    return int(containing["congress"]), containing["session"]

def parse_rollcall_description(rollcall):
    """Strip trailing metadata from rollcall['description'], in place.

    The description sometimes ends with additional ". "-separated
    metadata. Working backwards from the end (the first part is never
    examined), each recognized part is removed:
      * "NAY/YEA SUPPORTS PRESIDENT'S POSITION" sets
        rollcall['presidents_position'] to {"option": "Nay"/"Yea"};
      * an outcome word such as "PASSED" sets rollcall['result'] to its
        title-cased form;
      * a part starting with "(SEE CQ " is simply dropped.
    Scanning stops at the first unrecognized part. The remaining text is
    stored back with a final period added if it lacks one.
    """
    # also recorded in the big table, so we probably already have this
    position_by_marker = {
        "NAY SUPPORTS PRESIDENT'S POSITION": "Nay",
        "YEA SUPPORTS PRESIDENT'S POSITION": "Yea",
    }
    outcome_words = ("REJECTED", "PASSED", "AGREED TO", "ADOPTED", "ACCEPTED", "CONFIRMED", "RATIFIED")

    pieces = rollcall['description'].split(". ")
    kept = len(pieces)

    while kept > 1:
        # Strip spaces, hyphens and periods; periods remain on the final
        # piece because ". " is the split string, and hyphens appear at
        # line ends where words were split.
        candidate = pieces[kept - 1].strip(".- ")
        if candidate in position_by_marker:
            rollcall['presidents_position'] = {"option": position_by_marker[candidate]}
        elif candidate in outcome_words:
            rollcall['result'] = candidate.title()
        elif not candidate.startswith("(SEE CQ "):
            # Unrecognized, so stop here.
            break
        # Recognized: drop this piece from the description.
        kept -= 1

    remaining = ". ".join(pieces[:kept])
    rollcall['description'] = remaining if remaining.endswith('.') else remaining + "."

def build_votes_dict(votes_list, rollcall):
    """Group a roll call's voter records by the option each voter chose.

    Translates each record's numeric "vote" code into an option string,
    then returns {option: [records]} with the order of votes_list kept
    inside each group. Records whose option is None are left out.

    Side effects: the "vote" key is removed from every record in
    votes_list; regular votes gain a "voteview_votecode_extra" key; for
    specially coded votes rollcall["description"] is replaced.
    """
    description = rollcall.get("description")

    if description in special_vote_options:
        # Some votes are for things besides aye/no etc where the vote
        # description says how the numeric codes are mapped to options,
        # e.g. for Election of the Speaker, 1 will be one candidate and 2
        # another. These were coded manually and loaded at the top of the
        # module, together with replacement text for the description.
        replacement_text, coded_options = special_vote_options[description]
        rollcall["description"] = replacement_text
        for record in votes_list:
            code = record["vote"]
            if code == 0:
                record["vote"] = None
            elif code == 9:
                record["vote"] = "Not Voting"
            elif code in coded_options:
                record["vote"] = coded_options[code]
            else:
                logging.error('Vote "%s" had a "%d" vote.' % (description, code))
                record["vote"] = "Unknown"
    else:
        # This is a regular vote. Use the regular voteview codebook.
        for record in votes_list:
            option, extra = parse_voteview_vote_code(record["vote"])
            record["vote"] = option
            record["voteview_votecode_extra"] = extra

    # Collect the distinct options, leaving out None (legislators who were
    # not serving at the time of the vote).
    options_cast = {record["vote"] for record in votes_list}
    options_cast.discard(None)

    # Filtering votes_list per option preserves its existing sort order.
    grouped = {}
    for option in options_cast:
        grouped[option] = [record for record in votes_list if record["vote"] == option]

    # No longer need the "vote" keys.
    for record in votes_list:
        record.pop("vote")

    return grouped

def get_votes(chamber, congress, options, session_dates):
    """Download and assemble all voteview votes for one chamber of a Congress.

    Args:
      chamber: "h" or "s".
      congress: Congress number (int).
      options: passed through to utils.download; options["session"], when
        set, restricts the output to votes in that session.
      session_dates: rows of the sessions file, each with "congress",
        "session", "start" and "end".

    Returns a list of vote dicts, or None when either the ORD or the DTL
    file could not be downloaded. Votes whose date cannot be determined
    or does not fall in a session of this Congress are logged and skipped.
    """
    logging.warning("Getting votes for %d-%s..." % (congress, chamber))

    vote_list_url, rollcall_list_url = vote_list_source_urls_for(congress, chamber, options)

    # Load the ORD file which contains the matrix of how people voted.
    # NOTE(review): assumes utils.download returns text (str) or a falsy
    # value on failure -- confirm. The result is checked before use and
    # kept as text, because the parsers split and regex-match it as str.
    vote_list_file = utils.download(vote_list_url, cache_file_for(congress, chamber, "ord"), options)
    if not vote_list_file:
        logging.error("Couldn't download vote list file.")
        return None

    vote_list = parse_vote_list_file(vote_list_file)
    votes, presidents_positions = build_votes(vote_list)

    # Load the DTL file which lists each roll call vote with textual metadata.
    rollcall_list_file = utils.download(rollcall_list_url, cache_file_for(congress, chamber, "dtl"), options)
    if not rollcall_list_file:
        logging.error("Couldn't download rollcall list file.")
        return None
    rollcall_list = parse_rollcall_dtl_list_file(rollcall_list_file, congress)

    # Some dates are valid but incorrect. When the date doesn't even fall
    # within the Congress that we know the vote falls in, clear out the
    # date so we can try to guess a valid date in the next step.
    for rollcall in rollcall_list.values():
        if rollcall["date"]:
            d_congress, _ = session_from_date(rollcall["date"], session_dates)
            if d_congress != congress:
                rollcall["date"] = None

    # The dates listed in the DTL file were originally OCRd and have tons
    # of errors. Many strings could not be parsed. There are occasional
    # invalid dates (like Feb 29 on a non-leap year --- the 9s are probably
    # incorrectly OCR'd 5's). Try to resolve these quickly without resorting
    # to manual fact-checking...
    if rollcall_list:
        # Visit every vote that has both a predecessor and a successor.
        # range() excludes its upper bound, so the bound is the last vote
        # number itself; subtracting one more would skip the second-to-last.
        for i in range(min(rollcall_list) + 1, max(rollcall_list)):
            current = rollcall_list.get(i)
            previous = rollcall_list.get(i - 1)
            following = rollcall_list.get(i + 1)
            if current is None or previous is None or following is None:
                continue  # gap in the vote numbering
            if current["date"]:
                continue  # was OK
            if not previous["date"]:
                continue  # preceding date not OK

            # Either way the vote is lumped with the previous date; only
            # the log message differs.
            current["date"] = previous["date"]
            if previous["date"] == following["date"]:
                # The vote is surrounded by votes on the same day.
                logging.error("Replacing %s with %s." % (current["date_unparsed"], previous["date"]))
            else:
                logging.error("Replacing %s with %s (but might be as late as %s)." % (current["date_unparsed"], previous["date"], following["date"]))

    # Form the output data.

    vote_output_list = []

    for rollcall_number in rollcall_list:
        # build_votes keys its results by 0-based position in the ORD vote
        # columns, while roll call numbers start at 1.
        vote_index = rollcall_number - 1
        vote_results = votes[vote_index]
        rollcall = rollcall_list[rollcall_number]

        # Which session is this in? Compare the vote's date to the sessions.tsv file.
        if not rollcall["date"]:
            logging.error("Vote on %s was an invalid date, so we can't determine the session to save the file.. | %s" % (rollcall["date_unparsed"], rollcall["description"]))
            continue

        s_congress, session = session_from_date(rollcall["date"], session_dates)
        if s_congress != congress:
            # should not occur - handled above
            logging.error("Vote on %s disagrees about which Congress it is in." % rollcall["date"])
            continue
        if session is None:
            # This vote did not occur during a session of Congress. Some sort of data error.
            logging.error("Vote on %s is not within a session of Congress." % rollcall["date"])
            continue

        # Only process votes from the requested session.
        if options.get("session") and session != options["session"]:
            continue

        rollcall['result'] = "unknown"
        if "description" in rollcall:
            parse_rollcall_description(rollcall)

        # Make the votes dictionary, but also replace the description
        # text when it contains coded vote information.
        votes_dict = build_votes_dict(vote_results, rollcall)

        # Form the vote dict.
        vote_output = {
            "vote_id": "%s%s-%d.%s" % (chamber, rollcall_number, congress, session),
            "source_url": "http://www.voteview.com",
            "updated_at": datetime.datetime.fromtimestamp(time.time()),

            "congress": congress,
            "session": session,
            "chamber": chamber,
            "number": rollcall_number,  # XXX: This is not the right number.
            "question": rollcall["description"] if "description" in rollcall else None,  # Sometimes there isn't a description.
            "type": normalize_vote_type(rollcall["description"]) if "description" in rollcall else None,
            "date": datetime.date(*[int(dd) for dd in rollcall["date"].split("-")]),  # turn YYYY-MM-DD into datetime.date() instance
            "date_unparsed": rollcall["date_unparsed"],
            "votes": votes_dict,
            # Same 0-based key as the member votes above.
            # NOTE(review): the value from the vote matrix is a raw voteview
            # code, while the description-derived fallback is a dict like
            # {"option": "Yea"} -- confirm what output_vote expects.
            "presidents_position": presidents_positions.get(vote_index) or rollcall.get('presidents_position'),
            "bill": rollcall.get('bill'),

            "category": "unknown",
            "requires": "unknown",
            "result": rollcall['result'],
        }

        vote_output_list.append(vote_output)

    return vote_output_list


def put_vote(vote, options):
    """Write one vote via output_vote, identifying people by bioguide id.

    Returns a status dict reporting the vote as ok and saved.
    """
    output_vote(vote, options, id_type="bioguide")
    status = dict(ok=True, saved=True)
    return status


def normalize_vote_type(descr):
    """Classify a vote description by its opening words.

    Returns a standard vote type label when the description starts with
    one of the known phrases; otherwise returns the description itself.
    """
    # (opening phrase, vote type), checked in this order.
    known_openings = (
        ("TO PASS ", "On Passage"),
        ("TO AMEND ", "On the Amendment"),
        ("TO CONCUR IN THE SENATE AMENDMENT ", "Concurring in the Senate Amendment"),
        ("TO READ THE SECOND TIME ", "Reading the Second Time"),
        ("TO ADVISE AND CONSENT TO THE RATIFICATION OF THE TREATY", "On the Treaty"),
    )
    for opening, vote_type in known_openings:
        if descr.startswith(opening):
            return vote_type
    # Unknown vote type: fall back to the description unchanged.
    return descr