diff --git a/scripts/alternate_bulk_formats.py b/scripts/alternate_bulk_formats.py index 3174f8d..6b60003 100755 --- a/scripts/alternate_bulk_formats.py +++ b/scripts/alternate_bulk_formats.py @@ -1,6 +1,6 @@ import csv import json -from utils import write, format_datetime, load_data +import utils def run(): @@ -57,15 +57,15 @@ def run(): print("Loading %s..." %yaml_social) - social = load_data(yaml_social) + social = utils.load_data(yaml_social) for filename in yamls: print("Loading %s..." % filename) - legislators = load_data(filename) + legislators = utils.load_data(filename) #convert yaml to json - write( - json.dumps(legislators, sort_keys=True, indent=2, default=format_datetime), + utils.write( + json.dumps(legislators, sort_keys=True, indent=2, default=utils.format_datetime), "../alternate_formats/%s.json" %filename.rstrip(".yaml")) #convert yaml to csv diff --git a/scripts/bioguide.py b/scripts/bioguide.py index 15db1eb..369dbbc 100755 --- a/scripts/bioguide.py +++ b/scripts/bioguide.py @@ -15,7 +15,7 @@ import lxml.html, io import datetime import re import utils -from utils import download, load_data, save_data, parse_date +from utils import download, load_data, save_data def run(): @@ -99,8 +99,6 @@ def run(): relationships.append({ "relation": relationship, "name": person}) return relationships - debug = utils.flags().get('debug', False) - # default to caching cache = utils.flags().get('cache', True) force = not cache diff --git a/scripts/committee_membership.py b/scripts/committee_membership.py index 2889edf..9360436 100755 --- a/scripts/committee_membership.py +++ b/scripts/committee_membership.py @@ -7,7 +7,7 @@ import re, lxml.html, lxml.etree, io, datetime from collections import OrderedDict import utils -from utils import download, load_data, save_data, parse_date, CURRENT_CONGRESS +from utils import download, load_data, save_data, parse_date def run(): diff --git a/scripts/cspan.py b/scripts/cspan.py index 08a4647..5f848a3 100755 --- a/scripts/cspan.py +++ b/scripts/cspan.py @@ -3,18 +3,14 @@ # Update current cspan IDs using NYT Congress API. import json, urllib.request, urllib.parse, urllib.error -import utils -from utils import download, load_data, save_data, parse_date +from utils import load_data, save_data def run(): - # default to not caching - cache = utils.flags().get('cache', False) - force = not cache - # load in current members y = load_data("legislators-current.yaml") for m in y: # retrieve C-SPAN id, if available, from NYT API + # TODO: use utils.download here response = urllib.request.urlopen("http://politics.nytimes.com/congress/svc/politics/v3/us/legislative/congress/members/%s.json" % m['id']['bioguide']).read() j = json.loads(response.decode("utf8")) cspan = j['results'][0]['cspan_id'] diff --git a/scripts/historical_committees.py b/scripts/historical_committees.py index 5d96021..1b61329 100755 --- a/scripts/historical_committees.py +++ b/scripts/historical_committees.py @@ -5,7 +5,7 @@ # the committees-historical.yaml file. It will include current committees # as well. -import re, itertools +import re from collections import OrderedDict import utils from utils import download, load_data, save_data, CURRENT_CONGRESS diff --git a/scripts/house_contact_list.py b/scripts/house_contact_list.py index c35ab70..952ead4 100755 --- a/scripts/house_contact_list.py +++ b/scripts/house_contact_list.py @@ -6,15 +6,12 @@ import csv, re import utils -from utils import download, load_data, save_data, parse_date +from utils import load_data, save_data def run(): house_labels = "labels-113.csv" - # default to not caching - cache = utils.flags().get('cache', False) - force = not cache names = utils.flags().get('names', False) y = load_data("legislators-current.yaml") diff --git a/scripts/house_contacts.py b/scripts/house_contacts.py index 8536315..e4b87c3 100755 --- a/scripts/house_contacts.py +++ b/scripts/house_contacts.py @@ -3,8 +3,8 @@ # Update current congressmen's mailing address from clerk.house.gov. import lxml.html, io -import re, sys -from datetime import date, datetime +import re +from datetime import datetime import utils from utils import download, load_data, save_data, parse_date diff --git a/scripts/house_history.py b/scripts/house_history.py index 620972d..871a174 100755 --- a/scripts/house_history.py +++ b/scripts/house_history.py @@ -9,20 +9,12 @@ # --bioguide: do *only* a single legislator import lxml.html, io -import datetime -import re import utils import requests -from utils import download, load_data, save_data, parse_date +from utils import load_data, save_data def run(): - debug = utils.flags().get('debug', False) - - # default to caching - cache = utils.flags().get('cache', True) - force = not cache - # pick either current or historical # order is important here, since current defaults to true if utils.flags().get('historical', False): diff --git a/scripts/house_history_gender.py b/scripts/house_history_gender.py index e47b69e..89d4cd4 100644 --- a/scripts/house_history_gender.py +++ b/scripts/house_history_gender.py @@ -1,5 +1,5 @@ import re, urllib.request, urllib.parse -from utils import yaml_load, yaml_dump, data_dir +from utils import yaml_load, yaml_dump def run(): diff --git a/scripts/house_websites.py b/scripts/house_websites.py index 1553906..cd6414b 100755 --- a/scripts/house_websites.py +++ b/scripts/house_websites.py @@ -10,7 +10,7 @@ import lxml.html, io, urllib.request, urllib.error, urllib.parse import re import utils -from utils import download, load_data, save_data, parse_date +from utils import load_data, save_data def run(): diff --git a/scripts/icpsr_ids.py b/scripts/icpsr_ids.py index e578b92..7ddd302 100755 --- a/scripts/icpsr_ids.py +++ b/scripts/icpsr_ids.py @@ -7,21 +7,14 @@ # --bioguide: load only one legislator, by his/her bioguide ID # --congress: do *only* updates for legislators serving in specific congress -import datetime -import re import utils -import urllib.request, urllib.error, urllib.parse -import requests -from utils import download, load_data, save_data, parse_date, states, congress_from_legislative_year, legislative_year -import json +from utils import load_data, save_data, parse_date import string import csv import unicodedata def run(): - debug = utils.flags().get('debug', False) - # default to caching cache = utils.flags().get('cache', True) force = not cache @@ -31,8 +24,6 @@ def run(): congress = utils.flags().get('congress',None) - filename_historical = "legislators-historical.yaml" - filename_current = "legislators-current.yaml" data_files = [] print("Loading %s..." % "legislators-current.yaml") diff --git a/scripts/influence_ids.py b/scripts/influence_ids.py index 2fce522..714ed46 100755 --- a/scripts/influence_ids.py +++ b/scripts/influence_ids.py @@ -7,12 +7,8 @@ # --current: do *only* current legislators (default: true) # --historical: do *only* historical legislators (default: false) -import datetime -import re import utils -import urllib.request, urllib.error, urllib.parse -import requests -from utils import download, load_data, save_data, parse_date +from utils import load_data, save_data import json def run(): diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 51a0f3a..f301660 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -2,4 +2,5 @@ pyyaml scrapelib ipython lxml>=2.2 -cssselect \ No newline at end of file +cssselect +pyflakes \ No newline at end of file diff --git a/scripts/retire.py b/scripts/retire.py index c08df4c..8e278c0 100755 --- a/scripts/retire.py +++ b/scripts/retire.py @@ -7,8 +7,8 @@ # python retire.py bioguideID termEndDate import sys - -from utils import load_data, save_data, parse_date, pprint +import utils +import rtyaml def run(): if len(sys.argv) != 3: @@ -17,25 +17,25 @@ def run(): sys.exit() try: - parse_date(sys.argv[2]) + utils.parse_date(sys.argv[2]) except: print("Invalid date: ", sys.argv[2]) sys.exit() print("Loading current YAML...") - y = load_data("legislators-current.yaml") + y = utils.load_data("legislators-current.yaml") print("Loading historical YAML...") - y1 = load_data("legislators-historical.yaml") + y1 = utils.load_data("legislators-historical.yaml") for moc in y: if moc["id"].get("bioguide", None) != sys.argv[1]: continue print("Updating:") - pprint(moc["id"]) + rtyaml.pprint(moc["id"]) print() - pprint(moc["name"]) + rtyaml.pprint(moc["name"]) print() - pprint(moc["terms"][-1]) + rtyaml.pprint(moc["terms"][-1]) moc["terms"][-1]["end"] = sys.argv[2] @@ -45,8 +45,8 @@ def run(): break print("Saving changes...") - save_data(y, "legislators-current.yaml") - save_data(y1, "legislators-historical.yaml") + utils.save_data(y, "legislators-current.yaml") + utils.save_data(y1, "legislators-historical.yaml") if __name__ == '__main__': run() \ No newline at end of file diff --git a/scripts/rtyaml.py b/scripts/rtyaml.py index 3e172f4..2a9e938 100755 --- a/scripts/rtyaml.py +++ b/scripts/rtyaml.py @@ -39,7 +39,7 @@ # more lines starting with a '#', write back out the commend if the # same object is written with rtyaml.dump().) -import sys, re, io +import sys, re from collections import OrderedDict import yaml diff --git a/scripts/senate_contacts.py b/scripts/senate_contacts.py index a697029..f063c5d 100755 --- a/scripts/senate_contacts.py +++ b/scripts/senate_contacts.py @@ -3,9 +3,8 @@ # Update current senator's website and address from www.senate.gov. import lxml.etree, io -import urllib.request, urllib.parse, urllib.error import string, re -from datetime import date, datetime +from datetime import datetime import utils from utils import download, load_data, save_data, parse_date diff --git a/scripts/social_media.py b/scripts/social_media.py index ac583d7..46ba3f4 100755 --- a/scripts/social_media.py +++ b/scripts/social_media.py @@ -28,7 +28,7 @@ import csv, re import utils -from utils import download, load_data, save_data, parse_date +from utils import load_data, save_data import requests def main(): @@ -204,7 +204,6 @@ def main(): # even though we have their channel ID, do they also have a username? if ytobj['entry']['yt$username']['$t'] != ytobj['entry']['yt$userId']['$t']: if social['youtube'].lower() != ytobj['entry']['yt$username']['$t'].lower(): - old_name = social['youtube'] # YT accounts are case-insensitive. Preserve capitalization if possible. social['youtube'] = ytobj['entry']['yt$username']['$t'] print("\tAdded YouTube username of %s" % social['youtube']) diff --git a/scripts/sweep_memberships.py b/scripts/sweep_memberships.py index f360a51..cdbc4d0 100755 --- a/scripts/sweep_memberships.py +++ b/scripts/sweep_memberships.py @@ -1,9 +1,6 @@ #!/usr/bin/env python -import re, lxml.html, lxml.etree, io, datetime -from collections import OrderedDict -import utils -from utils import download, load_data, save_data, parse_date, CURRENT_CONGRESS +from utils import load_data, save_data def run(): # load in members, orient by bioguide ID diff --git a/scripts/thomas_ids.py b/scripts/thomas_ids.py index 8522818..5f67e95 100755 --- a/scripts/thomas_ids.py +++ b/scripts/thomas_ids.py @@ -5,10 +5,9 @@ # IDs because name matching is hard. import lxml.html, io, urllib.request, urllib.parse, urllib.error -import re, sys -from datetime import date, datetime +import re import utils -from utils import download, load_data, save_data, parse_date +from utils import download, load_data, save_data def run(): CONGRESS_ID = "113th Congress (2013-2014)" # the query string parameter diff --git a/scripts/untire.py b/scripts/untire.py index c7034c5..1909ddf 100755 --- a/scripts/untire.py +++ b/scripts/untire.py @@ -7,8 +7,8 @@ # python unretire.py bioguideID import sys - -from utils import load_data, save_data, pprint +import rtyaml +import utils from collections import OrderedDict def run(): @@ -19,17 +19,17 @@ def run(): sys.exit() print("Loading current YAML...") - y = load_data("legislators-current.yaml") + y = utils.load_data("legislators-current.yaml") print("Loading historical YAML...") - y1 = load_data("legislators-historical.yaml") + y1 = utils.load_data("legislators-historical.yaml") for moc in y1: if moc["id"].get("bioguide", None) != sys.argv[1]: continue print("Updating:") - pprint(moc["id"]) + rtyaml.pprint(moc["id"]) print() - pprint(moc["name"]) + rtyaml.pprint(moc["name"]) moc["terms"].append(OrderedDict([ ("type", moc["terms"][-1]["type"]), @@ -45,8 +45,8 @@ def run(): break print("Saving changes...") - save_data(y, "legislators-current.yaml") - save_data(y1, "legislators-historical.yaml") + utils.save_data(y, "legislators-current.yaml") + utils.save_data(y1, "legislators-historical.yaml") if __name__ == '__main__': run() \ No newline at end of file diff --git a/scripts/utils.py b/scripts/utils.py index cb88c80..ac720fd 100755 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -74,13 +74,11 @@ from datetime import datetime import time import lxml.html # for meta redirect parsing - import yaml import smtplib import email.utils from email.mime.text import MIMEText -import getpass # read in an opt-in config file for supplying email settings @@ -92,10 +90,6 @@ else: email_settings = None -def current_congress(): - year = current_legislative_year() - return congress_from_legislative_year(year) - def congress_from_legislative_year(year): return ((year + 1) / 2) - 894 @@ -103,8 +97,6 @@ def legislative_year(date=None): if not date: date = datetime.datetime.now() - year = date.year - if date.month == 1: if date.day == 1 or date.day == 2: return date.year - 1 @@ -199,7 +191,7 @@ def download(url, destination=None, force=False, options=None): else: response = scraper.urlopen(url) body = str(response) # ensure is unicode not bytes - except scrapelib.HTTPError as e: + except scrapelib.HTTPError: log("Error downloading %s" % url) return None @@ -229,6 +221,8 @@ def download(url, destination=None, force=False, options=None): return body +from pytz import timezone +eastern_time_zone = timezone('US/Eastern') def format_datetime(obj): if isinstance(obj, datetime.datetime): return eastern_time_zone.localize(obj.replace(microsecond=0)).isoformat() @@ -337,10 +331,6 @@ def yaml_dump(data, path): h = hashlib.sha1(open(path, 'rb').read()).hexdigest() pickle.dump({ "hash": h, "data": data }, open(path+".pickle", "wb")) -def pprint(data): - rtyaml.pprint(data) - - # if email settings are supplied, email the text - otherwise, just print it def admin(body): try: @@ -356,10 +346,6 @@ def admin(body): print("Exception logging message to admin, halting as to avoid loop") print(format_exception(exception)) -def format_exception(exception): - exc_type, exc_value, exc_traceback = sys.exc_info() - return "\n".join(traceback.format_exception(exc_type, exc_value, exc_traceback)) - # this should only be called if the settings are definitely there def send_email(message): print("Sending email to %s..." % email_settings['to'])