fix up tasks to pass pyflakes

Eric Mill committed 2014-04-03 13:15:38 -04:00
parent a7ff75442e
commit 0a27c8843a
21 changed files with 47 additions and 96 deletions

View File

@@ -1,6 +1,6 @@
 import csv
 import json
-from utils import write, format_datetime, load_data
+import utils
 def run():
@@ -57,15 +57,15 @@ def run():
 print("Loading %s..." %yaml_social)
-social = load_data(yaml_social)
+social = utils.load_data(yaml_social)
 for filename in yamls:
 print("Loading %s..." % filename)
-legislators = load_data(filename)
+legislators = utils.load_data(filename)
 #convert yaml to json
-write(
-json.dumps(legislators, sort_keys=True, indent=2, default=format_datetime),
+utils.write(
+json.dumps(legislators, sort_keys=True, indent=2, default=utils.format_datetime),
 "../alternate_formats/%s.json" %filename.rstrip(".yaml"))
 #convert yaml to csv

View File

@@ -15,7 +15,7 @@ import lxml.html, io
 import datetime
 import re
 import utils
-from utils import download, load_data, save_data, parse_date
+from utils import download, load_data, save_data
 def run():
@@ -99,8 +99,6 @@ def run():
 relationships.append({ "relation": relationship, "name": person})
 return relationships
-debug = utils.flags().get('debug', False)
 # default to caching
 cache = utils.flags().get('cache', True)
 force = not cache

View File

@@ -7,7 +7,7 @@
 import re, lxml.html, lxml.etree, io, datetime
 from collections import OrderedDict
 import utils
-from utils import download, load_data, save_data, parse_date, CURRENT_CONGRESS
+from utils import download, load_data, save_data, parse_date
 def run():

View File

@@ -3,18 +3,14 @@
 # Update current cspan IDs using NYT Congress API.
 import json, urllib.request, urllib.parse, urllib.error
 import utils
-from utils import download, load_data, save_data, parse_date
+from utils import load_data, save_data
 def run():
-# default to not caching
-cache = utils.flags().get('cache', False)
-force = not cache
 # load in current members
 y = load_data("legislators-current.yaml")
 for m in y:
 # retrieve C-SPAN id, if available, from NYT API
 # TODO: use utils.download here
 response = urllib.request.urlopen("http://politics.nytimes.com/congress/svc/politics/v3/us/legislative/congress/members/%s.json" % m['id']['bioguide']).read()
 j = json.loads(response.decode("utf8"))
 cspan = j['results'][0]['cspan_id']

View File

@@ -5,7 +5,7 @@
 # the committees-historical.yaml file. It will include current committees
 # as well.
-import re, itertools
+import re
 from collections import OrderedDict
 import utils
 from utils import download, load_data, save_data, CURRENT_CONGRESS

View File

@@ -6,15 +6,12 @@
 import csv, re
 import utils
-from utils import download, load_data, save_data, parse_date
+from utils import load_data, save_data
 def run():
 house_labels = "labels-113.csv"
-# default to not caching
-cache = utils.flags().get('cache', False)
-force = not cache
 names = utils.flags().get('names', False)
 y = load_data("legislators-current.yaml")

View File

@@ -3,8 +3,8 @@
 # Update current congressmen's mailing address from clerk.house.gov.
 import lxml.html, io
-import re, sys
-from datetime import date, datetime
+import re
+from datetime import datetime
 import utils
 from utils import download, load_data, save_data, parse_date

View File

@@ -9,20 +9,12 @@
 # --bioguide: do *only* a single legislator
 import lxml.html, io
 import datetime
-import re
 import utils
 import requests
-from utils import download, load_data, save_data, parse_date
+from utils import load_data, save_data
 def run():
-debug = utils.flags().get('debug', False)
-# default to caching
-cache = utils.flags().get('cache', True)
-force = not cache
 # pick either current or historical
 # order is important here, since current defaults to true
 if utils.flags().get('historical', False):

View File

@@ -1,5 +1,5 @@
 import re, urllib.request, urllib.parse
-from utils import yaml_load, yaml_dump, data_dir
+from utils import yaml_load, yaml_dump
 def run():

View File

@@ -10,7 +10,7 @@
 import lxml.html, io, urllib.request, urllib.error, urllib.parse
 import re
 import utils
-from utils import download, load_data, save_data, parse_date
+from utils import load_data, save_data
 def run():

View File

@@ -7,21 +7,14 @@
 # --bioguide: load only one legislator, by his/her bioguide ID
 # --congress: do *only* updates for legislators serving in specific congress
 import datetime
-import re
 import utils
-import urllib.request, urllib.error, urllib.parse
 import requests
-from utils import download, load_data, save_data, parse_date, states, congress_from_legislative_year, legislative_year
 import json
+from utils import load_data, save_data, parse_date
 import string
 import csv
 import unicodedata
 def run():
-debug = utils.flags().get('debug', False)
 # default to caching
 cache = utils.flags().get('cache', True)
 force = not cache
@@ -31,8 +24,6 @@ def run():
 congress = utils.flags().get('congress',None)
 filename_historical = "legislators-historical.yaml"
 filename_current = "legislators-current.yaml"
-data_files = []
 print("Loading %s..." % "legislators-current.yaml")

View File

@@ -7,12 +7,8 @@
 # --current: do *only* current legislators (default: true)
 # --historical: do *only* historical legislators (default: false)
 import datetime
-import re
 import utils
-import urllib.request, urllib.error, urllib.parse
-import requests
-from utils import download, load_data, save_data, parse_date
+from utils import load_data, save_data
 import json
 def run():

View File

@@ -2,4 +2,5 @@ pyyaml
 scrapelib
 ipython
 lxml>=2.2
-cssselect
+cssselect
+pyflakes

View File

@@ -7,8 +7,8 @@
 # python retire.py bioguideID termEndDate
 import sys
-from utils import load_data, save_data, parse_date, pprint
+import utils
+import rtyaml
 def run():
 if len(sys.argv) != 3:
@@ -17,25 +17,25 @@ def run():
 sys.exit()
 try:
-parse_date(sys.argv[2])
+utils.parse_date(sys.argv[2])
 except:
 print("Invalid date: ", sys.argv[2])
 sys.exit()
 print("Loading current YAML...")
-y = load_data("legislators-current.yaml")
+y = utils.load_data("legislators-current.yaml")
 print("Loading historical YAML...")
-y1 = load_data("legislators-historical.yaml")
+y1 = utils.load_data("legislators-historical.yaml")
 for moc in y:
 if moc["id"].get("bioguide", None) != sys.argv[1]: continue
 print("Updating:")
-pprint(moc["id"])
+rtyaml.pprint(moc["id"])
 print()
-pprint(moc["name"])
+rtyaml.pprint(moc["name"])
 print()
-pprint(moc["terms"][-1])
+rtyaml.pprint(moc["terms"][-1])
 moc["terms"][-1]["end"] = sys.argv[2]
@@ -45,8 +45,8 @@ def run():
 break
 print("Saving changes...")
-save_data(y, "legislators-current.yaml")
-save_data(y1, "legislators-historical.yaml")
+utils.save_data(y, "legislators-current.yaml")
+utils.save_data(y1, "legislators-historical.yaml")
 if __name__ == '__main__':
 run()

View File

@@ -39,7 +39,7 @@
 # more lines starting with a '#', write back out the commend if the
 # same object is written with rtyaml.dump().)
-import sys, re, io
+import sys, re
 from collections import OrderedDict
 import yaml

View File

@@ -3,9 +3,8 @@
 # Update current senator's website and address from www.senate.gov.
 import lxml.etree, io
 import urllib.request, urllib.parse, urllib.error
-import string, re
-from datetime import date, datetime
+from datetime import datetime
 import utils
 from utils import download, load_data, save_data, parse_date

View File

@@ -28,7 +28,7 @@
 import csv, re
 import utils
-from utils import download, load_data, save_data, parse_date
+from utils import load_data, save_data
 import requests
 def main():
@@ -204,7 +204,6 @@ def main():
 # even though we have their channel ID, do they also have a username?
 if ytobj['entry']['yt$username']['$t'] != ytobj['entry']['yt$userId']['$t']:
 if social['youtube'].lower() != ytobj['entry']['yt$username']['$t'].lower():
-old_name = social['youtube']
 # YT accounts are case-insensitive. Preserve capitalization if possible.
 social['youtube'] = ytobj['entry']['yt$username']['$t']
 print("\tAdded YouTube username of %s" % social['youtube'])

View File

@@ -1,9 +1,6 @@
 #!/usr/bin/env python
-import re, lxml.html, lxml.etree, io, datetime
-from collections import OrderedDict
 import utils
-from utils import download, load_data, save_data, parse_date, CURRENT_CONGRESS
+from utils import load_data, save_data
 def run():
 # load in members, orient by bioguide ID

View File

@@ -5,10 +5,9 @@
 # IDs because name matching is hard.
 import lxml.html, io, urllib.request, urllib.parse, urllib.error
-import re, sys
-from datetime import date, datetime
+import re
 import utils
-from utils import download, load_data, save_data, parse_date
+from utils import download, load_data, save_data
 def run():
 CONGRESS_ID = "113th Congress (2013-2014)" # the query string parameter

View File

@@ -7,8 +7,8 @@
 # python unretire.py bioguideID
 import sys
-from utils import load_data, save_data, pprint
+import rtyaml
+import utils
 from collections import OrderedDict
 def run():
@@ -19,17 +19,17 @@ def run():
 sys.exit()
 print("Loading current YAML...")
-y = load_data("legislators-current.yaml")
+y = utils.load_data("legislators-current.yaml")
 print("Loading historical YAML...")
-y1 = load_data("legislators-historical.yaml")
+y1 = utils.load_data("legislators-historical.yaml")
 for moc in y1:
 if moc["id"].get("bioguide", None) != sys.argv[1]: continue
 print("Updating:")
-pprint(moc["id"])
+rtyaml.pprint(moc["id"])
 print()
-pprint(moc["name"])
+rtyaml.pprint(moc["name"])
 moc["terms"].append(OrderedDict([
 ("type", moc["terms"][-1]["type"]),
@@ -45,8 +45,8 @@ def run():
 break
 print("Saving changes...")
-save_data(y, "legislators-current.yaml")
-save_data(y1, "legislators-historical.yaml")
+utils.save_data(y, "legislators-current.yaml")
+utils.save_data(y1, "legislators-historical.yaml")
 if __name__ == '__main__':
 run()

View File

@@ -74,13 +74,11 @@ from datetime import datetime
 import time
 import lxml.html # for meta redirect parsing
 import yaml
 import smtplib
 import email.utils
 from email.mime.text import MIMEText
-import getpass
 # read in an opt-in config file for supplying email settings
@@ -92,10 +90,6 @@ else:
email_settings = None
def current_congress():
year = current_legislative_year()
return congress_from_legislative_year(year)
def congress_from_legislative_year(year):
return ((year + 1) / 2) - 894
@@ -103,8 +97,6 @@ def legislative_year(date=None):
if not date:
date = datetime.datetime.now()
year = date.year
if date.month == 1:
if date.day == 1 or date.day == 2:
return date.year - 1
@@ -199,7 +191,7 @@ def download(url, destination=None, force=False, options=None):
 else:
 response = scraper.urlopen(url)
 body = str(response) # ensure is unicode not bytes
-except scrapelib.HTTPError as e:
+except scrapelib.HTTPError:
 log("Error downloading %s" % url)
 return None
@@ -229,6 +221,8 @@ def download(url, destination=None, force=False, options=None):
return body
from pytz import timezone
eastern_time_zone = timezone('US/Eastern')
def format_datetime(obj):
if isinstance(obj, datetime.datetime):
return eastern_time_zone.localize(obj.replace(microsecond=0)).isoformat()
@@ -337,10 +331,6 @@ def yaml_dump(data, path):
 h = hashlib.sha1(open(path, 'rb').read()).hexdigest()
 pickle.dump({ "hash": h, "data": data }, open(path+".pickle", "wb"))
-def pprint(data):
-rtyaml.pprint(data)
 # if email settings are supplied, email the text - otherwise, just print it
 def admin(body):
 try:
@@ -356,10 +346,6 @@ def admin(body):
 print("Exception logging message to admin, halting as to avoid loop")
 print(format_exception(exception))
-def format_exception(exception):
-exc_type, exc_value, exc_traceback = sys.exc_info()
-return "\n".join(traceback.format_exception(exc_type, exc_value, exc_traceback))
 # this should only be called if the settings are definitely there
 def send_email(message):
 print("Sending email to %s..." % email_settings['to'])