mirror of
https://github.com/unitedstates/congress-legislators.git
synced 2025-12-19 09:50:37 -05:00
147 lines
7.3 KiB
Python
147 lines
7.3 KiB
Python
#!/usr/bin/env python
|
|
#coding: utf-8
|
|
__author__ = 'stsmith'
|
|
|
|
# congress_lookup: Look up information about congress from the congress-legislators database
|
|
# See: https://github.com/unitedstates/congress-legislators and https://github.com/TheWalkers/congress-legislators
|
|
|
|
# The project is in the public domain within the United States, and
|
|
# copyright and related rights in the work worldwide are waived
|
|
# through the CC0 1.0 Universal public domain dedication.
|
|
|
|
# Author 2017 Steven T. Smith <steve dot t dot smith at gmail dot com>
|
|
|
|
import argparse as ap, contextlib, fnmatch, os, sys, time, warnings, yaml
|
|
|
|
# version dependent libraries
|
|
# https://docs.python.org/2/library/urllib.html
|
|
# https://docs.python.org/3.0/library/urllib.parse.html
|
|
# Select the URL helpers by interpreter major version: Python 2 ships them as
# urllib2/urlparse, Python 3 as urllib.request/urllib.parse.
if sys.version_info[0] < 3:
    from urllib2 import urlopen
    import urlparse
else:
    from urllib.request import urlopen
    import urllib.parse as urlparse
|
|
|
|
class CongressLookup:
    '''Look up legislator properties in the congress-legislators YAML database.

    Instantiating the class runs the complete command-line tool: arguments are
    parsed, the YAML files are loaded (from a local directory or, with
    --download, from GitHub), and every requested property is printed for the
    legislators selected by --committee and/or --last-name.
    '''

    # Host and branch substituted into the repo URL to reach the raw YAML files.
    RAW_HOST = 'raw.githubusercontent.com'
    RAW_BRANCH = 'main'

    def __init__(self, argv=None):
        '''Parse arguments, load the database and print the requested properties.

        argv -- optional list of command-line arguments; None (the default)
                makes argparse read sys.argv[1:].
        '''
        self.args = self.parseArgs(argv)
        self.data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.args.data_dir)
        self.properties = dict()
        self.database_load()
        for prop in self.args.properties:
            self.lookup_property(prop)

    def parseArgs(self, argv=None):
        '''Build the command-line parser and return the parsed namespace.

        argv -- optional list of argument strings; None parses sys.argv[1:].
        '''
        parser = ap.ArgumentParser()
        parser.add_argument('properties', metavar='PROPS', type=str, nargs='+',
                            help='Properties to look up')
        parser.add_argument('-c', '--committee', help="Committee name (wildcard)", type=str, default=None)
        parser.add_argument('-n', '--last-name', help="Last name of legislator (wildcard)", type=str, default=None)
        parser.add_argument('-d', '--data-dir', help="Database directory", type=str, default='.')
        parser.add_argument('-r', '--repo', help="GitHub repo URL", type=str, default='https://github.com/unitedstates/congress-legislators/')
        parser.add_argument('-T', '--current-term', help="Properties from only the current term", action='store_true')
        parser.add_argument('-D', '--download', help="Download data", action='store_true', default=False)
        parser.add_argument('-g', '--debug', help="Debug flag", action='store_true')
        return parser.parse_args(argv)

    @staticmethod
    def _printable(text):
        '''Return text in a form str.format() can embed on the running Python.

        Python 2 byte-string templates cannot take non-ASCII unicode, so the
        text is UTF-8 encoded there.  On Python 3 the text is returned as is;
        encoding it would make format() print a b'...' literal.
        '''
        if sys.version_info[0] >= 3:
            return text
        return text.encode('utf-8')

    @staticmethod
    def _full_name(legislator):
        '''Return the legislator's 'official_full' name.

        When that key is missing or empty, fall back to whichever of the
        'first' and 'last' name parts are present, joined by a space.
        '''
        names = legislator['name']
        full = names.get('official_full')
        if full:
            return full
        return ' '.join(part for part in (names.get('first'), names.get('last')) if part)

    @staticmethod
    def _is_member(legislator, member):
        '''Tell whether a committee-membership entry refers to this legislator.

        A match is the same official full name, or the same 'bioguide' or
        'thomas' id when both records carry that id.
        '''
        full = legislator['name'].get('official_full')
        # Guard against None == None when both records lack a name.
        if full is not None and full == member.get('name'):
            return True
        ids = legislator['id']
        return any(key in ids and key in member and ids[key] == member[key]
                   for key in ('bioguide', 'thomas'))

    def lookup_property(self, property):
        '''Print one property for the legislators selected on the command line.

        The committee selection and the last-name selection are independent;
        when both options were given, both lookups run.
        '''
        if self.args.committee is not None:
            self.lookup_by_committee(property)
        if self.args.last_name is not None:
            self.lookup_by_lastname(property)

    def lookup_by_committee(self, property):
        '''Print the property for every member of each matching committee.'''
        for comm in self.committees:
            if not self.inclusive_wildcard_match(comm['name'], self.args.committee):
                continue
            if self.args.debug: print(comm)
            print('"{}" member properties:'.format(self._printable(comm['name'])))
            # "in" works both for the loaded mapping and for the empty list
            # that yaml_load() returns when the membership file was missing.
            members = self.membership[comm['thomas_id']] if comm['thomas_id'] in self.membership else []
            for member in members:
                self.lookup_by_member(property, member)

    def inclusive_wildcard_match(self, name, pat):
        '''Return True when pat occurs anywhere in name (fnmatch semantics).

        The pattern is wrapped in '*' on each side unless it already starts
        or ends with one, so both plain text and wildcard patterns match as
        substrings.
        '''
        if any(c in pat for c in '*?[]'):   # a wildcard pattern
            # prepend or append a * for inclusiveness if not already there
            if not pat.startswith('*'): pat = '*' + pat
            if not pat.endswith('*'): pat = pat + '*'
        else:                               # not a wildcard
            pat = '*' + pat + '*'
        return fnmatch.fnmatch(name, pat)

    def lookup_by_member(self, property, member):
        '''Print the property for each legislator matching a membership entry.'''
        for leg in self.legislators:
            if self._is_member(leg, member):
                self.lookup_legislator_properties(property, leg)

    def lookup_by_lastname(self, property):
        '''Print the property for legislators whose last name matches --last-name.

        The pattern is passed to fnmatch unchanged, so a name without
        wildcards must match the whole last name.
        '''
        for leg in self.legislators:
            if not fnmatch.fnmatch(leg['name']['last'], self.args.last_name):
                continue
            if self.args.debug: print(leg)
            self.lookup_legislator_properties(property, leg)

    def lookup_legislator_properties(self, property, legislator):
        '''Collect and print the distinct values of a property for a legislator.

        Values come from the legislator's terms (filtered by lookup_filter)
        and from the first district-office record that shares an id with the
        legislator.  The result is stored in self.properties[property] and
        printed one value per line in sorted order.
        '''
        found = {term[property] for term in legislator['terms'] if self.lookup_filter(property, term)}
        self.properties[property] = found
        for off in self.offices:
            if self.args.debug: print(off)
            shared = [db for db in off['id'] if db in legislator['id']]
            if any(off['id'][db] == legislator['id'][db] for db in shared):
                found |= {ok[property] for ok in off['offices'] if property in ok and len(ok[property]) > 0}
                break   # only the first matching office record is used
        print('Property \'{}\' for {}:'.format(property, self._printable(self._full_name(legislator))))
        print('\n'.join(sorted(found)))

    def lookup_filter(self, property, term):
        '''Return True when a term should contribute its value of property.

        The term must hold a non-empty value.  With --current-term it must
        also have an 'end' date that is not before the current local time.
        '''
        if not (property in term and len(term[property]) > 0):
            return False
        if not self.args.current_term:
            return True
        end = term.get('end')
        if end is None:
            return False
        # An unquoted YAML date is loaded as a date object, a quoted one as text.
        if hasattr(end, 'timetuple'):
            end_time = end.timetuple()
        else:
            end_time = time.strptime(end, '%Y-%m-%d')
        return end_time >= time.localtime()

    def database_load(self):
        '''Load the YAML files into attributes of this instance.

        Sets self.legislators and self.offices always, and self.committees
        and self.membership when --committee was given (None otherwise).

        Raises Exception with instructions to clone the data when loading
        fails; the underlying error is printed first.
        '''
        # Use the libyaml-backed loader when PyYAML provides it, otherwise
        # the pure-Python loader that yields the same objects.
        loader = getattr(yaml, 'CLoader', yaml.Loader)
        try:
            with self.database_access('legislators-current.yaml') as y:
                self.legislators = self.yaml_load(y, Loader=loader)
            with self.database_access('legislators-district-offices.yaml') as y:
                self.offices = self.yaml_load(y, Loader=loader)
            if self.args.committee is not None:
                with self.database_access('committees-current.yaml') as y:
                    self.committees = self.yaml_load(y, Loader=loader)
                with self.database_access('committee-membership-current.yaml') as y:
                    self.membership = self.yaml_load(y, Loader=loader)
            else:
                self.committees = None
                self.membership = None
        except Exception as e:
            # Exception rather than BaseException, so that Ctrl-C and
            # SystemExit are not reported as a missing database.
            print(e)
            raise Exception('Clone data from {} and copy it to {} .'.format(self.args.repo, self.data_path))

    def yaml_load(self, y, Loader=None):
        '''Parse a YAML stream and return its content.

        y      -- string or file-like object holding the YAML text.
        Loader -- PyYAML loader class; None selects yaml.loader.Loader.

        Returns an empty list in place of None (an empty document) so that
        callers can always iterate over the result.
        '''
        if Loader is None:
            Loader = yaml.loader.Loader
        # NOTE(security): yaml.load with Loader/CLoader can construct
        # arbitrary Python objects.  The input is a local clone or a GitHub
        # download; consider SafeLoader/CSafeLoader if it is ever untrusted.
        res = yaml.load(y, Loader=Loader)
        if res is None: res = []    # make it an empty iterable
        return res

    def database_access(self, filename):
        '''Return a context manager yielding a readable source for filename.

        With --download the file is fetched from the raw-content host that
        corresponds to the repo URL.  Otherwise it is opened from
        self.data_path; a missing file produces a warning and an empty
        source instead of an error.
        '''
        if self.args.download:
            if not self.args.repo.endswith('/'): self.args.repo += '/'
            raw_root = urlparse.urlunparse(urlparse.urlparse(self.args.repo)._replace(netloc=self.RAW_HOST))
            url_base = urlparse.urljoin(raw_root, self.RAW_BRANCH + '/')
            # contextlib required for urlopen in with ... as for v < 3.3
            return contextlib.closing(urlopen(urlparse.urljoin(url_base, filename)))
        fname_fullpath = os.path.join(self.data_path, filename)
        if os.path.exists(fname_fullpath):
            # Binary mode: PyYAML detects and decodes the encoding itself, so
            # the result does not depend on the locale's default encoding.
            return open(fname_fullpath, 'rb')
        warnings.warn('File {} doesn\'t exist; clone data from {} and copy it to {} .'.format(filename, self.args.repo, self.data_path))
        return self.Emptysource()

    class Emptysource(object):
        '''File-like stand-in for a missing data file: reads as empty, ignores writes.'''

        def read(self, size=-1):
            return ''   # empty

        def write(self, data):
            pass        # ignore the data

        def __enter__(self):
            return self

        def __exit__(self, *exc_info):
            return False    # never suppress an exception
|
|
|
|
|
|
if __name__ == '__main__':
    # Constructing the object is the whole program: its __init__ parses the
    # command line, loads the database and prints the requested properties.
    res = CongressLookup()
|