diff --git a/.gitignore b/.gitignore index 56c6162..4767d3b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /scripts/cache *.pickle .DS_Store +/scripts/email/config.yml diff --git a/scripts/email/config.yml.example b/scripts/email/config.yml.example new file mode 100644 index 0000000..b38e78e --- /dev/null +++ b/scripts/email/config.yml.example @@ -0,0 +1,12 @@ +# email: +# # smtp details +# hostname: +# port: +# user_name: +# password: +# starttls: +# # email defaults +# subject: "[unitedstates/congress-legislators] Notice" +# from: +# from_name: "unitedstates" +# to: \ No newline at end of file diff --git a/scripts/social_media.py b/scripts/social_media.py index d9cc6e6..6896d8e 100755 --- a/scripts/social_media.py +++ b/scripts/social_media.py @@ -20,6 +20,9 @@ # other options: # --service (required): "twitter", "youtube", or "facebook" # --bioguide: limit to only one particular member +# --email: +# in conjunction with --sweep, send an email if there are any new leads, using +# settings in scripts/email/config.yml (if it was created and filled out). 
# uses a CSV at data/social_media_blacklist.csv to exclude known non-individual account names @@ -46,6 +49,7 @@ def main(): ] } + email_enabled = utils.flags().get('email', False) debug = utils.flags().get('debug', False) do_update = utils.flags().get('update', False) do_clean = utils.flags().get('clean', False) @@ -212,13 +216,20 @@ def main(): writer = csv.writer(open("cache/social_media/%s_candidates.csv" % service, 'w')) writer.writerow(["bioguide", "official_full", "website", "service", "candidate", "candidate_url"]) - for bioguide in to_check: - candidate = candidate_for(bioguide) - if candidate: - url = current_bioguide[bioguide]["terms"][-1].get("url", None) - candidate_url = "https://%s.com/%s" % (service, candidate) - writer.writerow([bioguide, current_bioguide[bioguide]['name']['official_full'].encode('utf-8'), url, service, candidate, candidate_url]) - print "\tWrote: %s" % candidate + if len(to_check) > 0: + email_body = "Social media leads found:\n\n" + for bioguide in to_check: + candidate = candidate_for(bioguide) + if candidate: + url = current_bioguide[bioguide]["terms"][-1].get("url", None) + candidate_url = "https://%s.com/%s" % (service, candidate) + row = [bioguide, current_bioguide[bioguide]['name']['official_full'].encode('utf-8'), url, service, candidate, candidate_url] + writer.writerow(row) + print "\tWrote: %s" % candidate + email_body += ("%s\n" % row) + + if email_enabled: + utils.send_email(email_body) def verify(): bioguide = utils.flags().get('bioguide', None) diff --git a/scripts/utils.py b/scripts/utils.py index 2b553d3..11d1b84 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -7,9 +7,27 @@ import os, errno, sys, traceback import re, htmlentitydefs import pprint from datetime import datetime +import time import lxml.html # for meta redirect parsing +import yaml + +import smtplib +import email.utils +from email.mime.text import MIMEText +import getpass + + +# read in an opt-in config file for supplying email settings +# 
returns None if it's not there, and this should always be handled gracefully
+path = "email/config.yml"
+email_settings = None
+if os.path.exists(path):
+    with open(path, 'r') as config_file:
+        email_settings = (yaml.safe_load(config_file) or {}).get('email', None)
+
+
 def parse_date(date):
     return datetime.strptime(date, "%Y-%m-%d").date()
 
@@ -175,7 +193,6 @@ def unescape(text):
 # hooked to load mappings as OrderedDicts. Adapted from:
 # https://gist.github.com/317164
 
-import yaml
 try:
     from yaml import CSafeLoader as Loader, CDumper as Dumper
 except ImportError:
@@ -270,3 +287,62 @@ def yaml_dump(data, path):
 
 def pprint(data):
     yaml.dump(data, sys.stdout, default_flow_style=False, allow_unicode=True)
+
+# Report a message (or an exception) to the admin: it is always printed, and
+# it is also emailed when email settings were supplied.
+def admin(body):
+    try:
+        if isinstance(body, Exception):
+            body = format_exception(body)
+
+        print body # always print it
+
+        if email_settings:
+            send_email(body)
+
+    except Exception as exception:
+        # deliberately not reported through admin() again, which could recurse
+        print "Exception logging message to admin, halting as to avoid loop"
+        print format_exception(exception)
+
+# Render an exception as text. The full traceback is included when the given
+# exception is the one currently being handled; otherwise (e.g. an exception
+# object passed around outside its except block) only its type and message.
+def format_exception(exception):
+    exc_type, exc_value, exc_traceback = sys.exc_info()
+    if exc_value is not exception:
+        return "".join(traceback.format_exception_only(type(exception), exception))
+
+    # the formatted lines already end in newlines, so join them as-is
+    return "".join(traceback.format_exception(exc_type, exc_value, exc_traceback))
+
+# Send the message as a plain text email, using the settings in email/config.yml.
+# If no settings were supplied, say so and skip sending rather than crashing.
+def send_email(message):
+    if not email_settings:
+        print "No email settings found, not sending email."
+        return
+
+    print "Sending email to %s..." % email_settings['to']
+
+    # adapted from http://www.doughellmann.com/PyMOTW/smtplib/
+    msg = MIMEText(message)
+    msg.set_unixfrom('author')
+    msg['To'] = email.utils.formataddr(('Recipient', email_settings['to']))
+    msg['From'] = email.utils.formataddr((email_settings['from_name'], email_settings['from']))
+    msg['Subject'] = "%s - %i" % (email_settings['subject'], int(time.time()))
+
+    # a missing or blank port falls back to smtplib's default port
+    server = smtplib.SMTP(email_settings['hostname'], email_settings.get('port') or 0)
+    try:
+        server.ehlo()
+        if email_settings['starttls'] and server.has_extn('STARTTLS'):
+            server.starttls()
+            server.ehlo()
+
+        server.login(email_settings['user_name'], email_settings['password'])
+        server.sendmail(email_settings['from'], [email_settings['to']], msg.as_string())
+    finally:
+        server.quit()
+
+    print "Sent email to %s." % email_settings['to']
\ No newline at end of file