import cStringIO
import csv
import codecs
import decimal
import datetime
import json
import re
import hashlib

# DOTALL so that /* ... */ comments spanning multiple lines are stripped too.
COMMENTS_REGEX = re.compile(r"/\*.*?\*/", re.DOTALL)


def gen_query_hash(sql):
    """Return hash of the given query after stripping all comments, line
    breaks and multiple spaces, and lower casing all text.

    TODO: possible issue - the following queries will get the same id:
        1. SELECT 1 FROM table WHERE column='Value';
        2. SELECT 1 FROM table where column='value';
    """
    sql = COMMENTS_REGEX.sub("", sql)
    sql = "".join(sql.split()).lower()
    if isinstance(sql, unicode):
        # hashlib.md5 requires bytes; encode unicode input first.
        sql = sql.encode('utf-8')
    return hashlib.md5(sql).hexdigest()


class JSONEncoder(json.JSONEncoder):
    """Custom JSON encoding class, to handle Decimal and datetime.date
    instances.
    """
    def default(self, o):
        if isinstance(o, decimal.Decimal):
            return float(o)
        if isinstance(o, datetime.date):
            return o.isoformat()
        # Let the base class raise TypeError for unsupported types.
        return super(JSONEncoder, self).default(o)


class UnicodeWriter:
    """A CSV writer which will write rows to CSV file "f", which is encoded
    in the given encoding.
    """
    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect output to an in-memory queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def _encode_utf8(self, val):
        # Encode only unicode values; re-encoding a non-ASCII byte string
        # would raise UnicodeDecodeError under Python 2.
        if isinstance(val, unicode):
            return val.encode('utf-8')
        return val

    def writerow(self, row):
        self.writer.writerow([self._encode_utf8(s) for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and re-encode it into the target encoding
        data = self.encoder.encode(data)
        # Write to the target stream
        self.stream.write(data)
        # Empty the queue
        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)
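
# A minimal usage sketch (not part of the original module; the file name and
# sample values below are illustrative). It exercises the three utilities
# under Python 2: comment/whitespace-insensitive query hashing, JSON
# serialization of Decimal/date values, and unicode-safe CSV output.
if __name__ == "__main__":
    # Queries differing only in comments, whitespace and case hash the same.
    assert gen_query_hash("SELECT 1 /* hits */ FROM t") == \
        gen_query_hash("select 1  from t")

    # The stock json module rejects Decimal and date; JSONEncoder converts
    # them to a float and an ISO-8601 string respectively.
    print json.dumps({"total": decimal.Decimal("12.50"),
                      "day": datetime.date(2014, 1, 1)},
                     cls=JSONEncoder)
    # e.g. {"total": 12.5, "day": "2014-01-01"}

    # UnicodeWriter transparently encodes non-ASCII cell values.
    with open("demo.csv", "wb") as f:
        writer = UnicodeWriter(f)
        writer.writerows([[u"name", u"city"],
                          [u"Jos\xe9", u"S\xe3o Paulo"]])  # "José", "São Paulo"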