Add a few more prefab patterns to amendment_code.py

This commit is contained in:
wilson428
2013-03-25 23:41:27 -04:00
parent 40b3168cb9
commit 22045725ce
4 changed files with 30 additions and 9 deletions

View File

@@ -1,5 +1,7 @@
import json, re
import utils
import logging
'''
the goal here is to convert the natural language of the amendment into code
@@ -12,8 +14,10 @@ tremendous NLP chops.
# Certain amendment structures are so common that it's wisest -- that is, easiest -- to define them explicitly.
# Each prefab is a (pattern, names) pair. The pattern's capture groups are
# listed left to right in `names`; group 1 always wraps the entire pattern, so
# its name is "verbatim". Every entry is comma-terminated: two adjacent tuples
# without a separating comma are parsed as a call and raise TypeError.
#
# Patterns are raw strings so that \d, \$ and \. reach the regex engine
# unchanged instead of relying on Python passing unknown escapes through.
#
# The "lines?" and "following:?" forms accept everything the stricter
# "line" / "following:" forms accepted (with identical groups), so only the
# more permissive variants are listed.
prefabs = [
    (
        r"(On page (\d+), lines? (\d+), ([a-z]+) the amount by \$([\d,]+)\.)",
        ["verbatim", "page", "line", "action", "content"],
    ),
    # Looser "On page N, ... line(s) M ..." form, terminated by "." or ";".
    # Whatever text sits between the page number and "line(s)" is captured
    # under "action"; the text after the line number is captured as "content".
    (
        r"(On page (\d+), (.*?)lines? (\d+)(.*?)(?:\.|;))",
        ["verbatim", "page", "action", "line", "content"],
    ),
    (
        r"(At the (end) of (.*?), ([a-z]+) the following:?(.*))",
        ["verbatim", "direction", "location", "action", "content"],
    ),
    (
        r"(At the (appropriate place),? ([a-z]+) the following:?(.*))",
        ["verbatim", "location", "action", "content"],
    ),
]
#a list of verbs that will translate to functions
@@ -23,6 +27,12 @@ actions = ["strike", "insert", "delete", "increase", "decrease"]
# Positional phrases ("after", "at the end").
# NOTE(review): the code that consumes this list is not visible in this
# excerpt -- confirm how it is matched against amendment text.
directions = ["after", "at the end"]
def parse_amendment_text(amendment, bill):
if amendment["info"]["amends"]["document_type"] != "bill":
logging.info("amendment %s does not amend the bill itself." % amendment["info"]["amendment_id"])
return amendment
amendment["text"] = re.sub("\s+", " ", amendment["text"], re.S)
# parse the intention of the amendment
amendment["commands"] = []
commands = []
@@ -31,14 +41,18 @@ def parse_amendment_text(amendment, bill):
#check for prefab patterns
for prefab in prefabs:
temp = re.findall(prefab[0], amendment["text"], re.I | re.S)
if temp:
if temp:
for match in temp:
command = dict([(x[1], match[x[0]]) for x in enumerate(prefab[1])])
commands.append(command)
#remove the matched command to prevent double counting
amendment["text"] = amendment["text"].replace(command["verbatim"], "")
# for amendments that reference a place in the legislation instead of line number, resolve to location
for command in commands:
if len(command["verbatim"]) > 1000:
command["verbatim"] = "<em>Text of amendment too long (" + str(len(command["verbatim"].split(" "))) + " words).</em>"
if "line" in command and "page" in command:
amendment["commands"].append(command)
elif "location" in command: