clean up some extraneous files

This commit is contained in:
Dan Drinkard
2014-04-22 17:39:36 -04:00
parent 5c1daa400e
commit fb558849ef
3 changed files with 0 additions and 938 deletions

View File

@@ -1,113 +0,0 @@
'''
HOW TO USE:
This file:
Extracts the option "value" attributes, and creates a hash if the option's text does not match the option's value
Extracts all input name + value, and outputs a fill_in list (note that this will output for ALL <input> tags)
You should try to give this file inputs containing only the forms that you want to extract from.
Copy this file to a separate location (so as not to send random files
to github)
Create a folder named "input" in the same location
Copy the section of the HTML containing the form options that you
want to extract (does not have to be exact, you can even copy and paste
the entire HTML source code if you want, only there is less chance for
errors if you only copy/paste the section containing the form)
Paste into a text file (NOT word) and save in the "input" folder.
Run the .py file in python 3.3, and copy/paste the resulting output.
It is recommended to check the output for any errors. If the attribute is
in single quotes you can change this in the regular expressions below to
selectoptions: r'(?:<option.*?value\s*=\s*)(\'.*?\')'
inputname: r'(?:<input.*?name\s*=\s*\')(.*?)(?:\')'
inputselectors: r'(?:<input.*?id\s*=\s*\')(.*?)(?:\')'
'''
import os
import re
YAMLselections = []
#compile regular expressions
#select = re.compile(r'<select.*?</select>', re.DOTALL | re.IGNORECASE)
selectoptions = re.compile(r'(?:<option.*?value\s*=\s*\")(.*?)(?:\")', re.IGNORECASE)
selecttext = re.compile(r'(?:<option.*?value.*?>)(.*?)(?:</)', re.IGNORECASE)
inputname = re.compile(r'(?:<input.*?name\s*=\s*\")(.*?)(?:\")', re.IGNORECASE)
inputselectors = re.compile(r'(?:<input.*?id\s*=\s*\")(.*?)(?:\")', re.IGNORECASE)
for root,dirs,files in os.walk('.\input'):
for file in files:
with open('.\input\\' + file, 'r') as f:
html = f.read()
#limit by form
#input search
data = '------------\n' + file + '\n' + 'inputs:' + '\n'
inputs = re.findall(inputname, html)
selectors = re.findall(inputselectors, html)
data = data + " " + "- fill_in:\n"
for i in range(0,len(inputs)):
try:
data = data + " " + "- name: " + inputs[i] + "\n"
data = data + " " + "selector: \"#" + selectors[i] + "\"\n"
data = data + " " + "value: "
if inputs[i].find("first") != -1:
data = data + "$NAME_FIRST\n"
elif inputs[i].find("last") != -1:
data = data + "$NAME_LAST\n"
elif inputs[i].find("zip5") != -1:
data = data + "$ADDRESS_ZIP5\n"
elif inputs[i].find("zip4") != -1:
data = data + "$ADDRESS_ZIP4\n"
elif inputs[i].find("prefix") != -1:
data = data + "$NAME_PREFIX\n"
elif inputs[i].find("address2") != -1:
data = data + "$ADDRESS_STREET_2\n"
elif inputs[i].find("address") != -1:
data = data + "$ADDRESS_STREET\n"
elif inputs[i].find("city") != -1:
data = data + "$ADDRESS_CITY\n"
elif inputs[i].find("email") != -1:
data = data + "$EMAIL\n"
elif inputs[i].find("phone") != -1:
data = data + "$PHONE\n"
elif inputs[i].find("subject") != -1:
data = data + "$SUBJECT\n"
elif inputs[i].find("message") != -1:
data = data + "$MESSAGE\n"
else:
data = data + "\n"
data = data + " " + "required: "
if inputs[i].find("required") != -1:
data = data + "Yes\n"
else:
data = data + "\n"
except IndexError:
data = data + "\ninputs may not match selectors\n"
data = data + " " + "- name: \n" + " " + "selector: \"#\n" + " " + "value: \n" + " " + "required: \n"
#option search
options = re.findall(selectoptions, html)
text = re.findall(selecttext, html)
data = data + '------------\n' + file + '\n' + 'options:' + '\n'
if len(options) != len(text):
data = data + "\nerror with text, printing option values only\n"
for option in options:
data = data + " "
data = data + "- \"" + option + "\"\n"
YAMLselections.append(data)
elif set(options) != set(text):
data = data + "\noptions != text, printing hash\n"
for i in range(0,len(options)):
data = data + " "
data = data + "\"" + text[i] + "\":" + " \"" + options[i] + "\"\n"
YAMLselections.append(data)
else:
for option in options:
data = data + " "
data = data + "- \"" + option + "\"\n"
YAMLselections.append(data)
for x in YAMLselections:
print(x)