classic rock data and code
This commit is contained in:
1
classic-rock/classic-rock-raw-data.csv
Normal file
1
classic-rock/classic-rock-raw-data.csv
Normal file
File diff suppressed because one or more lines are too long
1
classic-rock/classic-rock-song-list.csv
Normal file
1
classic-rock/classic-rock-song-list.csv
Normal file
File diff suppressed because one or more lines are too long
110
classic-rock/compiling_radio.py
Normal file
110
classic-rock/compiling_radio.py
Normal file
@@ -0,0 +1,110 @@
|
||||
def hr_pull(x, y):
    """Merge one station's numbered pull files into a single file.

    Reads ``<callsign>001.txt`` through ``<callsign><y>.txt`` (the number is
    zero-padded to three digits) and writes everything to ``<callsign>.txt``.
    The highest-numbered file comes first in the output, and each file's
    contents are followed by one extra newline.

    Parameters:
        x: station tuple; only ``x[2]`` (the callsign) is used.
        y: how many numbered files to merge.

    Raises:
        IOError (OSError on Python 3): if a numbered file cannot be read or
            the merged file cannot be written.
    """
    callsign = x[2]
    chunks = []
    for iteration in range(1, y + 1):
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        # "with" guarantees the handle is closed even if the read fails.
        with open(filename, "r") as readfile:
            chunks.append(readfile.read() + "\n")

    # Highest-numbered file first, as in the original prepend-based loop.
    chunks.reverse()
    with open(callsign + ".txt", "w") as writefile:
        writefile.write("".join(chunks))
|
||||
|
||||
def hh_pull(x, y):
    """Merge one station's numbered pull files into a single file.

    Works on ``<callsign>001.txt`` .. ``<callsign><y>.txt`` (three-digit,
    zero-padded numbers) and writes the result to ``<callsign>.txt``.  Files
    are emitted from the highest number down to 001, and one extra newline is
    appended after each file's contents.

    Parameters:
        x: station tuple; only ``x[2]`` (the callsign) is used.
        y: how many numbered files to merge.

    Raises:
        IOError (OSError on Python 3): if a numbered file cannot be read or
            the merged file cannot be written.
    """
    callsign = x[2]
    pieces = []
    for iteration in range(1, y + 1):
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        # The context manager closes the file even when read() raises.
        with open(filename, "r") as readfile:
            pieces.append(readfile.read() + "\n")

    # Emit in descending file-number order, matching the original output.
    merged = "".join(reversed(pieces))
    with open(callsign + ".txt", "w") as writefile:
        writefile.write(merged)
|
||||
|
||||
def dy_pull(x, y):
    """Merge one station's numbered pull files into a single file.

    Concatenates ``<callsign>001.txt`` .. ``<callsign><y>.txt`` (numbers are
    zero-padded to three digits) into ``<callsign>.txt``.  The output starts
    with the highest-numbered file and ends with file 001; every file's
    contents are followed by one extra newline.

    Parameters:
        x: station tuple; only ``x[2]`` (the callsign) is used.
        y: how many numbered files to merge.

    Raises:
        IOError (OSError on Python 3): if a numbered file cannot be read or
            the merged file cannot be written.
    """
    callsign = x[2]
    parts = []
    # Walk the numbers downwards so the parts are already in output order.
    for iteration in range(y, 0, -1):
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        # "with" releases the handle even if reading fails.
        with open(filename, "r") as readfile:
            parts.append(readfile.read() + "\n")

    with open(callsign + ".txt", "w") as writefile:
        writefile.write("".join(parts))
|
||||
|
||||
|
||||
# Station records.  Only the callsign (index 2) is read by the merge
# functions above; the remaining fields mirror the tuples used by radio.py.
cc1 = ("http://www.q1043.com/services/now_playing.html?streamId=1465&limit=25", 0, "WAXQ", "")
cc2 = ("http://www.lonestar925.com/services/now_playing.html?streamId=3379&limit=25", 0, "KZPS", "")
cc3 = ("http://www.wbig.com/services/now_playing.html?streamId=2505&limit=25", 0, "WBIG", "")
cc4 = ("http://www.big1059.com/services/now_playing.html?streamId=557&limit=25", 0, "WBGG", "")
cc5 = ("http://www.thefox.com/services/now_playing.html?streamId=393&limit=25", 0, "KRFX", "")
cc6 = ("http://www.dve.com/services/now_playing.html?streamId=2017&limit=25", 0, "WDVE", "")
cc7 = ("http://www.wrfx.com/services/now_playing.html?streamId=1613&limit=25", 0, "WRFX", "")
cc8 = ("http://www.kzep.com/services/now_playing.html?streamId=4051&limit=25", 0, "KZEP", "")
cc9 = ("http://www.101kgb.com/services/now_playing.html?streamId=237&limit=25", 0, "KGB", "")
gm1 = ("http://www.wcsx.com/recentlyplayed.aspx", 0, "WCSX", "")
gm2 = ("http://www.wmgk.com/broadcasthistory.aspx", 0, "WMGK", "")
cx1 = ("http://www.1073theeagle.com/lsp/", 0, "WXGL", "")
cx2 = ("http://www.houstonseagle.com/lsp/", 0, "KGLK", "")
cx3 = ("http://www.971theriver.com/lsp/", 0, "WSRV", "")
cb1 = ("http://wzlx.cbslocal.com/playlist/", 0, "WZLX", "")
cb2 = ("http://wncx.cbslocal.com/playlist/", 0, "WNCX", "")
cb3 = ("http://kzok.cbslocal.com/playlist/", 0, "KZOK", "")
tg1 = ("http://wlup.tunegenie.com/onair/", 0, "WLUP", "")
tg2 = ("http://wofx.tunegenie.com/onair/", 0, "WOFX", "")
tg3 = ("http://kgon.tunegenie.com/onair/", 0, "KGON", "")
tg4 = ("http://kcfx.tunegenie.com/onair/", 0, "KCFX", "")
tg5 = ("http://klos.tunegenie.com/onair/", 0, "KLOS", "")
tg6 = ("http://kseg.tunegenie.com/onair/", 0, "KSEG", "")
tg7 = ("http://kufx.tunegenie.com/onair/", 0, "KUFX", "")
ll1 = ("http://player.listenlive.co/24751/en/songhistory", 0, "KQRS", "")
ll2 = ("http://player.listenlive.co/25951/en/songhistory", 0, "KSAN", "")
ke1 = ("http://www.kshe95.com/broadcasthistory", 0, "KSHE", "")
kx1 = ("http://kslx.com/playlist", 0, "KSLX", "")

# Stations with 121 numbered files each.
for station in (cc1, cc2, cc3, cc4, cc5, cc6, cc7, cc8, cc9,
                tg1, tg2, tg3, tg4, tg5, tg6, tg7, ke1):
    hr_pull(station, 121)

# Stations with 241 numbered files each.
for station in (cx1, cx2, cx3, ll1, ll2, kx1):
    hh_pull(station, 241)

# Stations with 6 numbered files each.
for station in (gm1, gm2, cb1, cb2, cb3):
    dy_pull(station, 6)

print("Done")
|
||||
|
||||
407
classic-rock/radio.py
Normal file
407
classic-rock/radio.py
Normal file
@@ -0,0 +1,407 @@
|
||||
import time
|
||||
import urllib2
|
||||
import sys
|
||||
|
||||
"""
|
||||
next step: make it create new file each time, run cleanup op
|
||||
"""
|
||||
|
||||
def cc_pull(x):
    """Scrape a "now_playing" feed and record the tracks not yet seen.

    ``x`` is a station tuple ``(url, pulls_so_far, callsign, last_song)``.
    The whole page at ``url`` is downloaded and up to 20 tracks are cut out
    of it with plain string searches.  Each track is written to
    ``<callsign><NNN>.txt`` (``NNN`` is ``pulls_so_far + 1`` zero-padded to
    three digits) as ``song|artist|callsign|time.time()``.  Writing stops as
    soon as a title equals ``last_song`` or the page runs out of tracks.

    Returns:
        ``(url, pulls_so_far + 1, callsign, first_title_on_page)`` on
        success.  If anything fails, ``x`` is returned unchanged so the same
        file number is reused by the next call.
    """
    try:
        iteration = x[1] + 1
        url = x[0]
        callsign = x[2]
        last_song = x[3]
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        new_last_song = last_song

        # Download before creating the output file so that a failed request
        # does not leave an empty file behind.
        response = urllib2.urlopen(url)
        try:
            page = response.read()
        finally:
            response.close()

        with open(filename, "w") as record:
            for counter in range(1, 21):
                # Position of the separator between this track and the next.
                offset = page.find('}},{"track":')
                song = page[page.find('":"') + 3:page.find('","')]
                artist = page[page.find('artistName":"') + 13:page.find('","amgArtistId"')]
                # The feed escapes "/" with a backslash; undo that.
                song = song.replace("\\/", "/")
                artist = artist.replace("\\/", "/")
                if song == last_song:
                    # Everything from here on was recorded by an earlier pull.
                    break
                if counter == 1:
                    new_last_song = song
                record.write(song + "|" + artist + "|" + callsign + "|" + str(time.time()) + "\n")
                if offset == -1:
                    # No further separator: that was the last track.  Going
                    # on would only re-slice leftovers into garbage rows.
                    break
                page = page[offset + 3:]

        time.sleep(3)
        return (x[0], iteration, x[2], new_last_song)
    except Exception as err:
        # Best effort: report, pause, and hand the tuple back unchanged.
        # KeyboardInterrupt/SystemExit are deliberately NOT caught so the
        # scraper can still be stopped.
        sys.stderr.write("cc_pull failed for %r: %r\n" % (x, err))
        time.sleep(3)
        return x
|
||||
|
||||
|
||||
def gm_pull(x):
    """Scrape a recently-played page whose rows look like ``"Song" - Artist``.

    ``x`` is a station tuple ``(url, pulls_so_far, callsign, last_song)``.
    Up to 10000 lines of the page are read; every line containing ``" -``
    is split into the text inside the quotes (song) and the text after the
    dash (artist) and written to ``<callsign><NNN>.txt`` (``NNN`` is
    ``pulls_so_far + 1`` zero-padded to three digits) as
    ``song|artist|callsign|time.time()``.  Unlike some sibling scrapers this
    one does not compare against ``last_song``; every matching row is kept.

    Returns:
        ``(url, pulls_so_far + 1, callsign, last_song)`` on success, or ``x``
        unchanged if anything fails.
    """
    try:
        iteration = x[1] + 1
        url = x[0]
        callsign = x[2]
        last_song = x[3]
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"

        # Open the page first so a failed request leaves no empty file.
        response = urllib2.urlopen(url)
        try:
            with open(filename, "w") as record:
                for _ in range(10000):
                    line = response.readline()
                    if not line:
                        # End of page; nothing further can match.
                        break
                    if '" -' in line:
                        # "-1" drops the closing quote just before " -".
                        song = line[line.find('"') + 1:line.find(" -") - 1]
                        artist = line[line.find("- ") + 1:].strip()
                        record.write(song + "|" + artist + "|" + callsign + "|" + str(time.time()) + "\n")
                        # TODO (from the original notes): four lines after a
                        # match, a line containing "Visit iTunes" carries an
                        # href; write those links to a file and later use
                        # them to look up album release years.
        finally:
            response.close()

        time.sleep(3)
        return (x[0], iteration, x[2], last_song)
    except Exception as err:
        # Best effort: report, pause, and hand the tuple back unchanged.
        # KeyboardInterrupt/SystemExit are deliberately NOT caught.
        sys.stderr.write("gm_pull failed for %r: %r\n" % (x, err))
        time.sleep(3)
        return x
|
||||
|
||||
def cx_pull(x):
    """Scrape a playlist page built from ``cmPlaylistContent`` rows.

    ``x`` is a station tuple ``(url, pulls_so_far, callsign, last_song)``.
    Up to 10000 lines are read.  On each line containing
    ``cmPlaylistContent`` the song is the text between ``/">`` and
    ``</a></strong>`` and the artist is taken from the ``alt=`` attribute.
    Rows are written to ``<callsign><NNN>.txt`` (``NNN`` is
    ``pulls_so_far + 1`` zero-padded to three digits) as
    ``song|artist|callsign|time.time()`` until a title equals ``last_song``.

    Returns:
        ``(url, pulls_so_far + 1, callsign, first_title_found)`` on success
        (``last_song`` is kept if no row was found), or ``x`` unchanged if
        anything fails.
    """
    try:
        iteration = x[1] + 1
        url = x[0]
        callsign = x[2]
        last_song = x[3]
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        new_last_song = last_song
        first = True

        # Open the page first so a failed request leaves no empty file.
        response = urllib2.urlopen(url)
        try:
            with open(filename, "w") as record:
                for _ in range(10000):
                    line = response.readline()
                    if not line:
                        # End of page; nothing further can match.
                        break
                    if 'cmPlaylistContent' not in line:
                        continue
                    song = line[line.find('/">') + 3:line.find("</a></strong>")]
                    artist = line[line.find("alt=") + 5:line.find('" class="')]
                    artist = artist.strip()
                    # NOTE(review): as written these two replaces are no-ops;
                    # the first argument was presumably an HTML entity for
                    # the apostrophe that got decoded somewhere -- confirm
                    # against the original source.
                    song = song.replace("'", "'")
                    artist = artist.replace("'", "'")
                    if song == last_song:
                        # Reached what an earlier pull already recorded.
                        break
                    if first:
                        new_last_song = song
                        first = False
                    record.write(song + "|" + artist + "|" + callsign + "|" + str(time.time()) + "\n")
                    # TODO (from the original notes): two lines after a
                    # "Download Song:" line, a line containing "apple"
                    # carries an iTunes href; write those links to a file and
                    # later use them to look up album release years.
        finally:
            response.close()

        time.sleep(3)
        return (x[0], iteration, x[2], new_last_song)
    except Exception as err:
        # Best effort: report, pause, and hand the tuple back unchanged.
        # KeyboardInterrupt/SystemExit are deliberately NOT caught.
        sys.stderr.write("cx_pull failed for %r: %r\n" % (x, err))
        time.sleep(3)
        return x
|
||||
|
||||
def cb_pull(x):
    """Scrape a playlist page built from ``track_title`` blocks.

    ``x`` is a station tuple ``(url, pulls_so_far, callsign, last_song)``.
    Up to 10000 loop passes are made over the page.  When a line contains
    ``<div class="track_title"`` the song is read from its ``rel=``
    attribute and the artist from the ``rel=`` attribute two lines further
    down; two more lines (the album row in the original code) are consumed
    but not recorded.  Rows are written to ``<callsign><NNN>.txt`` (``NNN``
    is ``pulls_so_far + 1`` zero-padded to three digits) as
    ``song|artist|callsign|time.time()``.  ``last_song`` is not consulted;
    every block on the page is kept.

    Returns:
        ``(url, pulls_so_far + 1, callsign, last_song)`` on success, or ``x``
        unchanged if anything fails.
    """
    try:
        iteration = x[1] + 1
        url = x[0]
        callsign = x[2]
        last_song = x[3]
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"

        # Open the page first so a failed request leaves no empty file.
        response = urllib2.urlopen(url)
        try:
            with open(filename, "w") as record:
                for _ in range(10000):
                    line = response.readline()
                    if not line:
                        # End of page; nothing further can match.
                        break
                    if '<div class="track_title"' not in line:
                        continue
                    song = line[line.find('rel=') + 5:line.find('">')]
                    # The artist sits two lines below the title.
                    response.readline()
                    line = response.readline()
                    artist = line[line.find('rel=') + 5:line.find('">')]
                    # Skip the next two lines as the original did; the album
                    # it parsed from them was never written out.
                    response.readline()
                    response.readline()
                    # NOTE(review): as written these replaces are no-ops; the
                    # first argument was presumably an HTML entity for the
                    # apostrophe that got decoded somewhere -- confirm
                    # against the original source.
                    song = song.replace("'", "'")
                    artist = artist.replace("'", "'")
                    record.write(song + "|" + artist + "|" + callsign + "|" + str(time.time()) + "\n")
        finally:
            response.close()

        time.sleep(3)
        return (x[0], iteration, x[2], last_song)
    except Exception as err:
        # Best effort: report, pause, and hand the tuple back unchanged.
        # KeyboardInterrupt/SystemExit are deliberately NOT caught.
        sys.stderr.write("cb_pull failed for %r: %r\n" % (x, err))
        time.sleep(3)
        return x
|
||||
|
||||
|
||||
def tg_pull(x):
    """Scrape an "onair" page whose rows are ``<div class="song">`` lines.

    ``x`` is a station tuple ``(url, pulls_so_far, callsign, last_song)``.
    The page is read line by line under a budget of 10000 counted lines.  A
    line containing ``<div class="song"><`` (a tag directly after the
    opening div) is skipped and counts twice against the budget.  Any other
    line containing ``<div class="song">`` gives the song (text up to
    ``</div>``); the artist is read from the following line, between
    ``<div>`` and `` <span``.  Rows are written to ``<callsign><NNN>.txt``
    (``NNN`` is ``pulls_so_far + 1`` zero-padded to three digits) as
    ``song|artist|callsign|time.time()`` until a title equals ``last_song``.

    Returns:
        ``(url, pulls_so_far + 1, callsign, first_title_found)`` on success
        (``last_song`` is kept if no row was found), or ``x`` unchanged if
        anything fails.
    """
    try:
        iteration = x[1] + 1
        url = x[0]
        callsign = x[2]
        last_song = x[3]
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        new_last_song = last_song
        first = True

        # Open the page first so a failed request leaves no empty file.
        response = urllib2.urlopen(url)
        try:
            with open(filename, "w") as record:
                counter = 0
                while counter < 10000:
                    line = response.readline()
                    if not line:
                        # End of page; nothing further can match.
                        break
                    if '<div class="song"><' in line:
                        # Skipped row; the original counted it twice.
                        counter = counter + 1
                    elif '<div class="song">' in line:
                        song = line[line.find('"song">') + 7:line.find('</div>')]
                        line = response.readline()
                        artist = line[line.find('<div>') + 5:line.find(' <span')]
                        # NOTE(review): as written all of the replaces below
                        # are no-ops; their first arguments were presumably
                        # HTML entities (apostrophe, ampersand) that got
                        # decoded somewhere -- confirm against the original.
                        song = song.replace("'", "'")
                        song = song.replace("'", "'")
                        artist = artist.replace("'", "'")
                        song = song.replace("&", "&")
                        artist = artist.replace("&", "&")
                        if song == last_song:
                            # Reached what an earlier pull already recorded.
                            break
                        if first:
                            new_last_song = song
                            first = False
                        record.write(song + "|" + artist + "|" + callsign + "|" + str(time.time()) + "\n")
                    counter = counter + 1
        finally:
            response.close()

        time.sleep(3)
        return (x[0], iteration, x[2], new_last_song)
    except Exception as err:
        # Best effort: report, pause, and hand the tuple back unchanged.
        # KeyboardInterrupt/SystemExit are deliberately NOT caught.
        sys.stderr.write("tg_pull failed for %r: %r\n" % (x, err))
        time.sleep(3)
        return x
|
||||
|
||||
def ll_pull(x):
    """Scrape a song-history page that embeds its data in ``var songs = ``.

    ``x`` is a station tuple ``(url, pulls_so_far, callsign, last_song)``.
    Up to 10000 lines are read looking for the first one containing
    ``var songs = ``.  From that line up to ten entries are cut out using
    the ``"title":"`` and ``"artist":`` markers and written to
    ``<callsign><NNN>.txt`` (``NNN`` is ``pulls_so_far + 1`` zero-padded to
    three digits) as ``song|artist|callsign|time.time()``.  ``last_song`` is
    not consulted; up to ten rows are written on every pull.

    Returns:
        ``(url, pulls_so_far + 1, callsign, last_title_written)`` on success
        (``last_song`` is kept if nothing was written), or ``x`` unchanged
        if anything fails.
    """
    try:
        iteration = x[1] + 1
        url = x[0]
        callsign = x[2]
        last_song = x[3]
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        new_last_song = last_song

        # Open the page first so a failed request leaves no empty file.
        response = urllib2.urlopen(url)
        try:
            with open(filename, "w") as record:
                for _ in range(10000):
                    line = response.readline()
                    if not line:
                        # End of page; the songs line was never found.
                        break
                    if 'var songs = ' not in line:
                        continue
                    for _entry in range(10):
                        song = line[line.find('"title":"') + 9:line.find('","')]
                        line = line[line.find('"artist":') + 10:]
                        artist = line[:line.find('"')]
                        record.write(song + "|" + artist + "|" + callsign + "|" + str(time.time()) + "\n")
                        new_last_song = song
                        next_entry = line.find('},{"timestamp":')
                        if next_entry == -1:
                            # Fewer than ten entries: stop rather than slice
                            # the leftovers into garbage rows.
                            break
                        line = line[next_entry:]
                    # Only the first matching line is processed.
                    break
        finally:
            response.close()

        time.sleep(3)
        return (x[0], iteration, x[2], new_last_song)
    except Exception as err:
        # Best effort: report, pause, and hand the tuple back unchanged.
        # KeyboardInterrupt/SystemExit are deliberately NOT caught.
        sys.stderr.write("ll_pull failed for %r: %r\n" % (x, err))
        time.sleep(3)
        return x
|
||||
|
||||
|
||||
def kx_pull(x):
    """Scrape a playlist page whose rows are marked with ``play-song``.

    ``x`` is a station tuple ``(url, pulls_so_far, callsign, last_song)``.
    Up to 10000 lines are read.  On each line containing ``play-song`` the
    song is the text between the first ``>`` and the first ``</``; the
    artist is read from the following line, between ``by `` and ``</``.
    Rows are written to ``<callsign><NNN>.txt`` (``NNN`` is
    ``pulls_so_far + 1`` zero-padded to three digits) as
    ``song|artist|callsign|time.time()`` until a title equals ``last_song``.

    Returns:
        ``(url, pulls_so_far + 1, callsign, first_title_found)`` on success
        (``last_song`` is kept if no row was found), or ``x`` unchanged if
        anything fails.
    """
    try:
        iteration = x[1] + 1
        url = x[0]
        callsign = x[2]
        last_song = x[3]
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        new_last_song = last_song
        first = True

        # Open the page first so a failed request leaves no empty file.
        response = urllib2.urlopen(url)
        try:
            with open(filename, "w") as record:
                for _ in range(10000):
                    line = response.readline()
                    if not line:
                        # End of page; nothing further can match.
                        break
                    if 'play-song' not in line:
                        continue
                    song = line[line.find('>') + 1:line.find("</")]
                    # The artist is on the line right after the title.
                    line = response.readline()
                    artist = line[line.find('by ') + 3:line.find('</')]
                    if song == last_song:
                        # Reached what an earlier pull already recorded.
                        break
                    if first:
                        new_last_song = song
                        first = False
                    record.write(song + "|" + artist + "|" + callsign + "|" + str(time.time()) + "\n")
        finally:
            response.close()

        time.sleep(3)
        return (x[0], iteration, x[2], new_last_song)
    except Exception as err:
        # Best effort: report, pause, and hand the tuple back unchanged.
        # KeyboardInterrupt/SystemExit are deliberately NOT caught.
        sys.stderr.write("kx_pull failed for %r: %r\n" % (x, err))
        time.sleep(3)
        return x
|
||||
|
||||
def ke_pull(x):
    """Scrape a broadcast-history page built from ``views-field`` rows.

    ``x`` is a station tuple ``(url, pulls_so_far, callsign, last_song)``.
    Up to 10000 lines are read.  On each line containing
    ``views-field-field-title`` the song is the text between
    ``field-content">`` and ``</div>``; the artist is read from the
    following line, between ``<span>`` and ``</span>``.  Rows are written to
    ``<callsign><NNN>.txt`` (``NNN`` is ``pulls_so_far + 1`` zero-padded to
    three digits) as ``song|artist|callsign|time.time()`` until a title
    equals ``last_song``.

    Returns:
        ``(url, pulls_so_far + 1, callsign, first_title_found)`` on success
        (``last_song`` is kept if no row was found), or ``x`` unchanged if
        anything fails.
    """
    try:
        iteration = x[1] + 1
        url = x[0]
        callsign = x[2]
        last_song = x[3]
        filename = callsign + str(iteration).rjust(3, '0') + ".txt"
        new_last_song = last_song
        first = True

        # Open the page first so a failed request leaves no empty file.
        response = urllib2.urlopen(url)
        try:
            with open(filename, "w") as record:
                for _ in range(10000):
                    line = response.readline()
                    if not line:
                        # End of page; nothing further can match.
                        break
                    if 'views-field-field-title' not in line:
                        continue
                    song = line[line.find('field-content">') + 15:line.find("</div>")]
                    # The artist is on the line right after the title.
                    line = response.readline()
                    artist = line[line.find('<span>') + 6:line.find('</span>')]
                    # NOTE(review): as written these two replaces are no-ops;
                    # the first argument was presumably an HTML entity for
                    # the apostrophe that got decoded somewhere -- confirm
                    # against the original source.
                    song = song.replace("'", "'")
                    artist = artist.replace("'", "'")
                    if song == last_song:
                        # Reached what an earlier pull already recorded.
                        break
                    if first:
                        new_last_song = song
                        first = False
                    record.write(song + "|" + artist + "|" + callsign + "|" + str(time.time()) + "\n")
        finally:
            response.close()

        time.sleep(3)
        return (x[0], iteration, x[2], new_last_song)
    except Exception as err:
        # Best effort: report, pause, and hand the tuple back unchanged.
        # KeyboardInterrupt/SystemExit are deliberately NOT caught.
        sys.stderr.write("ke_pull failed for %r: %r\n" % (x, err))
        time.sleep(3)
        return x
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Station records: (page URL, pulls completed so far, callsign, last song
# seen).  The variable prefix matches the *_pull scraper used for the station.

# now_playing.html feeds -> cc_pull
cc1 = ("http://www.q1043.com/services/now_playing.html?streamId=1465&limit=25", 0, "WAXQ", "")
cc2 = ("http://www.lonestar925.com/services/now_playing.html?streamId=3379&limit=25", 0, "KZPS", "")
cc3 = ("http://www.wbig.com/services/now_playing.html?streamId=2505&limit=25", 0, "WBIG", "")
cc4 = ("http://www.big1059.com/services/now_playing.html?streamId=557&limit=25", 0, "WBGG", "")
cc5 = ("http://www.thefox.com/services/now_playing.html?streamId=393&limit=25", 0, "KRFX", "")
cc6 = ("http://www.dve.com/services/now_playing.html?streamId=2017&limit=25", 0, "WDVE", "")
cc7 = ("http://www.wrfx.com/services/now_playing.html?streamId=1613&limit=25", 0, "WRFX", "")
cc8 = ("http://www.kzep.com/services/now_playing.html?streamId=4051&limit=25", 0, "KZEP", "")
cc9 = ("http://www.101kgb.com/services/now_playing.html?streamId=237&limit=25", 0, "KGB", "")

# .aspx history pages -> gm_pull
gm1 = ("http://www.wcsx.com/recentlyplayed.aspx", 0, "WCSX", "")
gm2 = ("http://www.wmgk.com/broadcasthistory.aspx", 0, "WMGK", "")

# /lsp/ pages -> cx_pull
cx1 = ("http://www.1073theeagle.com/lsp/", 0, "WXGL", "")
cx2 = ("http://www.houstonseagle.com/lsp/", 0, "KGLK", "")
cx3 = ("http://www.971theriver.com/lsp/", 0, "WSRV", "")

# cbslocal.com playlist pages -> cb_pull
cb1 = ("http://wzlx.cbslocal.com/playlist/", 0, "WZLX", "")
cb2 = ("http://wncx.cbslocal.com/playlist/", 0, "WNCX", "")
cb3 = ("http://kzok.cbslocal.com/playlist/", 0, "KZOK", "")

# tunegenie.com onair pages -> tg_pull
tg1 = ("http://wlup.tunegenie.com/onair/", 0, "WLUP", "")
tg2 = ("http://wofx.tunegenie.com/onair/", 0, "WOFX", "")
tg3 = ("http://kgon.tunegenie.com/onair/", 0, "KGON", "")
tg4 = ("http://kcfx.tunegenie.com/onair/", 0, "KCFX", "")
tg5 = ("http://klos.tunegenie.com/onair/", 0, "KLOS", "")
tg6 = ("http://kseg.tunegenie.com/onair/", 0, "KSEG", "")
tg7 = ("http://kufx.tunegenie.com/onair/", 0, "KUFX", "")

# listenlive.co song history -> ll_pull
ll1 = ("http://player.listenlive.co/24751/en/songhistory", 0, "KQRS", "")
ll2 = ("http://player.listenlive.co/25951/en/songhistory", 0, "KSAN", "")

# One-off page layouts -> ke_pull / kx_pull
ke1 = ("http://www.kshe95.com/broadcasthistory", 0, "KSHE", "")
kx1 = ("http://kslx.com/playlist", 0, "KSLX", "")
|
||||
|
||||
|
||||
# Scheduler: wake up about every 30 seconds, look at the local wall clock and
# run whichever batch of pulls is due.  Each station variable is rebound to
# the tuple its pull function returns, which carries the file counter and the
# last-seen song into the next pull.  Runs until the process is stopped.
while True:
    now = time.time()
    timer = time.localtime(now)
    hour = timer[3]
    minute = timer[4]

    if minute == 58:
        # Minute 58 of every hour: all hourly and half-hourly stations.
        cc1 = cc_pull(cc1)
        cc2 = cc_pull(cc2)
        cc3 = cc_pull(cc3)
        cc4 = cc_pull(cc4)
        cc5 = cc_pull(cc5)
        cc6 = cc_pull(cc6)
        cc7 = cc_pull(cc7)
        cc8 = cc_pull(cc8)
        cc9 = cc_pull(cc9)

        cx1 = cx_pull(cx1)
        cx2 = cx_pull(cx2)
        cx3 = cx_pull(cx3)

        tg1 = tg_pull(tg1)
        tg2 = tg_pull(tg2)
        tg3 = tg_pull(tg3)
        tg4 = tg_pull(tg4)
        tg5 = tg_pull(tg5)
        tg6 = tg_pull(tg6)
        tg7 = tg_pull(tg7)

        ll1 = ll_pull(ll1)
        ll2 = ll_pull(ll2)

        ke1 = ke_pull(ke1)
        kx1 = kx_pull(kx1)

        # Extra pause so the same minute is not matched again.
        time.sleep(30)
    elif minute == 28:
        # Minute 28 of every hour: the half-hourly stations only.
        cx1 = cx_pull(cx1)
        cx2 = cx_pull(cx2)
        cx3 = cx_pull(cx3)

        ll1 = ll_pull(ll1)
        ll2 = ll_pull(ll2)

        kx1 = kx_pull(kx1)

        time.sleep(30)
    elif hour == 23 and minute == 54:
        # 23:54 local time: the once-a-day stations.
        gm1 = gm_pull(gm1)
        gm2 = gm_pull(gm2)

        cb1 = cb_pull(cb1)
        cb2 = cb_pull(cb2)
        cb3 = cb_pull(cb3)

        time.sleep(30)

    # Regular polling interval.
    time.sleep(30)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
38
classic-rock/readme-classicrock.txt
Normal file
38
classic-rock/readme-classicrock.txt
Normal file
@@ -0,0 +1,38 @@
|
||||
classic-rock-raw-data:
|
||||
|
||||
Each line represents a play of a song on a radio station.
|
||||
-The first element, RAW_SONG, is the song text scraped from the radio station
|
||||
-The second element, Song Clean, is the song's title. It's been made so that all versions
|
||||
of the RAW_SONG — whether marked (live) or spelled differently — point to the same text in this
|
||||
field. So even if we scraped "{Don't Fear} The Reaper" or "(Don't Fear) The Reaper"
|
||||
or merely "The Reaper" by Blue Oyster Cult, the text in Song Clean is always "(Don't Fear) The Reaper"
|
||||
-The third element, RAW_ARTIST, is the artist text scraped from the radio station
|
||||
-The fourth element, ARTIST CLEAN, is a unified version of Raw Artist. So even if we scraped
|
||||
"Blue Öyster Cult" or "Blue Oyster Cult" or "Blue ?yster Cult", this field would always
|
||||
read as "Blue Oyster Cult".
|
||||
-The fifth element is the station callsign of the song play
|
||||
-The sixth element is the time the song was pulled, as a Unix timestamp (Python's time.time(): seconds since January 1, 1970).
|
||||
-The seventh element is a unique ID assigned to each play, formed by the callsign of the
|
||||
station that played it and a four digit number, where 0001 is the last song played on the station
|
||||
in our set and the highest number is the first song we pulled, if you want to order them.
|
||||
-The eighth element combines Song Clean and ARTIST CLEAN. It can be used for connecting
|
||||
this data set to the dataset of unique songs.
|
||||
-The ninth element is a zero or one used to find if this is the first mention of a given song,
|
||||
it's pretty pointless.
|
||||
|
||||
classic-rock-song-list:
|
||||
|
||||
Each line represents one song in the set
|
||||
-Song Clean is the name of the song
|
||||
-ARTIST CLEAN is the name of the artist
|
||||
-Release Year is the release year, according to SongFacts. If there isn't a listed year, I couldn't
|
||||
find an entry for the song on SongFacts
|
||||
-COMBINED is the combined song and artist and can be used to connect this dataset to classic-rock-raw-data
|
||||
-First? is always 1
|
||||
-Year? is 1 if there was a found year and 0 if no year was found
|
||||
-PlayCount is the number of plays of the song across all stations.
|
||||
-F*G is the number of plays of the song across all stations, if a year was found.
|
||||
|
||||
radio.py is the program to scrape the data from radio sites
|
||||
|
||||
compiling_radio.py is the program to consolidate the output of radio.py into one file per station.
|
||||
Reference in New Issue
Block a user