-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmp3scraper.py
58 lines (51 loc) · 2.2 KB
/
mp3scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import re
import urllib2
from bs4 import BeautifulSoup
def urlmp3downloader(url, dlmusicfolder):
    """Download every ``.mp3`` file linked from a web page into a folder.

    The page at ``url`` is fetched with urllib2 and parsed with
    BeautifulSoup. Every ``<a>`` tag whose ``href`` ends in ``.mp3`` is a
    download candidate. Each file is saved in ``dlmusicfolder`` under the
    last path segment of its link; links whose target file already exists
    there are skipped.

    Args:
        url: Address of the page to scan for mp3 links.
        dlmusicfolder: Directory to save into; created if it does not exist.

    Returns:
        "" when the page could not be fetched or parsed (the error is
        printed), otherwise None.

    Raises:
        Errors raised while downloading or writing an individual file are
        not caught here and propagate to the caller.
    """
    # Function-scope import so the file's top-level import block is untouched.
    # urlparse is the Python 2 module, matching the file's use of urllib2.
    from urlparse import urljoin

    # Fetch the page and collect all <a> tags whose href ends in '.mp3'.
    try:
        response = urllib2.urlopen(url)
        try:
            page = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        soup = BeautifulSoup(page)
        links = soup.find_all('a', href=re.compile(r"\.mp3$"))
    except urllib2.HTTPError as e:
        print(e)
        return ""
    except urllib2.URLError as e:
        print(e)
        return ""
    except Exception:
        print("generic exception")
        return ""

    # Create the destination directory if it does not exist yet.
    if not os.path.exists(dlmusicfolder):
        os.makedirs(dlmusicfolder)

    for link in links:
        href = link["href"]
        # -- fileName is the last path segment of the link
        # -- fullLink is the download location; urljoin leaves absolute
        #    links unchanged and resolves relative ones against the page url
        # -- musicfilePath is the full destination path of the mp3
        fileName = href.split("/")[-1]
        fullLink = urljoin(url, href)
        musicfilePath = os.path.join(dlmusicfolder, fileName)

        # Skip files that have been previously downloaded.
        if os.path.exists(musicfilePath):
            print(fileName + " has already been downloaded!")
            continue

        print("Downloading: " + fullLink + " | To: " + musicfilePath)

        # Read the whole body before opening the destination file, so a
        # failed download does not leave an empty file behind that a later
        # run would treat as already downloaded.
        download = urllib2.urlopen(fullLink)
        try:
            data = download.read()
        finally:
            download.close()

        # mp3 data is binary: write with "wb" (the previous "wq" is not a
        # valid open() mode).
        with open(musicfilePath, "wb") as local_file:
            local_file.write(data)
def multisitemp3(sitelist):
    """Run urlmp3downloader for each (website, filepath) pair in sitelist.

    Before each site is processed, a three-line banner naming the website
    is printed, framed above and below by a rule of dashes.

    Args:
        sitelist: Iterable of (x, y) pairs where x is the website and y is
            the filepath handed to urlmp3downloader.
    """
    for website, filepath in sitelist:
        # The fixed text around the website is 24 characters wide, so the
        # rule lines up with the banner line.
        rule = "-" * (len(website) + 24)
        banner = "| Checking " + website + " for mp3s.. |"
        print(rule + "\n" + banner + "\n" + rule)
        urlmp3downloader(website, filepath)