"""Download the PDFs linked from the cshperspectives "the_p53_family" page.

The script fetches the index page at ``INDEX_URL``, collects every ``<a>``
inside the element whose id is ``subj_coll``, derives a ``.full.pdf`` URL
from each link's ``href`` and saves the response body under ``OUTPUT_DIR``
as ``<n>.<link text>.pdf``.

Written to run unchanged on both Python 2 and Python 3.
"""
import os
import re
import sys

import requests
from bs4 import BeautifulSoup

INDEX_URL = "http://cshperspectives.cshlp.org/site/misc/the_p53_family.xhtml"
PDF_PREFIX = 'http://cshperspectives.cshlp.org/content'
PDF_SUFFIX = '.full.pdf'
OUTPUT_DIR = 'p53'
# Seconds to wait on the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 60


def _pdf_url(href):
    """Return the PDF URL derived from a link's *href*.

    Everything up to and including the last occurrence of ``short`` in
    *href* is dropped; the remainder is placed between ``PDF_PREFIX`` and
    ``PDF_SUFFIX``.
    """
    remainder = re.sub('.*short', '', href)
    return ''.join([PDF_PREFIX, remainder, PDF_SUFFIX])


def _output_path(index, link_text):
    """Return the local file path for the *index*-th link.

    ``:`` becomes ``--`` and ``/`` becomes ``-`` so that the link text
    cannot introduce extra path components or characters that are invalid
    in file names on some platforms.
    """
    # Plain concatenation (not str.format) keeps non-ASCII link text
    # working on Python 2, where it arrives as ``unicode``.
    name = str(index) + '.' + link_text + '.pdf'
    name = name.replace(':', '--').replace('/', '-')
    return OUTPUT_DIR + '/' + name


def _collect_downloads():
    """Return a list of ``(pdf_url, output_path)`` pairs from the index page.

    Raises:
        requests.RequestException: the index page could not be fetched or
            the server answered with an error status.
        RuntimeError: the page has no element with id ``subj_coll``.
    """
    response = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    container = soup.find(id='subj_coll')
    if container is None:
        raise RuntimeError(
            "no element with id 'subj_coll' found at " + INDEX_URL)

    # Anchors without an href cannot be turned into a URL; leave them out
    # before numbering so the numbering of saved files stays contiguous.
    anchors = [a for a in container.find_all('a') if a.get('href')]
    return [
        (_pdf_url(anchor.get('href')), _output_path(number, anchor.text))
        for number, anchor in enumerate(anchors, 1)
    ]


def main():
    """Fetch the index page and download every linked PDF."""
    downloads = _collect_downloads()

    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    for url, path in downloads:
        print('Downloading ' + path)
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            # Do not save an HTML error page under a .pdf name.
            response.raise_for_status()
        except requests.RequestException as exc:
            # One failed download should not abort the remaining ones.
            sys.stderr.write('Skipping ' + path + ' - ' + str(exc) + '\n')
            continue
        with open(path, 'wb') as pdf_file:
            pdf_file.write(response.content)
        print(path + ' Done!\n\n')


if __name__ == '__main__':
    main()