# (exported from a Jupyter notebook cell)
import os
import requests
from bs4 import BeautifulSoup

# Fetch the index page and collect every anchor tag on it.
url = ""  # TODO: set the index page URL before running
r = requests.get(url)
r.raise_for_status()  # fail fast on a bad HTTP response instead of parsing an error page
soup = BeautifulSoup(r.content, "html.parser")  # explicit parser: deterministic across installs
links = soup.find_all('a')  # find_all replaces the deprecated findAll alias

# Select links for data after 2014.
# NOTE(review): slice bounds were found by inspecting the index page's
# link ordering — they will break if the page layout changes.
txt_links = links[36:254]

# Extract each anchor's href. The original loop had an empty body, so
# cleaned_links was always empty and nothing ever got downloaded.
cleaned_links = []
for link in txt_links:
    href = link.get('href')
    if href:  # skip anchors with no href attribute
        cleaned_links.append(href)

# Prepend the base URL to each (presumably relative) link.
base_url = ""  # TODO: set the base URL before running
final_links = [base_url + link for link in cleaned_links]

# Create the output directory tree; exist_ok avoids the failure the old
# `mkdir data && mkdir data/text_data` hit when `data/` already existed.
os.makedirs("data/text_data", exist_ok=True)

# Write one URL per line; `with` guarantees the file is closed (the old
# code never closed it, so the write could still be buffered when wget ran).
with open("data/text_data/file_list.txt", "w") as file_list:
    file_list.write('\n'.join(final_links))

# Download the listed files: -N only fetch newer, -c resume partials,
# -i read URLs from the file.
os.system('cd data/text_data && wget -N -c -i file_list.txt')