# In[ ]:  (exported from a Jupyter notebook cell)
import os
import subprocess

import requests
from bs4 import BeautifulSoup

#Get URL and get all links
url = "http://web.mta.info/developers/turnstile.html"
r = requests.get(url)
r.raise_for_status()  # fail fast on an HTTP error instead of parsing an error page
# Name the parser explicitly: bare BeautifulSoup(content) emits a warning and
# may pick a different parser on different machines, changing the parse tree.
soup = BeautifulSoup(r.content, "html.parser")
links = soup.find_all('a')  # find_all is the current name; findAll is the legacy alias

#Select links for data after 2014
# NOTE(review): this hard-coded slice is brittle — it silently breaks whenever
# the page gains or loses links. Verify the range against the live page.
txt_links = links[36:254]


#Clean up links
# Read the href attribute directly instead of string-splitting the tag's
# repr (str(link).split('"')[1]), which breaks on attribute order, single
# quotes, or extra attributes; anchors with no href are skipped.
cleaned_links = [link.get('href') for link in txt_links if link.get('href')]

base_url = "http://web.mta.info/developers/"
# Absolute download URL for each relative href.
final_links = [base_url + link for link in cleaned_links]

    
#Create textfile of link addresses
# os.makedirs is idempotent and portable; the original
# `mkdir data && mkdir data/text_data` short-circuits once `data` exists,
# so `text_data` is never created on a re-run.
os.makedirs("data/text_data", exist_ok=True)
# Context manager guarantees the file is closed even if the write fails.
with open("data/text_data/file_list.txt", "w") as file_list:
    file_list.write('\n'.join(str(line) for line in final_links))

#Use command line to download data
# subprocess.run with an argument list (shell=False) avoids shell-string
# pitfalls, and cwd= replaces the `cd ... &&` shell idiom. wget flags:
# -N only fetch files newer than local copies, -c resume partial
# downloads, -i read URLs from the given file. check=True raises
# CalledProcessError instead of silently ignoring a failed download.
subprocess.run(
    ["wget", "-N", "-c", "-i", "file_list.txt"],
    cwd="data/text_data",
    check=True,
)