# Utility Python script to download NOAA temperature time series data into a single CSV file
#!/usr/bin/python
"""
Author: Richard Careaga
Date: 2014-07-10
Title: NOAAscrape.py
Description: Download temperature time series data and save to CSV file
Example results (date, temperature, difference from 1901-2000 mean for month):
"1895-01-15",26.69,-3.43
"1896-01-15",31.48,1.36
"1897-01-15",28.17,-1.95
...
"2013-12-15",31.08,-1.6
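Note that DD has been set to '15' arbitrarily so that this string field can be treated
as a datetime object in subsequent use of the output file. Dates are written as
YYYY-MM-DD. Rows are appended one month at a time (every January first, then every
February, and so on), so the file is grouped by month rather than sorted by date.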
Copyright: See http://media.richard-careaga.com/lic2014.txt for copyright/permissions
"""
import csv
import re

try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    from urllib import urlopen  # Python 2 location
"""
File path/name to store results. If file does not exist it will be created; if it does
exist, it will be appended to, not overwritten.
"""
# Output CSV path; the download loop opens it in append mode.
fn = "/Users/rc/Desktop/DATA.csv"
# Leading part of each download URL, worked out by inspection; the two-digit
# month number is appended directly after it.
fore = "http://www.ncdc.noaa.gov/cag/time-series/us/110/00/tavg/1/"
# Trailing part of each download URL; adjust the date range as required.
aft = "/1895-2014.csv?base_prd=true&firstbaseyear=1901&lastbaseyear=2000"
# Two-digit month numbers, '01' through '12'.
months = ['%02d' % number for number in range(1, 13)]
# Matches 201405 and other YYYYMM dates in the data series. Group 2 is the
# year and group 3 is the month (group 1 is the whole six-digit match).
dates = re.compile(r'((\d{4})(\d{2}))')
# Each URL returns a four-line descriptive header, to be stripped out by
# substituting this match with the empty string.
headers = re.compile(r'(Contiguous.*\nUnits.*\nBase.*\nDate.*\n)')
# One URL per month, in calendar order.
urls = [fore + month + aft for month in months]
# Read in and process the data behind each url; append the rows to the csv file.
#
# csv.writer needs a binary-mode file on Python 2 but a text-mode file opened
# with newline='' on Python 3. ``str is bytes`` is only true on Python 2.
if str is bytes:
    open_args = {'mode': 'ab'}
else:
    open_args = {'mode': 'a', 'newline': ''}

for url in urls:
    # Fetch one month's series and always release the connection afterwards.
    response = urlopen(url)
    try:
        raw = response.read()
    finally:
        response.close()
    # Python 3 hands back bytes, which the str-based regexes cannot process.
    # Python 2 already returns str, so nothing is decoded there.
    # NOTE(review): assumes the payload is ASCII/UTF-8 text -- confirm.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8')
    # Drop the descriptive header, then rewrite YYYYMM as YYYY-MM-15.
    stripped = headers.sub('', raw)
    datified = dates.sub(r'\g<2>-\g<3>-15', stripped)
    # Each whitespace-separated token is one comma-delimited data row; the
    # second and third fields are converted to floats so that
    # QUOTE_NONNUMERIC quotes only the date string.
    scrubbed = []
    for row in datified.split():
        entry = row.split(',')
        scrubbed.append([entry[0], float(entry[1]), float(entry[2])])
    # Append this month's rows; the file is created if it does not exist yet.
    with open(fn, **open_args) as f:
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerows(scrubbed)