# Solardemon data parser: scrapes the flares table and prints SQL INSERT statements.
from bs4 import BeautifulSoup
import datetime
import urllib.request
# URL of the Solardemon flare list that this script scrapes.
FLARES_URL = 'http://solardemon.oma.be/science/flares.php?days=365&min_seq=1&min_flux_est=0.0000100'

# One output line per flare row; every value is emitted as a quoted SQL string.
INSERT_TEMPLATE = "INSERT INTO ar (date,start,end,dist,ar) values ('%s', '%s', '%s', '%s', '%s');"

# A row whose first cell spans this many columns is treated as a month header.
MONTH_HEADER_COLSPAN = '14'

# The highest cell index read from a data row is 9, so ten cells are required.
MIN_DATA_COLUMNS = 10


def sql_quote(value):
    """Return ``str(value)`` made safe for use inside a ``'...'`` SQL literal.

    Single quotes are doubled (the standard SQL escape) so that text scraped
    from the page cannot terminate the literal early.

    NOTE: dialects that also treat backslash as an escape character would
    need additional handling; the target database is not visible here.
    """
    return str(value).replace("'", "''")


def insert_statement(date, start, end, dist, region):
    """Build one INSERT statement for the ``ar`` table.

    Args:
        date: object with ``strftime`` (a ``date`` or ``datetime``); it is
            rendered as ``YYYY-MM-DD``.
        start, end, dist, region: values for the remaining columns; each is
            converted with ``str`` and quote-escaped.

    Returns:
        The complete SQL statement as a string.
    """
    values = (date.strftime('%Y-%m-%d'), start, end, dist, region)
    return INSERT_TEMPLATE % tuple(sql_quote(value) for value in values)


def _fetch_html(url):
    """Download *url* and return its body decoded as UTF-8."""
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8")


def _cell_text(cell):
    """Return the first child of a table cell, or ``''`` when the cell is empty."""
    contents = cell.contents
    return contents[0] if len(contents) > 0 else ''


def _flare_statements(html_text):
    """Yield an INSERT statement for every flare row that has a value in cell 9.

    The second ``<table>`` of the document is scanned row by row. Month header
    rows set the current month; data rows supply the day of month (cell 0)
    and the values taken from cells 2, 4, 8 and 9.

    Raises:
        IndexError: if the document contains fewer than two tables.
        ValueError: if a month header or a day-of-month cell cannot be parsed.
    """
    # Drop non-breaking spaces (as the HTML entity and as the raw character)
    # so that visually empty cells parse as empty. Ordinary spaces must stay:
    # removing them would fuse tag names with their attributes and strip the
    # space that the "%B, %Y" month format below requires.
    cleaned = html_text.replace('&nbsp;', '').replace('\xa0', '')

    # NOTE(review): no parser is named, so Beautiful Soup picks the best one
    # installed. The month-header lookup below (cols[0].b.br.contents[0]) may
    # depend on how that parser treats <br>; pin a parser only after checking
    # it against the live page.
    soup = BeautifulSoup(cleaned)
    table = soup.findAll('table')[1]

    current_month = None
    for tr in table.findAll('tr'):
        cols = tr.findAll('td')
        if not cols:
            continue

        if cols[0].attrs.get('colspan') == MONTH_HEADER_COLSPAN:
            current_month = datetime.datetime.strptime(cols[0].b.br.contents[0], "%B, %Y")
            continue

        # Rows seen before any month header cannot be dated, and rows with
        # too few cells cannot supply the columns read below; skip both
        # instead of failing with NameError / IndexError.
        if current_month is None or len(cols) < MIN_DATA_COLUMNS:
            continue

        day_of_month = int(cols[0].contents[0])
        date = current_month.replace(day=day_of_month)

        # len() is used on purpose: the first child may be a Tag, whose
        # truthiness does not reflect whether it is empty.
        region = cols[9].contents
        if len(region) > 0 and len(region[0]) > 0:
            yield insert_statement(
                date,
                _cell_text(cols[2]),
                _cell_text(cols[4]),
                _cell_text(cols[8]),
                region[0],
            )


def main():
    """Fetch the flare list and print one INSERT statement per qualifying row."""
    for statement in _flare_statements(_fetch_html(FLARES_URL)):
        print(statement)


if __name__ == "__main__":
    main()