Farik
12/22/2014 - 2:23 PM

Solardemon data parser

Solardemon data parser


from bs4 import BeautifulSoup
import datetime
import urllib.request

def _sql_quote(value):
    """Return *value* as text safe to place between single quotes in SQL.

    Embedded single quotes are doubled (the standard SQL escape) so that
    scraped cell text cannot terminate the string literal early.
    """
    return str(value).replace("'", "''")


# Download the flare listing page; the context manager closes the HTTP
# response as soon as the body has been read.
with urllib.request.urlopen('http://solardemon.oma.be/science/flares.php?days=365&min_seq=1&min_flux_est=0.0000100') as response:
    html = response.read()

# The literal text '&nbsp' is stripped before parsing so padded cells come
# out empty.
# NOTE(review): only '&nbsp' is removed; if the page writes '&nbsp;' the
# trailing ';' stays in the cell text -- confirm against the live page.
# NOTE(review): no parser is named, so bs4 uses whichever one is installed
# and the resulting tree can differ between parsers; the `.b.br.contents[0]`
# lookup below depends on that tree -- confirm and pin a parser explicitly.
soup = BeautifulSoup(html.decode("utf-8").replace('&nbsp', ''))

# The flare records are read from the second <table> of the document.
table = soup.find_all('table')[1]

rows = table.find_all('tr')

# Month/year of the most recent month-header row; None until one is seen.
current_month = None

for tr in rows:
    cols = tr.find_all('td')
    if not cols:
        # Rows without any <td> cell carry no data.
        continue

    # A first cell with colspan="14" is a month header such as
    # "December, 2014"; remember it for the data rows that follow.
    if cols[0].attrs.get('colspan') == '14':
        current_month = datetime.datetime.strptime(cols[0].b.br.contents[0], "%B, %Y")
        continue

    if current_month is None:
        # Without a preceding header the row cannot be dated; fail with a
        # clear message instead of an unbound-name error.
        raise ValueError("flare row found before any month header row")

    # In data rows the first cell holds the day of the month.
    day_of_month = int(cols[0].contents[0])
    date = current_month.replace(day=day_of_month)

    # Only rows whose tenth cell is non-empty are emitted.  len() is used
    # on purpose: the first child may be a Tag, which is always truthy.
    ar_contents = cols[9].contents
    if ar_contents and len(ar_contents[0]) > 0:
        # Values in INSERT column order: date, start, end, dist, ar.
        parts = tuple(
            _sql_quote(value)
            for value in (
                date.strftime('%Y-%m-%d'),
                cols[2].contents[0],
                cols[4].contents[0],
                cols[8].contents[0],
                ar_contents[0],
            )
        )
        print("INSERT INTO ar (date,start,end,dist,ar) values ('%s', '%s', '%s', '%s', '%s');" % parts)