# Python scraper for EzineArticles.com
def get_ezinearticles(query, limit=25):
    """Search ezinearticles.com for *query* and scrape matching articles.

    Parameters:
        query: search string submitted as the "q" parameter.
        limit: maximum number of articles to return (default 25).

    Returns:
        A list of dicts with keys "title", "body" and "resource", each
        holding the matched element's ``contents``.

    NOTE(review): relies on third-party helpers assumed to be imported at
    module level — BeautifulSoup, URL (pattern.web) and select (soupselect).
    """
    # TODO: Make get_goarticles and get_articlebase

    def grab_article(url):
        # Download one article page and extract its main sections.
        soup = BeautifulSoup.BeautifulSoup(URL(url).download())
        return {"title": select(soup, "#article-title h1")[0].contents,
                "body": select(soup, "#article-content")[0].contents,
                "resource": select(soup, "#article-resource")[0].contents}

    # Don't shadow the `query` parameter with the request-params dict.
    params = {"q": query}
    soup = BeautifulSoup.BeautifulSoup(
        URL('http://ezinearticles.com/search/', query=params).download())
    # Result links live under h3; skip author-profile links ("?expert").
    links = [t for t in select(soup, "h3 a") if str(t).find("?expert") < 0]
    urls = ["http://www.ezinearticles.com" + u['href'] for u in links]
    articles = []
    for url in urls:
        try:
            articles.append(grab_article(url))
        except IndexError:
            # Page layout didn't match the selectors — skip this article
            # instead of aborting the whole scrape.
            continue
        if len(articles) >= limit:
            break
    return articles