from selenium import webdriver
from selenium.webdriver.chrome.options import Options
class ChromeExtractor:
    """Fetch data from web pages through a headless Chrome driven by Selenium.

    Every public method starts its own browser session, loads the requested
    URL, and shuts the session down again before returning.
    """

    # Location of the chromedriver executable passed to ``webdriver.Chrome``.
    _CHROMEDRIVER_PATH = '/usr/bin/chromedriver'

    # Command-line switches added to every Chrome instance that is started.
    _CHROME_ARGUMENTS = (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
    )

    # CSS selector for the anchors collected by ``get_page_links``.
    _POST_LINK_SELECTOR = ".list-post-body .entry-title a"

    @staticmethod
    def _create_driver():
        """Build a Chrome WebDriver configured with the shared arguments."""
        chrome_options = Options()
        for argument in ChromeExtractor._CHROME_ARGUMENTS:
            chrome_options.add_argument(argument)
        # NOTE(review): the positional driver path and the ``chrome_options``
        # keyword are kept exactly as before; newer Selenium 4 releases expect
        # ``options=`` and a ``Service`` object instead -- confirm against the
        # installed Selenium version before changing.
        return webdriver.Chrome(
            ChromeExtractor._CHROMEDRIVER_PATH,
            chrome_options=chrome_options,
        )

    @staticmethod
    def get_page_content(url: str) -> str:
        """Load *url* in headless Chrome and return the page source.

        Args:
            url: Address of the page to load.

        Returns:
            The driver's ``page_source`` after the page has been loaded.

        Raises:
            Any exception raised by Selenium while starting the browser or
            loading the page is propagated; the browser is still shut down.
        """
        driver = ChromeExtractor._create_driver()
        try:
            driver.get(url)
            return driver.page_source
        finally:
            # quit() ends the whole session; close() would only close the
            # current window and can leave the driver process running.
            driver.quit()

    @staticmethod
    def get_page_links(url: str) -> list:
        """Load *url* and collect the ``href`` of every matching post link.

        Links are the elements matching the CSS selector
        ``.list-post-body .entry-title a``.

        Args:
            url: Address of the page to load.

        Returns:
            A list with one entry per matched element, holding whatever
            ``get_attribute('href')`` reports for it, in the order the
            elements were found. Empty when nothing matches.

        Raises:
            Any exception raised by Selenium while starting the browser,
            loading the page, or reading the elements is propagated; the
            browser is still shut down.
        """
        driver = ChromeExtractor._create_driver()
        try:
            driver.get(url)
            # "css selector" is the locator strategy string behind
            # By.CSS_SELECTOR; find_elements(by, value) is available in both
            # Selenium 3 and 4, unlike find_elements_by_css_selector.
            anchors = driver.find_elements(
                "css selector", ChromeExtractor._POST_LINK_SELECTOR
            )
            # Attributes must be read while the session is still alive.
            return [anchor.get_attribute('href') for anchor in anchors]
        finally:
            # quit() ends the whole session; close() would only close the
            # current window and can leave the driver process running.
            driver.quit()