# Author: shafayeatsumit
# 9/21/2017 - 8:04 AM
#
# 30buckscodev2.py

from selenium import webdriver
import time
import re
import math

from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Listing page that every navigation in this script starts from.
root_url = "https://eservice.hsa.gov.sg/prism/ct_r/enquiry.do?action=getAllTherapeuticArea"

# Attach to a Selenium server on localhost:4444 and request a Chrome session.
driver = webdriver.Remote(
    command_executor='http://127.0.0.1:4444/wd/hub',
    desired_capabilities=DesiredCapabilities.CHROME,
)

#driver = webdriver.Firefox()
driver.get(root_url)

def find_number_of_items(driver):
    """Read the count shown in the pager for every link on the listing page.

    Every link matched by the ``fmTbl`` table XPath on the page the driver is
    currently showing is clicked in turn.  On the page that opens, the first
    run of digits in the pager cell's text is taken as that link's count, and
    the driver is then sent back to the module-level ``root_url``.

    Args:
        driver: Selenium WebDriver; expected to be showing ``root_url`` when
            the function is called, since that is where it returns to after
            each link.

    Returns:
        A list of ints, one per link, in the order the links appear.

    Raises:
        ValueError: if the pager text of an opened page contains no digits.
    """
    links_xpath = "//table[@class='fmTbl']/tbody/tr/td/a"
    pager_xpath = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[1]'

    item_number_list = []
    link_total = len(driver.find_elements_by_xpath(links_xpath))
    print ("page count%s"%link_total)
    for position in range(link_total):
        # The links are looked up again on every pass because the page has
        # been reloaded since the previous lookup (presumably the old element
        # references are no longer usable after navigation).
        driver.find_elements_by_xpath(links_xpath)[position].click()
        time.sleep(10)
        print ("sleeping")
        pager_text = driver.find_element_by_xpath(pager_xpath).text
        found = re.search(r'\d+', pager_text)
        if found is None:
            # Fail with the offending text instead of an opaque
            # "'NoneType' object has no attribute 'group'".
            raise ValueError(
                "no item count in pager text %r for link %d" % (pager_text, position)
            )
        item_number_list.append(int(found.group()))
        driver.get(root_url)
        print (item_number_list)
    return item_number_list


# Item count for each area link on the root page, in page order.  These values
# are hard-coded; the commented-out call below recomputes them live.
#lis = find_number_of_items(driver)
lis = [7, 25, 13, 3, 17, 2, 8, 17, 12, 17, 4, 38, 24, 7, 264, 16, 4, 0, 5, 5, 10, 3, 5, 9, 4]

# The same XPath matches the area links on the root page and the item links on
# an area's result pages.
LINKS_XPATH = "//table[@class='fmTbl']/tbody/tr/td/a"
NEXT_LINK_XPATH = (
    "//*[@id='page']/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[2]"
    "/a[contains(text(), '[next]')]"
)
PAGE_LOAD_WAIT = 10  # seconds slept after every click


def _open_area(area_index):
    """Reload the root page, click its area link number *area_index* and
    return the item links found on the first result page."""
    driver.get(root_url)
    driver.find_elements_by_xpath(LINKS_XPATH)[area_index].click()
    time.sleep(PAGE_LOAD_WAIT)
    return driver.find_elements_by_xpath(LINKS_XPATH)


def _advance_pages(page_index):
    """Click '[next]' *page_index* times and return the item links found on
    the page reached."""
    for _ in range(page_index):
        driver.find_element_by_xpath(NEXT_LINK_XPATH).click()
        time.sleep(PAGE_LOAD_WAIT)
    return driver.find_elements_by_xpath(LINKS_XPATH)


# enumerate() supplies the position of each area directly.  Looking it up with
# lis.index(count) returns the FIRST match, which is the wrong area whenever
# two areas have the same item count (7, 17, 3, 4 and 5 all repeat in lis).
for area_index, item_count in enumerate(lis):
    # Learned from the first result page of this area; see below.
    page_size = None

    # An area with zero items is never opened, so the browser is not left
    # sitting on an empty result page when the next area starts.
    for row in range(item_count):
        print ("row number", row)
        links = _open_area(area_index)

        if page_size is None:
            # Assumes each listed item contributes exactly one matching link,
            # so the number of links on the first page is the page size (or
            # the whole area, when it fits on one page).
            page_size = len(links)
            if page_size == 0:
                print ("no item links found for area", area_index)
                break

        # Whole pages to skip, then position within the page reached.
        page_index, offset = divmod(row, page_size)
        if page_index:
            print ("inside if")
            print ("nxt page count", page_index)
            links = _advance_pages(page_index)
        else:
            print ("inside else")

        links[offset].click()
        time.sleep(PAGE_LOAD_WAIT)
        #extract data here
        print ("scraped amount", row)

# Leave the browser on the root listing page once everything is visited.
driver.get(root_url)