shafayeatsumit
9/20/2017 - 3:20 PM

30buckscode.py

from selenium import webdriver

from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time
import re
#driver = webdriver.Remote(command_executor='http://127.0.0.1:4444/wd/hub', desired_capabilities=DesiredCapabilities.CHROME)
# Local Firefox session used for the whole crawl.
driver = webdriver.Firefox()

print("Fetching Data...")

# Entry page of the crawl: the `getAllTherapeuticArea` enquiry action.
start_url = "https://eservice.hsa.gov.sg/prism/ct_r/enquiry.do?action=getAllTherapeuticArea"
driver.get(start_url)



def extract_detail(driver):
    """Placeholder for scraping the page currently loaded in *driver*.

    Not implemented yet: the driver is not touched and ``None`` is returned.
    """
    # TODO: return the extracted data
    return None


# Crawl state shared across loop passes:
#   track[0] - index of the link to open on the home page
#   track[1] - index of the link to open on the second-level page
#   track[2] - pagination progress within the second-level page
track = [0, 0, 0]

HOME_URL = "https://eservice.hsa.gov.sg/prism/ct_r/enquiry.do?action=getAllTherapeuticArea"
HOME_TABLE_XPATH = "//table[@class='fmTbl']"
# Cell whose text contains the number parsed into `page_count`.
COUNT_CELL_XPATH = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[1]'
# First link in the cell next to the count; presumably the "next page"
# link of the pager -- NOTE(review): confirm against the live page.
PAGER_LINK_XPATH = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[2]/a[1]'
SECOND_LEVEL_TABLE_XPATH = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[3]/tbody'
LINK_XPATH = ".//tr/td/a"
PAGE_LOAD_WAIT = 10  # seconds to sleep after every click

try:
    table = driver.find_element_by_xpath(HOME_TABLE_XPATH)
    table_rows = table.find_elements_by_xpath(LINK_XPATH)

    # One pass per link found on the home page. Only the count of the initial
    # lookup is used: each pass navigates away, so the links are located again
    # at the top of the pass instead of reusing stale element references.
    for _ in range(len(table_rows)):
        table = driver.find_element_by_xpath(HOME_TABLE_XPATH)
        table_rows = table.find_elements_by_xpath(LINK_XPATH)
        table_rows[track[0]].click()
        time.sleep(PAGE_LOAD_WAIT)

        # First integer in the count cell. NOTE(review): the comparisons with
        # 10 below suggest this is a record count shown 10 per page -- confirm.
        count_text = driver.find_element_by_xpath(COUNT_CELL_XPATH).text
        page_count = int(re.search(r'\d+', count_text).group())
        number_of_clicks = page_count // 10
        print("page count", page_count)

        if page_count > 10 and track[2] > 0:
            if track[2] < number_of_clicks:
                driver.find_element_by_xpath(PAGER_LINK_XPATH).click()
                time.sleep(PAGE_LOAD_WAIT)
                table_second_level = driver.find_element_by_xpath(SECOND_LEVEL_TABLE_XPATH)
                # Search inside the located table only; searching from the
                # driver would match every link in the document.
                table_second_rows = table_second_level.find_elements_by_xpath(LINK_XPATH)
                if track[1] <= 10:
                    # The original printed an undefined `e` here (NameError on
                    # every run); handle a failed click like the branch below.
                    # NOTE(review): unlike that branch, track[0] is not
                    # advanced on failure -- confirm the intended behaviour.
                    try:
                        table_second_rows[track[1]].click()
                    except Exception as e:
                        print("end of cycle", e)
                    track[1] = track[1] + 1
                    if track[1] == 10:
                        track[1] = 0
                    time.sleep(PAGE_LOAD_WAIT)
                    # TODO: handle pagination on the detail page.
                    data = extract_detail(driver)
            else:
                # Pagination counter reached the computed click count:
                # advance to the next home-page link.
                track[0] = track[0] + 1
        else:
            table_second_level = driver.find_element_by_xpath(SECOND_LEVEL_TABLE_XPATH)
            table_second_rows = table_second_level.find_elements_by_xpath(LINK_XPATH)
            if track[1] <= 10:
                try:
                    table_second_rows[track[1]].click()
                except Exception as e:
                    # A failed click (e.g. index past the last link) is taken
                    # as the end of this list: advance to the next home link.
                    print("ending of the cycle", e)
                    track[0] = track[0] + 1
                track[1] = track[1] + 1
                time.sleep(PAGE_LOAD_WAIT)
                # TODO: handle pagination on the detail page.
                data = extract_detail(driver)
                print("track value before", track[0])

        print("track value after", track[0])
        if page_count > 10:
            track[2] = track[2] + 1

        # Always return to the home page: the next pass starts by locating the
        # home table, which is absent on the second-level and detail pages.
        driver.get(HOME_URL)
finally:
    # Close the browser even when a lookup or click raises.
    driver.quit()



# table = driver.find_element_by_xpath("//table[@class='fmTbl']")
# for row in table.find_elements_by_xpath(".//tr/td/a"):
#   row.click()
#   time.sleep(20)
#   #get table element in second level
#   table_second_level = driver.find_elements_by_xpath('//*[@id="page"]/form/table[3]/tbody/tr/td/table[3]/tbody')[0]
#   print(table_second_level)

#   for row in table_second_level.find_elements_by_xpath(".//tr/td/a"):
#       row.click()
#       time.sleep(10)
#       driver.find_element_by_xpath('//*[@id="page"]/form/table[3]/tbody/tr/td/table[5]/tbody/tr/td/div/a/b').click()
#       time.sleep(10)
# driver.close()