co89757
6/17/2015 - 5:33 AM

Scrape various projection definitions (OGC WKT, ESRI WKT, Proj4) for EPSG codes from the spatialreference.org lookup website.

```python
#!/usr/bin/python

import requests as rq 
import bs4 
import logging 
logging.basicConfig(filename="epsgCrawl.log",level=logging.WARNING, format="%(levelname)s::%(message)s")

class EPSGLookup(object):
    """Scrape coordinate-system definitions (OGC WKT, ESRI WKT, Proj4)
    for a set of EPSG codes from spatialreference.org.

    Results are cached in the ``mapWKT`` / ``mapESRI`` / ``mapProj4``
    dicts, keyed by EPSG code; a value of None means "not fetched yet
    or not found" (invalid codes 404 and keep their None entry).
    """

    def __init__(self, codes):
        super(EPSGLookup, self).__init__()
        self.EPSGCodes = codes
        self.baseURL = "http://spatialreference.org/ref/epsg/"
        # One cache per definition format, pre-seeded with None per code.
        self.mapWKT = dict.fromkeys(codes)
        self.mapProj4 = dict.fromkeys(codes)
        self.mapESRI = dict.fromkeys(codes)

    def __fetch(self, epsg, fmt):
        """GET a single definition (``fmt`` is "ogcwkt", "esriwkt" or
        "proj4") for one EPSG code.

        Returns the raw response body on HTTP 200, otherwise None.
        """
        url = self.__catStr(self.baseURL, str(epsg), "/", fmt, "/")
        logging.debug("URL: %s", url)
        response = rq.get(url)
        if response.status_code == 200:
            return response.content
        return None

    def __lookup(self, cache, epsg, fmt):
        """Return the cached definition for ``epsg``, fetching on a miss.

        Successful fetches are written back into ``cache``; failures
        leave the cache untouched and return None.
        """
        if cache.get(epsg):
            return cache[epsg]
        value = self.__fetch(epsg, fmt)
        if value is not None:
            cache[epsg] = value
        return value

    def LookupWKT(self, epsg):
        # BUG FIX: original requested "/ogrwkt" here but "/ogcwkt/" in
        # CrawlWKT, so the lookup and the crawler hit different URLs;
        # spatialreference.org serves the OGC WKT at /ogcwkt/.
        return self.__lookup(self.mapWKT, epsg, "ogcwkt")

    def LookupESRI(self, epsg):
        return self.__lookup(self.mapESRI, epsg, "esriwkt")

    def LookupProj4(self, epsg):
        return self.__lookup(self.mapProj4, epsg, "proj4")

    def CrawlAll(self):
        """Fetch every definition format for every configured code."""
        self.CrawlESRI()
        self.CrawlProj4()
        self.CrawlWKT()

    def CrawlProj4(self):
        self.__crawl(self.mapProj4, "proj4")

    def CrawlWKT(self):
        self.__crawl(self.mapWKT, "ogcwkt")

    def CrawlESRI(self):
        self.__crawl(self.mapESRI, "esriwkt")

    def __crawl(self, cache, fmt):
        """Fetch ``fmt`` for every configured EPSG code into ``cache``.

        Codes that do not resolve (non-200 response) keep their None
        entry so callers can detect invalid codes.
        """
        for epsg in self.EPSGCodes:
            value = self.__fetch(epsg, fmt)
            if value is not None:
                logging.debug("EPSG:%s %s: %s", epsg, fmt, value)
                cache[epsg] = value

    def __catStr(self, *strings):
        """Concatenate the given strings (tiny join helper)."""
        return ''.join(strings)






def mainTest():
    """Crawl EPSG codes 4500-4699 and dump their definitions to a CSV.

    Codes whose Proj4 lookup fails (i.e. invalid EPSG codes) are
    skipped entirely; for valid codes, sibling formats that failed to
    fetch are written as '' instead of the literal string 'None'.
    """
    import csv

    # Materialize the range so it can be iterated again for the rows
    # (also works unchanged on Python 3, unlike xrange).
    codes = list(range(4500, 4700))
    crawler = EPSGLookup(codes)
    crawler.CrawlAll()

    csvname = "epsg_table.csv"
    # NOTE: 'wb' is the correct csv mode for the Python 2 interpreter
    # this script targets; on Python 3 it would be open(..., 'w', newline='').
    with open(csvname, 'wb') as csvfile:
        writer = csv.writer(csvfile, doublequote=False, escapechar="\\")
        writer.writerow(["EPSG", "OGCWKT", "ESRIWKT", "PROJ4"])
        for code in codes:
            if crawler.LookupProj4(code):
                # Skip invalid codes; blank out formats that 404'd so we
                # don't write the string 'None' into the table.
                writer.writerow([
                    str(code),
                    crawler.mapWKT[code] or '',
                    crawler.mapESRI[code] or '',
                    crawler.mapProj4[code],
                ])

    print('======DONE=======')



# Run the crawl + CSV export only when executed as a script.
if __name__ == '__main__':
    mainTest()
```