flying3615
7/7/2017 - 7:32 AM

Seek.co.nz crawler for the given keyword

Fetches Seek.co.nz job listings matching the given keyword from the Seek search API and prints them grouped by location.

import org.json.JSONObject
import org.jsoup.Connection
import org.jsoup.Jsoup
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.util.*
import kotlin.collections.ArrayList

/**
 * Repeats this string [i] times, so that `"-" * 3 == "---"`.
 *
 * A zero or negative [i] yields the empty string instead of throwing.
 *
 * @param i number of copies of the receiver to concatenate.
 * @return the receiver concatenated with itself exactly [i] times.
 */
operator fun String.times(i: Int): String =
    // Clamp first: CharSequence.repeat rejects negative counts.
    this.repeat(i.coerceAtLeast(0))


/**
 * One job listing.
 *
 * [salary] defaults to `"NaN"` when the caller does not supply it.
 * [toString] renders `listDate|title|salary|location|company` followed by a
 * space and a newline; [id] and [workType] are not part of that rendering.
 */
data class Job(
    val id: String,
    val workType: String,
    val title: String,
    val salary: String = "NaN",
    val company: String,
    val location: String,
    val listDate: LocalDate
) {
    override fun toString(): String =
        listOf(listDate, title, salary, location, company)
            .joinToString(separator = "|", postfix = " \n")
}


/**
 * Searches the Seek job API for [keyword] and prints every job found, grouped by location.
 *
 * For each location one entry is printed: the location name, a dashed separator line,
 * and the list of [Job]s for that location.
 *
 * Network failures raised by jsoup and malformed page payloads propagate to the caller;
 * pages that yield no usable payload are reported on stderr and skipped.
 *
 * @param keyword the search keywords sent to the API.
 */
fun main(keyword: String) {

    val CALLBACK = "jQuery18203420653892844612_1477784271109"
    val USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
    val API_URL = "https://jobsearch-api.cloud.seek.com.au/search"
    val NATION_CODE = "3001"
    val ITEM_PER_PAGE = 20
    val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")

    /**
     * Returns the text that follows the first `CALLBACK(` in this string, up to the next
     * occurrence of that marker (or the end), or `null` when there is no such text.
     */
    fun String.usefulPart(): String? =
        split("$CALLBACK(").dropLastWhile { it.isEmpty() }.getOrNull(1)

    /**
     * Requests one result page for [searchWord].
     *
     * @param pageNum page number sent to the API; defaults to 1.
     * @return the payload after the callback wrapper, or `null` when the status is not 200
     *         or the callback marker is not found in the body.
     */
    fun getRawResponse(searchWord: String, pageNum: Int = 1): String? {
        val response = Jsoup.connect(API_URL)
                .header("Accept", "*/*")
                .header("Content-Type", "application/json;charset=UTF-8")
                .data("callback", CALLBACK)
                .data("keywords", searchWord)
                .data("nation", NATION_CODE)
                .data("page", pageNum.toString())
                .userAgent(USER_AGENT)
                .method(Connection.Method.GET)
                .ignoreContentType(true)
                .timeout(10 * 1000) // 10 s timeout
                .execute()
        if (response.statusCode() != 200) return null
        return response.parse().body().ownText().usefulPart()
    }

    /**
     * Computes how many pages of [ITEM_PER_PAGE] items the search for [searchWord] spans,
     * based on the `totalCount` field of the first page. Returns 0 when the first page is
     * unavailable or its payload cannot be parsed.
     */
    fun getTotalPage(searchWord: String): Int {
        val payload = getRawResponse(searchWord)?.takeIf { it.length > 2 } ?: return 0
        val totalCount = try {
            // The last two characters are dropped before parsing — presumably the `);`
            // closing the callback wrapper (TODO confirm against a live response).
            // getInt accepts both a JSON number and a numeric string.
            JSONObject(payload.dropLast(2)).getInt("totalCount")
        } catch (e: Exception) {
            // Best effort: an unparsable first page means "no pages".
            0
        }
        // Ceiling division.
        return (totalCount + ITEM_PER_PAGE - 1) / ITEM_PER_PAGE
    }

    /**
     * Parses one page payload into [Job]s, reading the entries of its `data` array.
     * Throws if the payload is not valid JSON or an expected field is missing.
     */
    fun parseTextToJson(rawStr: String): List<Job> {
        // Same two-character trim as in getTotalPage.
        val entries = JSONObject(rawStr.dropLast(2)).getJSONArray("data")
        return (0 until entries.length()).map { index ->
            val entry = entries.getJSONObject(index)
            Job(
                id = entry.get("id").toString(),
                workType = entry.getString("workType"),
                title = entry.get("title").toString(),
                company = entry.getJSONObject("advertiser").getString("description"),
                salary = entry.getString("salary"),
                location = entry.getString("location"),
                listDate = LocalDate.parse(entry.getString("listingDate"), formatter)
            )
        }
    }

    /**
     * Fetches every result page for [query] and groups the jobs by their location.
     */
    fun getJobs(query: String): Map<String, List<Job>> =
        // Pages are requested as 1..totalPages, matching getRawResponse's default first page of 1.
        (1..getTotalPage(query))
            .flatMap { page ->
                val raw = getRawResponse(query, page)
                if (raw == null) {
                    System.err.println("No usable response for page $page of '$query'; skipping")
                    emptyList<Job>()
                } else {
                    parseTextToJson(raw)
                }
            }
            .groupBy { it.location }

    for ((location, jobs) in getJobs(keyword)) {
        println(location + "\n ${"-" * 10} \n" + jobs)
    }

}

// Entry call: run the crawler for the keyword "java".
main("java")
Auckland
 ----------- 
[2019-05-20|Junior/Intermediate Frontend Web Developer(HTML/CSS/JS)||Auckland|Moustache Republic 
, 2019-05-20|Senior Developer Team Leader|$100,000 - $129,999|Auckland|Tenancy.co.nz 
, 2019-05-07|Senior/Intermediate Frontend Developer - React, , Design||Auckland|Actionstep New Zealand Ltd 
, 2019-05-20|Senior Frontend Developer - upto $150,000 (Redux)|upto $150,000 base salary|Auckland|Absolute IT - 98.9%* of placed candidates will recommend us to others 
, 2019-05-03| Developer||Auckland|Hays Information Technology 
, 2019-05-02|Front-End Engineer, Enterprise Shopping Platform, NZ's next big thing||Auckland|The Warehouse Group 
, 2019-05-20|Backend Developer||Auckland|Roam Creative 
, 2019-05-21|Senior Front End Developer (Contract)||Auckland|Cucumber Limited 
, 2019-05-21|Lead Front-end React Developer||Auckland|40 Foot Consulting Limited 
, 2019-05-13|Digital Agile Developer||Auckland|Vodafone New Zealand 
, 2019-05-20|Dynamics 365 Technical Consultant|Excellent CBD Location|Auckland|Absolute IT - 98.9%* of placed candidates will recommend us to others 
, 2019-05-20|Intermediate Developer||Auckland|Chapman Tripp 
, 2019-05-12|Full Stack (PHP / React) Developer - Auckland|Plus benefits|Auckland|Randstad Technologies 
, 2019-05-20|Senior Integration Developer||Auckland|Datacom is one of Australasia’s largest professional IT services companies 
, 2019-04-28|Full Stack & .net core Developer||Auckland|Beyond Recruitment - Winner – Best Innovation –2018 Global Recruiter Awards 
, 2019-05-08|Application Deployment Specialist - Kubernetes / Google Cloud|$100 P/H|Auckland|Absolute IT - 98.9%* of placed candidates will recommend us to others 
]
Canterbury
 ----------- 
[2019-05-02|Senior Software Developer (C#/)||Canterbury|Verizon Connect 
, 2019-05-01|Lead / Senior Full Stack Developer (React / Node / AWS)|100k-$120k+FlexibleWorking+Shares|Canterbury|Sunstone Talent 
, 2019-05-21|Senior Software Engineer||Canterbury|XE 
, 2019-04-29|Software Developers - all levels - C# - ||Canterbury|Verizon Connect 
]
Wellington
 ----------- 
[2019-05-12|Senior .NET Developer||Wellington|Socialite Recruitment Ltd. 
]
Tasman
 ----------- 
[2019-05-20|Software Developer - Plink Software||Tasman|Intepeople Ltd 
]