capint
6/18/2017 - 4:26 PM

Java >> JSoup >> Broken links

Java >> JSoup >> Broken links

// Download the page at `url` and collect every anchor element that has an href attribute.
Document doc = Jsoup.connect(url).get();
Elements links = doc.select("a[href]");

// Request each link in turn and report the ones that cannot be fetched.
// A failure on one link is reported and the scan moves on to the next link.
for (Element link : links) {
	String absoluteHref = link.absUrl("href");
	// Percent-encode spaces so the href can be requested as a URL.
	String linkURL = absoluteHref.replace(" ", "%20");

	// Only http/https targets can be requested. An href that could not be resolved to an
	// absolute URL, or one using another scheme (mailto:, javascript:, tel:, ...), would make
	// the connection attempt throw and abort the whole scan, so it is skipped instead.
	boolean isHttp = linkURL.regionMatches(true, 0, "http://", 0, 7)
			|| linkURL.regionMatches(true, 0, "https://", 0, 8);
	if (!isHttp) {
		System.out.println("Skipping non-HTTP link: " + link.attr("href"));
		continue;
	}

	try {
		System.out.println("Checking link " + link.text());
		// ignoreContentType(true) lets non-HTML targets (images, PDFs, ...) be fetched too.
		Response response = Jsoup.connect(linkURL).ignoreContentType(true).execute();
		System.out.println("	OK with status code:" + response.statusCode());
	} catch (HttpStatusException e1) {
		System.out.println("Found broken link: " + absoluteHref + " with status code " + e1.getStatusCode());
	} catch (UnknownHostException e2) {
		System.out.println("Found broken link: " + absoluteHref + " with unknown host");
	} catch (java.io.IOException e3) {
		// Timeouts, refused connections, SSL failures and similar I/O problems.
		// Must come after the two more specific IOException subclasses above.
		System.out.println("Found broken link: " + absoluteHref + " with I/O error: " + e3.getMessage());
	} catch (IllegalArgumentException e4) {
		// NOTE(review): jsoup appears to reject URLs it cannot parse with this exception — confirm
		// against the jsoup version in use.
		System.out.println("Found broken link: " + absoluteHref + " with malformed URL");
	}
}