dbs67
11/15/2018 - 11:27 PM

How to fetch and parse an HTML page in GOLANG

How to fetch and parse an HTML page in GOLANG

// FETCH AN HTML PAGE AND PRINT OUT THE URL (HREF) OF EVERY LINK IN IT
// NOTE: THE HTML PACKAGE IMPORTED BELOW HAS SINCE MOVED TO "golang.org/x/net/html"

import "fmt"
import "net/http"
import "code.google.com/p/go.net/html"

// parse_html walks the node tree rooted at n, visiting n first and then each
// of its children in sibling order. For every element node whose Data is "a"
// it prints one "LINK: <value>" line per href attribute found on that node.
func parse_html(n *html.Node) {
	isAnchor := n.Type == html.ElementNode && n.Data == "a"
	if isAnchor {
		for i := range n.Attr {
			attr := n.Attr[i]
			if attr.Key != "href" {
				continue
			}
			fmt.Printf("LINK: %s\n", attr.Val)
		}
	}

	// Descend into the children after the node itself has been handled, so
	// links are printed in the order they are encountered in the tree.
	child := n.FirstChild
	for child != nil {
		parse_html(child)
		child = child.NextSibling
	}
}

// Fetch the page at url, parse it as HTML and print every link it contains.
// This fragment is meant to sit inside a function body (for example main),
// with url supplied by the surrounding code.
response, err := http.Get(url)
if err != nil {
	fmt.Printf("%s", err)
} else {
	// Always release the response body so the underlying connection is not
	// leaked; the deferred call runs when the enclosing function returns.
	defer response.Body.Close()

	doc, err := html.Parse(response.Body)
	if err != nil {
		// Report the parse failure instead of walking an invalid document.
		fmt.Printf("%s", err)
	} else {
		parse_html(doc)
	}
}