chourobin
12/27/2011 - 10:37 PM

Count the number of a given tag on a web page using Go

Count the number of a given tag on a web page using Go

package main

import (
  "os"
  "fmt"
  "strings"
  "flag"
  "http"
  "html"
)

// usage prints a one-line description of the program and its command-line
// synopsis to standard output, then lets the flag package list every
// registered flag together with its default value.
func usage() {
  const banner = "counter - count HTML tags in a page\n" +
    "Usage: counter [--url <url> | --host <hostname>] --tag [tag]\n"

  fmt.Print(banner)
  flag.PrintDefaults()
}

// Command-line options; main binds each one to a flag and flag.Parse fills
// them in.
var (
  host string // value of --host
  url  string // value of --url
  tag  string // value of --tag
)

// main parses the command-line flags, works out which URL to fetch, downloads
// that page and prints how many times the requested tag occurs in it.
//
// The process exits with status 127 when neither --host nor --url is given,
// and with status 1 when the page cannot be fetched.
func main() {
  flag.StringVar(&host, "host", "", "hostname to fetch")
  flag.StringVar(&url, "url", "", "url to fetch")
  flag.StringVar(&tag, "tag", "span", "tag name to count")
  flag.Usage = usage
  flag.Parse()

  if host == "" && url == "" {
    flag.Usage()
    os.Exit(127)
  }

  switch {
  case host == "":
    // Only --url was supplied; it is used unchanged.
  case !strings.Contains(host, "://"):
    // A bare hostname: fetch its root page over plain HTTP. As before, this
    // takes precedence over any --url value.
    url = fmt.Sprintf("http://%v/", host)
  case url == "":
    // --host already carries a scheme, so it is a complete URL. Without this
    // case url would stay empty and the fetch below would fail.
    url = host
  }

  fmt.Println("Fetching", url, "to look for", tag)

  response, err := http.Get(url)
  if err != nil {
    fmt.Println("Unable to fetch", url, err)
    os.Exit(1)
  }
  // Registered only after the error check: response is not usable when
  // http.Get fails.
  defer response.Body.Close()

  count := countTag(tag, response)
  fmt.Println("Found", count, "copies of", tag)
}

// countTag reports how many times an element named tag is opened in the body
// of response.
//
// Ordinary start tags (<p>) and self-closing tags (<br/>) are both counted;
// end tags are not. The tag argument is matched without regard to letter
// case. Tokenizing stops at the first error token, which is also how the
// normal end of input is signalled; the underlying error is not reported, so
// a body cut short by a read error yields a partial count.
//
// countTag does not close response.Body; that stays with the caller.
func countTag(tag string, response *http.Response) (count int) {
  data := html.NewTokenizer(response.Body)

  // The tokenizer is documented to hand back lower-cased tag names, so the
  // target is normalised once up front instead of on every token.
  want := strings.ToLower(tag)

  for tokenType := data.Next(); tokenType != html.ErrorToken; tokenType = data.Next() {
    if tokenType != html.StartTagToken && tokenType != html.SelfClosingTagToken {
      continue
    }
    // The second result only says whether attributes follow; it is not
    // needed for counting.
    tagName, _ := data.TagName()
    if want == string(tagName) {
      count++
    }
  }
  return count
}