pmalek
8/17/2017 - 11:42 AM

Really quick-and-dirty Stack Overflow jobs feed parsing with Go

Really quick-and-dirty Stack Overflow jobs feed parsing with Go

package main

import (
	"encoding/xml"
	"fmt"
	"io"
	"os"
	"regexp"
	"strings"
	"time"
)

// isAnyOf reports whether toCheck equals at least one entry of list.
// The comparison is delegated to strings.EqualFold, so letter case is
// ignored. An empty or nil list never matches.
func isAnyOf(toCheck string, list []string) bool {
	for idx := 0; idx < len(list); idx++ {
		if candidate := list[idx]; strings.EqualFold(candidate, toCheck) {
			return true
		}
	}
	return false
}

// Author holds the author-related fields of a feed entry.
type Author struct {
	// Name is filled from a <name> child element.
	Name string `xml:"name"`
}

// item is one <item> element of the jobs feed as decoded by encoding/xml.
type item struct {
	GUID string `xml:"guid"`
	Link string `xml:"link"`
	// Author is embedded; encoding/xml handles the fields of an embedded
	// struct as if they were declared directly on item.
	Author

	// Categories collects every <category> child of the item.
	Categories []string `xml:"category"`

	Title       string `xml:"title"`
	Description string `xml:"description"`
	PubDate     string `xml:"pubDate"`
	Location    string `xml:"location"`
}

var (
	// brTagRe matches HTML line breaks in their common spellings:
	// <br>, <br/>, <br /> (any amount of whitespace before the slash).
	// It is compiled once here rather than on every String call.
	brTagRe = regexp.MustCompile(`<br\s*/?>`)

	// listTagReplacer turns each list-item opening tag into a "*" bullet
	// and drops the remaining list markup, all in a single pass.
	listTagReplacer = strings.NewReplacer(
		"<li>", "*",
		"</li>", "",
		"<ul>", "",
		"</ul>", "",
	)
)

// itemSeparator is the visual divider printed after every item.
const itemSeparator = "**********************************************"

// String renders the item as a plain-text block made of the title, link,
// categories and description, terminated by a line of asterisks.
//
// The description is lightly de-HTML-ified: line-break tags become
// newlines, <li> becomes "*", and </li>, <ul>, </ul> are removed. Any other
// markup is left untouched.
//
// The receiver is a value so that both item and *item satisfy fmt.Stringer.
func (i item) String() string {
	desc := brTagRe.ReplaceAllString(i.Description, "\n")
	desc = listTagReplacer.Replace(desc)

	return fmt.Sprintf(
		"Title: %v\nLink: %v\nCategories: %v\n\n\nDescription: %v\n\n\n%s",
		i.Title, i.Link, i.Categories, desc, itemSeparator,
	)
}

// parseFeed streams the XML document read from xmlFile and sends every
// <item> element that has a "go" or "golang" category (case-insensitive)
// on items. Each matching item is sent exactly once, even when several of
// its categories match.
//
// items is closed when parsing stops, whether at the end of the input or
// on a read error, so a receiver ranging over the channel always
// terminates. Errors are reported on standard output; an item that fails
// to decode is skipped.
func parseFeed(xmlFile io.Reader, items chan<- item) {
	// Deferred so the channel is closed on every exit path.
	defer close(items)

	wanted := []string{"go", "golang"}
	decoder := xml.NewDecoder(xmlFile)

	for {
		// Read tokens from the XML document in a stream.
		t, err := decoder.Token()
		if err == io.EOF {
			// Token signals a clean end of input with io.EOF.
			return
		}
		if err != nil {
			fmt.Printf("Error reading XML token: %v\n", err)
			return
		}

		// Only <item> start elements are of interest.
		se, ok := t.(xml.StartElement)
		if !ok || se.Name.Local != "item" {
			continue
		}

		var i item
		if err := decoder.DecodeElement(&i, &se); err != nil {
			fmt.Printf("Error decoding %v\n", err)
			continue
		}

		for _, category := range i.Categories {
			if isAnyOf(category, wanted) {
				items <- i
				// Stop at the first match so the item is not emitted
				// again for a second matching category.
				break
			}
		}
	}
}

// main expects exactly one command-line argument: the path of an XML feed
// file. It parses the file in a background goroutine via parseFeed and
// prints every item received from it. On a wrong argument count or a file
// that cannot be opened it prints an error and exits with status 1.
func main() {
	if len(os.Args) != 2 {
		fmt.Printf("Error: expected an xmlFileName as argument but received %v\n", os.Args)
		os.Exit(1)
	}

	xmlFileName := os.Args[1]
	xmlFile, err := os.Open(xmlFileName)
	if err != nil {
		fmt.Printf("Error: couldn't open file %v, %v\n", xmlFileName, err)
		os.Exit(1)
	}
	// Release the file handle once all items have been consumed.
	defer xmlFile.Close()

	// Buffered so the parser can run a few items ahead of the printer.
	items := make(chan item, 10)
	go parseFeed(xmlFile, items)

	// The loop ends when the channel is closed by the sender.
	for i := range items {
		fmt.Printf("%v\n", i)
		// NOTE(review): presumably a deliberate pause to pace the output;
		// confirm whether it is still wanted.
		time.Sleep(time.Millisecond * 40)
	}
}