tylerb
8/5/2016 - 4:17 PM

elvis.go

// All material is licensed under the Apache License Version 2.0, January 2004
// http://www.apache.org/licenses/LICENSE-2.0

// Sample program that takes a stream of bytes and looks for the bytes
// “elvis” and when they are found, replace them with “Elvis”. The code
// cannot assume that there are any line feeds or other delimiters in the
// stream and the code must assume that the stream is of any arbitrary length.
// The solution cannot meaningfully buffer to the end of the stream and
// then process the replacement.
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
)

// data represents a table of input and expected output.
var data = []struct {
	input  []byte
	output []byte
}{
	{[]byte("abc"), []byte("abc")},
	{[]byte("elvis"), []byte("Elvis")},
	{[]byte("aElvis"), []byte("aElvis")},
	{[]byte("abcelvis"), []byte("abcElvis")},
	{[]byte("eelvis"), []byte("eElvis")},
	{[]byte("aelvis"), []byte("aElvis")},
	{[]byte("aabeeeelvis"), []byte("aabeeeElvis")},
	{[]byte("e l v i s"), []byte("e l v i s")},
	{[]byte("aa bb e l v i saa"), []byte("aa bb e l v i saa")},
	{[]byte(" elvi s"), []byte(" elvi s")},
	{[]byte("elvielvis"), []byte("elviElvis")},
	{[]byte("elvielvielviselvi1"), []byte("elvielviElviselvi1")},
	{[]byte("elvielviselvis"), []byte("elviElvisElvis")},
}

// Declare what needs to be found and its replacement.
var find = []byte("elvis")
var repl = []byte("Elvis")

// Calculate the number of bytes we need to locate.
var size = len(find)

func main() {
	var output bytes.Buffer

	fmt.Println("=======================================\nRunning Algorithm One")
	for _, d := range data {
		output.Reset()
		algorithmOne(d.input, &output)
		matched := bytes.Compare(d.output, output.Bytes())
		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Two")
	for _, d := range data {
		output.Reset()
		algorithmTwo(d.input, &output)
		matched := bytes.Compare(d.output, output.Bytes())
		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Three")
	for _, d := range data {
		output.Reset()
		algorithmThree(bytes.NewReader(d.input), &output)
		matched := bytes.Compare(d.output, output.Bytes())
		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Four")
	for _, d := range data {
		output.Reset()
		algorithmFour(bytes.NewReader(d.input), &output)
		matched := bytes.Compare(d.output, output.Bytes())
		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}
}

// algorithmOne is one way to solve the problem.
func algorithmOne(data []byte, output *bytes.Buffer) {

	// Use a bytes Reader to provide a stream to process.
	input := bytes.NewReader(data)

	// Declare the buffer we need to process the stream.
	buf := make([]byte, size)
	end := size - 1

	// Read in an initial number of bytes we need to get started.
	if n, err := io.ReadFull(input, buf[:end]); err != nil {
		output.Write(buf[:n])
		return
	}

	for {

		// Read in one byte from the input stream.
		b, err := input.ReadByte()
		if err != nil {

			// Flush the reset of the bytes we have.
			output.Write(buf[:end])
			break
		}

		// Add this byte to the end of the buffer.
		buf[end] = b

		// If we have a match, replace the bytes.
		if bytes.Compare(buf, find) == 0 {
			copy(buf, repl)
		}

		// Write the front byte since it has been compared.
		output.WriteByte(buf[0])

		// Slice that front byte out.
		copy(buf, buf[1:])
	}
}

// algorithmTwo is a second way to solve the problem.
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmTwo(data []byte, output *bytes.Buffer) {

	// Use the bytes Reader to provide a stream to process.
	input := bytes.NewReader(data)

	// Create an index variable to match bytes.
	idx := 0

	for {

		// Read a single byte from our input.
		b, err := input.ReadByte()
		if err != nil {
			break
		}

		// Does this byte match the byte at this offset?
		if b == find[idx] {

			// It matches so increment the index position.
			idx++

			// If every byte has been matched, write
			// out the replacement.
			if idx == size {
				output.Write(repl)
				idx = 0
			}

			continue
		}

		// Did we have any sort of match on any given byte?
		if idx != 0 {

			// Write what we've matched up to this point.
			output.Write(find[:idx])

			// Unread the unmatched byte so it can be processed again.
			input.UnreadByte()

			// Reset the offset to start matching from the beginning.
			idx = 0

			continue
		}

		// There was no previous match. Write byte and reset.
		output.WriteByte(b)
		idx = 0
	}
}

// algorithmThree is a second way to solve the problem. This approach takes an
// io.Reader to represent an infinite stream. This allows for the algorithm to
// accept input from just about anywhere, thanks to the beauty of Go
// interfaces.
//
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmThree(data io.Reader, output *bytes.Buffer) {

	// Use bufio.NewReaderSize to provide a stream from which we can read and
	// unread single bytes.
	input := bufio.NewReaderSize(data, 1)

	// Create an index variable to match bytes.
	idx := 0

	for {

		// Read a single byte from our input.
		b, err := input.ReadByte()
		if err != nil {
			break
		}

		// Does this byte match the byte at this offset?
		if b == find[idx] {

			// It matches so increment the index position.
			idx++

			// If every byte has been matched, write
			// out the replacement.
			if idx == size {
				output.Write(repl)
				idx = 0
			}

			continue
		}

		// Did we have any sort of match on any given byte?
		if idx != 0 {

			// Write what we've matched up to this point.
			output.Write(find[:idx])

			// Unread the unmatched byte so it can be processed again.
			input.UnreadByte()

			// Reset the offset to start matching from the beginning.
			idx = 0

			continue
		}

		// There was no previous match. Write byte and reset.
		output.WriteByte(b)
		idx = 0
	}
}

// algorithmFour is a fourth way to solve the problem. This approach takes an
// io.Reader to represent an infinite stream. This allows for the algorithm to
// accept input from just about anywhere, thanks to the beauty of Go
// interfaces.
//
// Additionally, it has been optimized for minimal allocations by reading
// directly from the stream onto the stack. This results in 1 allocation of 1
// byte at the time of writing.
//
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmFour(data io.Reader, output *bytes.Buffer) {

	// Create a byte slice of length 1 into which our byte will be read.
	b := make([]byte, 1)

	// Create an index variable to match bytes.
	idx := 0

	for {

		// Are we re-using the byte from a previous call?
		if b[0] == 0 {
			// Read a single byte from our input.
			n, err := data.Read(b)
			if n == 0 || err != nil {
				break
			}
		}

		// Does this byte match the byte at this offset?
		if b[0] == find[idx] {

			// It matches so increment the index position.
			idx++

			// If every byte has been matched, write
			// out the replacement.
			if idx == size {
				output.Write(repl)
				idx = 0
			}

			// Reset the reader byte to 0 so another byte will be read.
			b[0] = 0
			continue
		}

		// Did we have any sort of match on any given byte?
		if idx != 0 {

			// Write what we've matched up to this point.
			output.Write(find[:idx])

			// NOTE: we are NOT resetting the reader byte to 0 here because we need
			// to re-use it on the next call. This is equivalent to the UnreadByte()
			// call in the other functions.

			// Reset the offset to start matching from the beginning.
			idx = 0

			continue
		}

		// There was no previous match. Write byte and reset.
		output.WriteByte(b[0])
		// Reset the reader byte to 0 so another byte will be read.
		b[0] = 0
	}
}
// All material is licensed under the Apache License Version 2.0, January 2004
// http://www.apache.org/licenses/LICENSE-2.0

// go test -run none -bench . -benchtime 3s -benchmem

// Tests to see how each algorithm compare.
package main

import (
	"bytes"
	"testing"
)

// assembleInputStream appends all the input slices together to allow for
// consistent testing across all benchmarks
func assembleInputStream() []byte {
	var out []byte
	for _, d := range data {
		out = append(out, d.input...)
	}
	return out
}

// Capture the time it takes to execute algorithm one.
func BenchmarkAlgorithmOne(b *testing.B) {
	var output bytes.Buffer
	in := assembleInputStream()

	for i := 0; i < b.N; i++ {
		output.Reset()
		algorithmOne(in, &output)
	}
}

// Capture the time it takes to execute algorithm two.
func BenchmarkAlgorithmTwo(b *testing.B) {
	var output bytes.Buffer
	in := assembleInputStream()

	for i := 0; i < b.N; i++ {
		output.Reset()
		algorithmTwo(in, &output)
	}
}

// Capture the time it takes to execute algorithm three.
func BenchmarkAlgorithmThree(b *testing.B) {
	var output bytes.Buffer
	in := bytes.NewReader(assembleInputStream())

	for i := 0; i < b.N; i++ {
		output.Reset()
		// Seek our reader to the beginning of the byte array
		in.Seek(0, 0)
		algorithmThree(in, &output)
	}
}

// Capture the time it takes to execute algorithm four.
func BenchmarkAlgorithmFour(b *testing.B) {
	var output bytes.Buffer
	in := bytes.NewReader(assembleInputStream())

	for i := 0; i < b.N; i++ {
		output.Reset()
		// Seek our reader to the beginning of the byte array
		in.Seek(0, 0)
		algorithmFour(in, &output)
	}
}
BenchmarkAlgorithmOne-8        	 2000000       	      2803 ns/op       	      53 B/op  	       2 allocs/op
BenchmarkAlgorithmTwo-8        	 5000000       	      1042 ns/op       	       0 B/op  	       0 allocs/op
BenchmarkAlgorithmThree-8      	 3000000       	      1583 ns/op       	      16 B/op  	       1 allocs/op
BenchmarkAlgorithmFour-8       	 2000000       	      2030 ns/op       	       1 B/op  	       1 allocs/op