giannafusaro
11/23/2015 - 5:12 AM

heap.coffee

# # # # # # # # # # # # # # # # # # #
# TrieHeap
# # # # # # # # # # # # # # # # # # #
@TrieHeap = class TrieHeap
  constructor: (capacity = 10) ->
    @trieTree = new window.TrieTree
    @heap = new window.Heap(capacity)

  add: (word) ->
    @heap.add(@trieTree.add(word))

  sort: ->
    @heap.sort()
# # # # # # # # # # # # # # # # # # #
# TrieNode
# # # # # # # # # # # # # # # # # # #
@TrieNode = class TrieNode
  constructor: ->
    @data = { 'frequency': 0 }
    @next_chars = {}
    @seen = false
    @heap_index = -1

  visit: ->
    @seen = !@seen

  add: (letter) ->
    @next_chars[letter] ?= new TrieNode

  nextCharsInclude: (letter) ->
    !@next_chars[letter].nil?

  record: (word) ->
    @data['frequency']++
    @data['word'] = word
    @

# # # # # # # # # # # # # # # # # # #
# TrieTree
# # # # # # # # # # # # # # # # # # #
@TrieTree = class TrieTree
  constructor: ->
    @root = new TrieNode

  add: (word) ->
    current = @root
    for char in word
      current.add char
      current = current.next_chars[char]
    current.record word

  to_s: (node = @root) ->
    node.visit()
    console.log("word #{node.data.word}, freq: #{node.data.frequency}, heap index: #{node.heap_index}") if (node.data.frequency > 0)
    unless node.next_chars.length is 0
      for neighbor,value of node.next_chars
        @to_s(value) unless value.seen is node.seen
# # # # # # # # # # # # # # # # # # #
# Parser
# # # # # # # # # # # # # # # # # # #
@Parser = class Parser
  constructor: ->
    @tags = @mostFrequentVisibleWords($('html').visibleText())


  visibleText = jQuery.fn.visibleText = ->
    $.map(@contents(), (el) ->
      if el.nodeType == 3
        return $(el).text()
      if $(el).is(':visible')
        return $(el).visibleText()
      return
    ).join ''

  mostFrequentVisibleWords: (content) ->
    stopWords = ["", "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't", "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself", "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves"]
    word = ''
    @trieheap = new window.TrieHeap
    for char in content
      if /[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]/.test(char) is false
        word = word + char.toLowerCase()
      else if word not in stopWords
        @trieheap.add(word)
        word = ''
      else
        word = ''
    @trieheap.sort()


$(document).ready ->
  window.parser = new window.Parser
# # # # # # # # # # # # # # # # # # #
# HeapNode
# # # # # # # # # # # # # # # # # # #
@HeapNode = class HeapNode
  constructor: (trie_node, heap_index) ->
    @trie_node = trie_node
    @trie_node.heap_index = heap_index
    @frequency = trie_node.data['frequency']
    @word = trie_node.data['word']


# # # # # # # # # # # # # # # # # # #
# Heap
# # # # # # # # # # # # # # # # # # #
@Heap = class Heap
  constructor: (capacity) ->
    @capacity = capacity
    @nodes = []

  swap: (index1, index2) ->
    [@nodes[index1], @nodes[index2]] = [@nodes[index2], @nodes[index1]]
    @nodes[index1].trie_node.heap_index = index1
    @nodes[index2].trie_node.heap_index = index2

  # make sure parents have lower frequency than children
  heapifyDownFrom: (index) ->
    [current, lchild, rchild] = [@nodes[index], @nodes[2*index+1], @nodes[2*index+2]]
    if(lchild && (current.frequency > lchild.frequency))
      @swap index, (2*index+1)
      @heapifyDownFrom(2*index+1)
    if(rchild && (current.frequency > rchild.frequency))
      @swap index, (2*index+2)
      @heapifyDownFrom(2*index+2)

  replaceRootWith: (trie_node) ->
    @nodes[0].trie_node.heap_index = -1
    @nodes[0] = new HeapNode(trie_node, 0)
    @heapifyDownFrom(0)

  # make sure children don't have lower frequency than parents
  heapifyUpFrom: (index) ->
    if (index % 2 == 1 || index == 0)
      parent_index = index//2
    else
      parent_index = index//2-1
    [current, parent] = [@nodes[index], @nodes[parent_index]]
    if(current.frequency < parent.frequency)
      @swap(index, parent_index)
      @heapifyUpFrom(index//2+1)


  add: (trie_node) ->
    switch
      when trie_node.heap_index != -1
        # trie_node exists in tree, update tree
        @nodes[trie_node.heap_index].frequency++
        @heapifyDownFrom(trie_node.heap_index)
      when @nodes.length == @capacity
        # heap is full, replace root of heap if word frequency is greater than min of heap
        @replaceRootWith(trie_node) if trie_node.data['frequency'] > @nodes[0].frequency
      else
        # heap is not full and word is not in heap
        @nodes.push(new HeapNode(trie_node, @nodes.length))
        @heapifyUpFrom((@nodes.length-1))


  sort: ->
    sorted = []
    nodes_cp = @nodes.slice(0)
    for i in [@capacity..1]
      @swap(0, i-1)
      sorted.unshift(@nodes.pop())
      @heapifyDownFrom 0
    @nodes = nodes_cp
    sorted

  to_s: ->
    "#{node.word} : #{node.frequency}" for node in @nodes