zenom
8/15/2010 - 5:40 PM

gistfile1.rb

# A Mongoid document describing a post. It tracks a publication state
# (pending / archived / live) through Stateflow and mounts a CarrierWave
# uploader for its image. The `internal` flag switches which validations apply:
# non-internal posts need a url and a source, internal posts need tags and an
# image.
#
# Before every save the post derives its month/year, remembers the first
# suitably sized image found in the body, strips markup from the body and
# rebuilds its keyword lists. After every save any remembered image URL is
# handed to the uploader.
class Post
  require 'carrierwave/orm/mongoid'
  include Mongoid::Document
  include Mongoid::Timestamps # adds created_at and updated_at fields
  include Mongoid::Slug
  include Mongoid::Paranoia
  include Stateflow

  # Smallest width and height (taken from the <img> attributes) an image in the
  # body must declare to be picked up as the post image.
  MIN_IMAGE_DIMENSION = 250

  # Accepted shape of an external post URL. Anchored with \A / \z so the whole
  # value must match; ^ / $ would accept any multi-line string in which a
  # single line looks like a URL.
  URL_FORMAT = /\A(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?\z/ix

  # relations
  referenced_in         :source

  # fields
  field :url,           :type => String
  field :title,         :type => String
  field :last_modified, :type => DateTime
  field :internal,      :type => Boolean, :default => false
  field :body,          :type => String
  field :_keywords,     :type => Array
  field :_title_kwords, :type => Array # used for filtering multiple twitters.
  field :post_month,    :type => Integer
  field :post_year,     :type => Integer
  field :tags,          :type => String
  field :state,         :type => String
  field :deleted,       :type => Boolean
  field :source_img_url,:type => String
  slug  :title
  mount_uploader :image, PostImage

  before_save :process_post # process the month, year, keywords for this post.
  after_save  :process_image

  # state
  stateflow do
    initial :pending # default state
    state :pending, :archived, :live # states

    # events for maintaining these posts
    event :archive do
      transitions :from => [:pending, :live], :to => :archived, :if => :archived?
    end

    event :post_to_index do
      transitions :from => [:pending, :archived], :to => :live, :if => :show_index?
    end
  end

  # indexes
  index     :url
  index     :internal
  index     :post_month
  index     :post_year
  index     :tags
  index     [[:post_month, Mongo::DESCENDING], [:post_year, Mongo::DESCENDING]]

  # validations
  validates_presence_of   :url, :if => lambda { !internal }
  validates_presence_of   :source_id, :if => lambda { !internal }
  validates_presence_of   :title, :body
  validates_presence_of   :tags, :if => lambda { internal }
  validates_presence_of   :image, :if => lambda { internal }
  validates_format_of     :url, :with => URL_FORMAT, :if => lambda { !internal }
  validates_uniqueness_of :url


  # ========================================
  # = Process this post, set keywords etc. =
  # ========================================
  private

    # after_save hook: when an image URL was remembered for this post, point
    # the uploader at it and store the result.
    def process_image
      if self.source_img_url
        logger.info("Processing #{self.source_img_url} for #{self.title}")
        self.remote_image_url = self.source_img_url
        self.image.store!
      end
    end

    # before_save hook: derive post_month / post_year, remember the first
    # usable image URL, reduce the body to plain text and rebuild the keyword
    # lists. Always returns true so the return value never vetoes the save.
    def process_post
      # Fall back to the current UTC time when the post carries no timestamp.
      stamp = self.last_modified || Time.now.utc
      self.post_month = stamp.month.to_i
      self.post_year  = stamp.year.to_i

      # Look for an image once, and only overwrite the stored URL when one was
      # found: the body is stripped of tags below, so on any later save the
      # lookup comes back empty and must not wipe the earlier result.
      image_url = find_image
      self.source_img_url = image_url if image_url

      self.body = strip_markup(self.body) if self.body
      make_keywords

      # NOTE(review): the original ended with the bare symbol :post_to_index,
      # which fires nothing and only served as a truthy return value. If the
      # intent was to trigger the post_to_index event, that still needs wiring.
      true
    end

    # Decode HTML entities in +html+, then remove every tag. Uses the
    # non-destructive gsub: gsub! returns nil when nothing was replaced, which
    # would blank out a body that contains no tags.
    def strip_markup(html)
      decode_entities(html).gsub(/<\/?[^>]*>/, "")
    end

    # Best-effort entity decoding: returns +html+ untouched if decoding fails.
    def decode_entities(html)
      HTMLEntities.new.decode(html)
    rescue StandardError
      html
    end

    # make a keywords list for searching: lower-cased words (letters only) from
    # the title and body go into _keywords, title words alone into
    # _title_kwords. A missing title or body contributes no words.
    def make_keywords
      title_words = self.title.to_s.downcase.gsub(/[^a-z ]/, '')
      body_words  = self.body.to_s.downcase.gsub(/[^a-z ]/, '')
      self._keywords     = (title_words + " " + body_words).split.uniq
      self._title_kwords = title_words.split.uniq
    end

    # see if we can locate an image in this content. Returns the src of the
    # first <img> whose width and height attributes are both at least
    # MIN_IMAGE_DIMENSION, or nil when there is no body or no such image.
    def find_image
      return nil unless self.body

      candidate = (Nokogiri(self.body)/"img").detect do |img|
        img['width'].to_i >= MIN_IMAGE_DIMENSION && img['height'].to_i >= MIN_IMAGE_DIMENSION
      end
      candidate && candidate['src']
    end


    # Guard for the post_to_index event: whether the source allows its posts on
    # the index.
    # NOTE(review): raises when the post has no source; internal posts are not
    # required to have one -- confirm the intended behaviour for them.
    def show_index?
      self.source.show_on_index
    end

    # Guard for the archive event: archive if the post was last modified more
    # than 24 hours ago, or if show on index is false; otherwise false. A post
    # without a last_modified timestamp is never considered old.
    # NOTE(review): Stateflow may also generate an `archived?` state predicate;
    # this definition would shadow it -- confirm that is intended.
    def archived?
      modified = self.last_modified
      return true if modified && modified < 24.hours.ago
      !show_index?
    end
end