bluengreen
11/1/2017 - 8:51 PM

Migrate a Mongoid / MongoDB database to an ActiveRecord based SQL one. One method to convert the schema, another to migrate data. Copes with

Migrate a Mongoid / MongoDB database to an ActiveRecord based SQL one. One method to convert the schema, another to migrate data. Copes with most basic data types. Some hacks to infer TEXT columns. Converts embeds_one relationships to AR aggregations. I wrote this for a one-time migration. It could be a good starting point for someone doing the same.

# Migrate schema and data from Mongoid to MySQL ActiveRecord
class MongoidMysqlMigrator
  
  def randomize_auto_increment_values(source_models, from=5500, to=10500)
    source_models.each do |model|
      value = rand(from-to)+from
      sql = %(ALTER TABLE #{model.name.tableize} AUTO_INCREMENT=#{value})
      puts sql
      ActiveRecord::Base.connection.execute(sql)
    end
  end
  
  # @param sources (Array)  list of Mongoid document types
  def migrate_data(sources)
    sources.each do |source|
      size = source.count
      migrate_source_collection(source, source.all) if size > 0
    end
    
    sources.each do |source_model|
      puts "Resolving foreign keys for #{source_model}"
      target_class = define_active_record_model_class(source_model)
      
      each_belongs_to(source_model) do |relation|
        field_name = relation.foreign_key
        if relation.polymorphic?
          # Get the list of distinct class names from the polymophic association _type
          # fields and perform an update for each
          class_names = target_class.group("#{relation.name}_type").map(&"#{relation.name}_type".to_sym)
          class_names.map(&:tableize).each do |rel_target_table|
            sql = %(UPDATE #{target_class.table_name} t1
                      INNER JOIN #{rel_target_table} t2 ON
                      t2.mongo_id = t1.#{field_name}_mongo_id
                    SET t1.#{field_name} = t2.id)
            #puts sql
            ActiveRecord::Base.connection.execute(sql)
          end
        else
          rel_target_table = relation.class_name.tableize
          sql = %(UPDATE #{target_class.table_name} t1
                    INNER JOIN #{rel_target_table} t2 ON
                    t2.mongo_id = t1.#{field_name}_mongo_id
                  SET t1.#{field_name} = t2.id)
          #puts sql
          ActiveRecord::Base.connection.execute(sql)
        end
      end
      
      # Wire up embedded docs to parent
      each_embedded_relation(source_model) do |relation|
        embedded_class_name = relation.class_name
        embedded_class = embedded_class_name.constantize
        embedded_mysql_class = define_active_record_model_class(embedded_class)
        embedded_ins = embedded_in_relations(embedded_class)
        raise "too many embedded_in relations" unless embedded_ins.size == 1
        
        # There's no foreign key on the embedded doc so we infer it from
        # the relation name. For polymorphic embeds, this might be something
        # like parent_id or addressable_id
        rel_name = embedded_ins.values.first.name
        field_name = "#{rel_name}_id"
        rel_target_table = source_model.name.tableize
        
        sql = %(UPDATE #{embedded_mysql_class.table_name} SET #{field_name}=(
            SELECT id FROM #{rel_target_table} WHERE
                #{rel_target_table}.mongo_id = #{embedded_mysql_class.table_name}.#{field_name}_mongo_id
        ))
        ActiveRecord::Base.connection.execute(sql)
        #puts sql
      end
    end
  end
  
  def truncate_targets(sources)
    sources.each do |source|
      target_class = define_active_record_model_class(source)
      target_class.destroy_all
    end
  end
  
  # @param sources (Array)  list of Mongoid document types
  def migrate_schema(mongoid_models)
    
    # Build MySQL schema
    mongoid_models.each do |mongoid_model|
      table = mongoid_model.name.tableize
      
      # Drop and create SQL table
      migrator.create_table table, :force => true
      
      # Add mongo id column
      migrator.add_column table, :mongo_id, :string
      migrator.add_index table, :mongo_id
      
      # Add data fields
      each_basic_field(mongoid_model) do |field|
        migrate_field_to_table(table, field)
      end
      
      # Add foreign key fields
      mongoid_model.relations.each do |rel_name, relation|
        if relation.relation == Mongoid::Relations::Referenced::In ||
           relation.relation == Mongoid::Relations::Embedded::In
          field = relation.foreign_key unless Mongoid::Relations::Embedded::In
          field ||= "#{relation.name}_id"
          migrator.add_column table, field, :integer
          migrator.add_index table, field, :name => field
          if relation.relation == Mongoid::Relations::Embedded::In && relation.polymorphic?
            type_field = "#{relation.name}_type"
            migrator.add_column table, type_field, :string
            migrator.add_index table, type_field, :name => type_field
          end
          migrator.add_column table, field+"_mongo_id", :string, :length => 30
          migrator.add_index table, field+"_mongo_id", :name => field+"_mongo_id"
        end
        
        if relation.relation == Mongoid::Relations::Embedded::One
          # See if we need to convert this embeds_one into a composed_of
          klass = relation.class_name.constantize
          #if composable_types.include? [relation.inverse_class_name.constantize, klass]
            prefix = relation.name
            # Migrate all the embedded classes fields into the target
            # table but with prefixed field names
            each_basic_field(klass) do |field|
              migrate_field_to_table(table, field, "#{prefix}_#{field.name}")
            end
          #end
        end
      end
      
      # Carrier wave uploaders
      each_uploader(mongoid_model) do |field, uploader_type|
        migrator.add_column table, field, :string
      end
    end
  end
  
  private
  
  def migrate_field_to_table(table, mongoid_field, field_name = nil)
    indexed_fields = mongoid_field.options[:klass].index_options.keys.map(&:keys).map(&:first)
    
    toSqlType = {
      String => :string,
      Time => :datetime,
      Integer => :integer,
      Boolean => :boolean,
      ::Money => [:decimal, :precision => 8, :scale => 2, :default => 0.00],
      BigDecimal => [:decimal, :precision => 8, :scale => 3, :default => 0.000],
      Date => :date,
      Hash => :text,
      Array => :text
    }
    
    field_name ||= mongoid_field.name
    type = mongoid_field.type
    sql_type = toSqlType[type]
    default = mongoid_field.options[:default]
    default = nil if default.is_a?(Proc) || sql_type == :text
    null = default.nil?
    null = true if sql_type == :text
    
    # Switch some String colums up to text
    if sql_type == :string &&
      (field_name.include?('notes') ||
      field_name == 'value' ||
      field_name == 'content' ||
      field_name.include?('_footer') ||
      field_name.include?('_extra_text'))
      
      sql_type = :text
      default = nil
      null = true
    end
    
    column_options = {
      :default => default,
      :null => null
    }
    
    if sql_type.is_a?(Array)
      column_options.merge!(sql_type[1])
      sql_type = sql_type.first
    end
    
    if sql_type
      migrator.add_column table, field_name, sql_type, column_options
      if indexed_fields.include? field_name.to_sym
        migrator.add_index table, field_name, :name => field_name
      end
    else
      puts colorize("Skipped field #{table}##{field_name} with unknown type #{type}", COLORS[:red])
    end
  end
  
  def embedded_in_relations(model)
    model.relations.select { |k,v| v.relation == Mongoid::Relations::Embedded::In }
  end
  
  # @param source_model (Class)  the Mongoid model class
  # @param collection (Array)  the model instances to migrate 
  def migrate_source_collection(source_model, collection)
    target_class = define_active_record_model_class(source_model)
    #target_class.destroy_all
    
    collection.each do |source|
      target = migrate_source_document(source, target_class)
      
      # Now step through embedded singletons and collections, converting those...
      each_embedded_relation(source_model) do |relation|
        puts "Migrating embedded #{source_model} #{source.id} #{relation.name}"
        embedded_class = relation.class_name.constantize
        
        if relation.relation == Mongoid::Relations::Embedded::One# &&
#          composable_types.include? [source_model, embedded_class]
          puts "Migrating to aggregation #{target.class} #{embedded_class}"
          # it's to be converted to a composed_of
          prefix = relation.name
          embedded_doc = source.send(relation.name)
          if embedded_doc
            each_basic_field(embedded_class) do |field|
              copy_field_value(embedded_doc, target, field, "#{prefix}_#{field.name}")
            end
          end
          target.save!
        else
          value = source.send(relation.name)
          value = [value] if relation.relation == Mongoid::Relations::Embedded::One
          value = value.compact # Embedded document might not exist
          migrate_source_collection(embedded_class, value) do |target|
            # No foreign key back to the parent document exists for
            # embedded documents so we need to set it manually here
            foreign_key_name = source_model.name.tableize.singularize+'_id'
            target.send("#{foreign_key_name}_mongo_id=", source.id.to_s)
          end
        end
      end
    end
  end
  
  def copy_field_value(source, target, mongoid_field, target_field_name = nil)
    target_field_name ||= mongoid_field.name
    type = mongoid_field.type
  
    value = source.send(mongoid_field.name)
    value = value.to_s if type == ::Money
      
    target.send("#{target_field_name}=", value)
  end
  
  def migrate_source_document(source, target_class)
    source_class = source.class
    target = target_class.new
    puts "Migrating #{source.class} #{source.id}..."

    # Store Mongo document ID
    target.mongo_id = source.id.to_s
    
    # Copy fields
    each_basic_field(source_class) do |field|
      copy_field_value(source, target, field)
    end
    
    # For each relation, we store the Mongo document id in
    # a temporary <key field>_mongo_id string column
    each_belongs_to(source_class) do |relation|
      field_name = relation.foreign_key
      value = source.send(field_name).to_s
      target.send("#{field_name}_mongo_id=", value)
    end
    
    each_embedded_in(source_class) do |relation|
      field_name = "#{relation.name}_id"
      value = source.send(relation.name).id.to_s
      target.send("#{field_name}_mongo_id=", value)
      if relation.polymorphic?
        target.send("#{relation.name}_type=", source.send(relation.name).class.name)
      end
    end
    
    each_uploader(source_class) do |field, uploader_type|
      target.send("#{field}=", source.send('[]', field))
    end
    
    target.save!
    target
  end
  
  # Yield block for every basic (not key, uploader etc) field
  def each_basic_field(mongoid_model, &block)
    mongoid_model.fields.each do |field_name, field|
      unless field.name[0] == '_' ||                              # _type or _id
        field.name.ends_with?('_id') && field.type == Object ||   # foreign key
        mongoid_model.uploaders.keys.include?(field_name.to_sym) || # uploader
        field.type == Symbol # something to do with polymorphic relations e.g. commentable_field
        yield(field)
      end
    end
  end
  
  def each_belongs_to(mongoid_model, &block)
    # Add foreign key fields
    mongoid_model.relations.each do |rel_name, relation|
      if relation.relation == Mongoid::Relations::Referenced::In
        yield(relation)
      end
    end
  end
  
  def each_embedded_relation(mongoid_model, &block)
    # Add foreign key fields
    mongoid_model.embedded_relations.each do |rel_name, relation|
      yield(relation)
    end
  end
  
  def each_embedded_in(mongoid_model, &block)
    # Add foreign key fields
    embedded_in_relations(mongoid_model).each do |rel_name, relation|
      yield(relation)
    end
  end
  
  def each_uploader(mongoid_model, &block)
    mongoid_model.uploaders.each do |field, uploader_type|
      yield(field, uploader_type)
    end
  end
  
  def define_active_record_model_class(mongoid_model)
    @_ar_class_cache ||= {}
    arclass = @_ar_class_cache[mongoid_model]
    return arclass if arclass
    target_class_name = "Mysql#{mongoid_model.name}"
    # Define an ActiveRecord model class
    target_class = Class.new(ActiveRecord::Base) do
      set_table_name mongoid_model.name.tableize
    end
    Kernel.const_set(target_class_name, target_class)
    @_ar_class_cache[mongoid_model] = target_class
    #target_class
  end
  
  def migrator
    @_migrator ||= ActiveRecord::Migration.new
  end
  
  def colorize(text, color_code)
    "\033[#{color_code}m#{text}\033[0m"
  end

  COLORS = {
    :black => 30,
    :red => 31,
    :green => 32,
    :yellow => 33,
    :blue => 34,
    :magenta => 35,
    :cyan => 36,
    :white => 37,

    :warn => 36,
    :error => 35
  }
  
end