<?php
// $Id$
/**
 * @file
 * API and drush commands to support migration of data from external sources
 * into a Drupal installation.
 */

// TODO:
// Continue hook_schema_alter() for map & message tables?
// Views hooks for map/message tables
// xlat support?
// Integration API support?
// Documentation
// Tests

/**
 * Retrieve a list of all active migrations, ordered by dependencies. To be
 * recognized, a class must be non-abstract, derived from Migration, and the
 * class name must end in "Migration".
 *
 * While the class list is being built, PHP errors are converted to
 * exceptions by migrate_exception_error_handler(). The previously active
 * error handler is put back afterwards, whether or not the build succeeds.
 *
 * @param $refresh
 *  The list is statically cached - if TRUE, this forces the list to be rebuilt.
 *
 * @return
 *  Array of migration objects, keyed by the class name.
 *
 * @throws Exception
 *  Any exception raised while building the class list is rethrown unchanged.
 */
function migrate_migrations($refresh = FALSE) {
  // First, get a list of all Migration classes as a starting point. Note
  // we set up PHP errors to be handled by throwing exceptions, so we can deal
  // with migrations that have parameters and thus can't be instantiated here.
  set_error_handler('migrate_exception_error_handler');
  try {
    $migrations = _migrate_class_list('%Migration', 'MigrationBase', $refresh);
  }
  catch (Exception $e) {
    // Don't leave our exception-throwing handler installed for the rest of
    // the request when the class list can't be built.
    restore_error_handler();
    throw $e;
  }
  // Pop our handler off the stack rather than pushing the old one on again.
  restore_error_handler();

  // Then, let modules add instances of dynamic migrations, and perhaps remove
  // the classes underlying them
  drupal_alter('migrations', $migrations);
  return $migrations;
}

/**
 * PHP error handler that converts an error into an ErrorException.
 *
 * The parameters are the ones PHP passes to an error handler callback.
 *
 * @param $errno
 *  Level of the error raised; becomes the exception's severity.
 * @param $errstr
 *  Error message; becomes the exception's message.
 * @param $errfile
 *  Name of the file the error was raised in.
 * @param $errline
 *  Line number the error was raised at.
 *
 * @throws ErrorException
 *  Always.
 */
function migrate_exception_error_handler($errno, $errstr, $errfile, $errline ) {
  throw new ErrorException($errstr, 0, $errno, $errfile, $errline);
}
 * Invoke any available handlers attached to a given destination type.
 * If any handlers have dependencies defined, they will be invoked after
 * the specified handlers.
 *
 * @param $destination
 *  Destination type ('Node', 'User', etc.) - generally the same string as
 *  the destination class name without the MigrateDestination prefix.
 * @param $method
 *  Method name such as 'prepare' (called at the beginning of an import operation)
 *  or 'complete' (called at the end of an import operation).
 * @param ...
 *  Parameters to be passed to the handler.
function migrate_handler_invoke_all($destination, $method) {
  $args = func_get_args();
  array_shift($args);
  array_shift($args);
  $return = array();
  $class_list = _migrate_class_list('%Handler', 'MigrateDestinationHandler');
  foreach ($class_list as $class_name => $handler) {
    if ($handler->handlesType($destination) && method_exists($handler, $method)) {
Mike Ryan's avatar
Mike Ryan committed
      migrate_instrument_start($class_name . '->' . $method);
      $result = call_user_func_array(array($handler, $method), $args);
Mike Ryan's avatar
Mike Ryan committed
      migrate_instrument_stop($class_name . '->' . $method);
      if (isset($result) && is_array($result)) {
        $return = array_merge_recursive($return, $result);
      }
      elseif (isset($result)) {
        $return[] = $result;
      }
  return $return;
/**
 * Run prepare() on every field handler that claims a field's type.
 *
 * Handlers are taken, in the order given, from
 * _migrate_class_list('%FieldHandler', 'MigrateFieldHandler'). A handler is
 * called only if its handlesType() accepts the field type and it has a
 * prepare() method.
 *
 * @param $entity
 *  The entity object being built; passed through to each handler.
 * @param $field_info
 *  Array of info on the field, from field_info_field(). Its 'type' element
 *  selects the handlers.
 * @param $instance
 *  Array of info in the field instance, from field_info_instances().
 * @param $values
 *  Array of incoming values, to be transformed into the appropriate structure
 *  for the field type.
 *
 * @return
 *  Array of the non-NULL handler results: array results are merged in with
 *  array_merge_recursive(), any other result is appended.
 */
function migrate_field_handler_invoke_all(stdClass $entity, array $field_info, array $instance,
    array $values) {
  $collected = array();
  $field_type = $field_info['type'];
  $handlers = _migrate_class_list('%FieldHandler', 'MigrateFieldHandler');
  foreach ($handlers as $handler_class => $field_handler) {
    // Skip handlers that don't apply to this field type or can't prepare.
    if (!$field_handler->handlesType($field_type) || !method_exists($field_handler, 'prepare')) {
      continue;
    }

    $measurement = $handler_class . '->prepare';
    migrate_instrument_start($measurement);
    $prepared = call_user_func_array(array($field_handler, 'prepare'),
      array($entity, $field_info, $instance, $values));
    migrate_instrument_stop($measurement);

    // A NULL result contributes nothing.
    if (!isset($prepared)) {
      continue;
    }
    if (is_array($prepared)) {
      $collected = array_merge_recursive($collected, $prepared);
    }
    else {
      $collected[] = $prepared;
    }
  }
  return $collected;
}

/**
 * For a given class name pattern and parent class, identify and instantiate singleton
 * objects for any non-abstract classes with names matching the pattern and derived
 * from the parent, returning an array of the objects indexed by class name. The
 * array will be ordered such that any classes with dependencies are listed after
 * the classes they are dependent on.
 *
 * TODO: Is there a way to do this without depending on naming conventions and searching
 * the registry? I.e., find all subclasses of a given class including potential
 * autoloads?
 *
 * @param $class_pattern
 *  SQL LIKE pattern for class names to match (e.g., '%Migration').
 * @param $parent_class
 *  Name of a class from which results will be derived.
 * @param $refresh
 *  The list is statically cached - if TRUE, this forces the list to be rebuilt.
 * @return
 *  Array of objects, keyed by the class name.
 * @throws MigrateException
 *  When a dependency names a class that is not among the discovered classes,
 *  or when the dependency ordering has not settled after more than 20 passes.
 */
function _migrate_class_list($class_pattern, $parent_class, $refresh = FALSE) {
  // NOTE(review): this function contains lines that are not PHP (the
  // "... avatar" / "... committed" pairs) and several closing braces appear
  // to be missing. It looks like residue from a web blame view; restore the
  // function from version control before relying on it.
  // Per-pattern cache of the resulting lists, kept for the whole request.
  static $class_lists = array();
Mike Ryan's avatar
Mike Ryan committed
  if (!isset($class_lists[$class_pattern]) || $refresh) {
    $class_lists[$class_pattern] = array();
    $dependent_classes = array();
    $required_classes = array();

    // Discover matching class names registered with Drupal
    $result = db_select('registry', 'r')
              ->fields('r', array('name'))
              ->condition('type', 'class')
              ->condition('name', $class_pattern, 'LIKE')
              ->orderBy('name')
              ->execute();

    foreach ($result as $record) {
      // Validate it's an implemented subclass of the parent class
      $class = new ReflectionClass($record->name);
      if (!$class->isAbstract() && $class->isSubclassOf($parent_class)) {
          // NOTE(review): $object is assigned by Migration::getInstance() and
          // then again by the try block below. The extra indentation suggests
          // these were two branches of an if/else whose header lines were
          // lost - confirm against version control.
          if (class_exists($record->name)) {
            $machine_name = $record->name;
          }
          else {
            $machine_name = substr($record->name, 0, strlen($record->name) - strlen('Migration'));
          }
          $object = Migration::getInstance($machine_name);
          // If the constructor has required parameters, this may fail. We will
          // silently ignore - it is up to the implementor of such a class to
          // instantiate it in hook_migrations_alter().
          try {
            $object = new $record->name;
          }
          catch (Exception $e) {
          }
        // NOTE(review): as shown, $object is never cleared between loop
        // iterations, so after a failed instantiation it would still hold
        // the previous iteration's object - confirm.
        if (isset($object)) {
          $dependencies = $object->getDependencies();
          if (count($dependencies) > 0) {
            // Set classes with dependencies aside for reordering
            $dependent_classes[$record->name] = $object;
            // NOTE(review): array union keeps existing keys. If
            // getDependencies() returns numerically indexed lists, entries
            // whose index is already present are not added here - confirm.
            $required_classes += $dependencies;
          }
          else {
            // No dependencies, just add
            $class_lists[$class_pattern][$record->name] = $object;
          }
    // Validate that each depended-on class at least exists
    foreach ($required_classes as $class_name) {
      // A dependency that is not itself a class name is retried with the
      // 'Migration' suffix appended.
      if (!class_exists($class_name)) {
        $class_name .= 'Migration';
      }
Mike Ryan's avatar
Mike Ryan committed
      if ((!isset($dependent_classes[$class_name])) && !isset($class_lists[$class_pattern][$class_name])) {
        throw new MigrateException(t('Dependency on non-existent class !class - make sure ' .
            'you have added the file defining !class to the .info file.',
          array('!class' => $class_name)));
      }
    }

    // Scan modules with dependencies - we'll take 20 passes at it before
    // giving up
    $iterations = 0;
    while (count($dependent_classes) > 0) {
      if ($iterations++ > 20) {
        $class_names = implode(',', array_keys($dependent_classes));
        throw new MigrateException(t('Failure to sort class list - most likely due ' .
            'to circular dependencies involving !class_names.',
          array('!class_names' => $class_names)));
      }
      foreach ($dependent_classes as $name => $object) {
        $ready = TRUE;
        // Scan all the dependencies for this class and make sure they're all
        // in the final list
        foreach ($object->getDependencies() as $dependency) {
          // Same 'Migration' suffix fallback as in the validation loop above.
          if (!class_exists($dependency)) {
            $dependency .= 'Migration';
          }
Mike Ryan's avatar
Mike Ryan committed
          if (!isset($class_lists[$class_pattern][$dependency])) {
            $ready = FALSE;
            break;
        if ($ready) {
          // Yes they are! Move this class to the final list
Mike Ryan's avatar
Mike Ryan committed
          $class_lists[$class_pattern][$name] = $object;
          unset($dependent_classes[$name]);
        }
Mike Ryan's avatar
Mike Ryan committed
  return $class_lists[$class_pattern];
/**
 * Resource functions modeled on Drupal's timer functions
 */

/**
 * Implementation of hook_watchdog().
 *
 * Find the migration that is currently running and notify it by saving the
 * log message on it.
 *
 * @param array $log_entry
 *  The watchdog log entry. Its 'message' and 'variables' elements are used.
 */
function migrate_watchdog($log_entry) {
  if ($migration = Migration::currentMigration()) {
    // The 'variables' element is NULL when the message is already translated
    // or can't be translated. t() only accepts an array of replacements, so
    // such messages are passed on as-is.
    if (isset($log_entry['variables']) && is_array($log_entry['variables'])) {
      $message = t($log_entry['message'], $log_entry['variables']);
    }
    else {
      $message = $log_entry['message'];
    }
    // TODO: set appropriate level
    $migration->saveMessage($message);
  }
}

/**
 * Begin (or resume) the memory measurement with the specified name.
 *
 * Records the current memory usage as the starting point and bumps the
 * number of times this measurement has been started. Starting and stopping
 * the same name repeatedly accumulates the measured differences.
 *
 * @param $name
 *   The name of the memory measurement.
 */
function migrate_memory_start($name) {
  global $_migrate_memory;

  // Work out the new start count before touching the stored entry.
  if (isset($_migrate_memory[$name]['count'])) {
    $starts = $_migrate_memory[$name]['count'] + 1;
  }
  else {
    $starts = 1;
  }

  $_migrate_memory[$name]['start'] = memory_get_usage();
  $_migrate_memory[$name]['count'] = $starts;
}

/**
 * Read the current memory value without recording the change.
 *
 * @param $name
 *   The name of the memory measurement.
 * @return
 *   The bytes accumulated by earlier start/stop cycles of this measurement,
 *   plus the change since the last start if it is currently running. 0 if
 *   nothing has been recorded under this name.
 */
function migrate_memory_read($name) {
  global $_migrate_memory;

  // Default to 0 so that reading a name that was never started (or never
  // stopped) doesn't touch an undefined 'bytes' entry.
  $bytes = isset($_migrate_memory[$name]['bytes']) ? $_migrate_memory[$name]['bytes'] : 0;

  // A 'start' entry means the measurement is running: add the usage change
  // since then, leaving the stored values untouched.
  if (isset($_migrate_memory[$name]['start'])) {
    $bytes += memory_get_usage() - $_migrate_memory[$name]['start'];
  }
  return $bytes;
}

/**
 * Stop the memory counter with the specified name.
 *
 * @param $name
 *   The name of the memory measurement.
 * @return
 *   A memory array. The array contains the number of times the memory has been
 *   started and stopped (count) and the accumulated memory difference value in bytes.
 *   NULL if no measurement exists under this name.
 */
function migrate_memory_stop($name) {
  global $_migrate_memory;

  if (isset($_migrate_memory[$name])) {
    // Only a running measurement (one with a 'start' entry) has a difference
    // to fold into the accumulated total.
    if (isset($_migrate_memory[$name]['start'])) {
      $stop = memory_get_usage();
      $diff = $stop - $_migrate_memory[$name]['start'];
      if (isset($_migrate_memory[$name]['bytes'])) {
        $_migrate_memory[$name]['bytes'] += $diff;
      }
      else {
        $_migrate_memory[$name]['bytes'] = $diff;
      }
      // Mark the measurement as no longer running.
      unset($_migrate_memory[$name]['start']);
    }
    return $_migrate_memory[$name];
  }
}

/**
 * Start measuring time and (optionally) memory consumption over a section of code.
 * Note that the memory consumption measurement is generally not useful in
 * lower areas of the code, where data is being generated that will be freed
 * by the next call to the same area. For example, measuring the memory
 * consumption of db_query is not going to be helpful.
 *
 * Each kind of measurement is taken only when the corresponding global
 * switch ($_migrate_track_timer, $_migrate_track_memory) is set.
 *
 * @param $name
 *  The name of the measurement.
 * @param $include_memory
 *  Measure both memory and timers. Defaults to FALSE (timers only).
 */
function migrate_instrument_start($name, $include_memory = FALSE) {
  global $_migrate_track_memory, $_migrate_track_timer;
  if ($_migrate_track_memory && $include_memory) {
    migrate_memory_start($name);
  }
  if ($_migrate_track_timer) {
    timer_start($name);
  }
}

/**
 * Stop measuring both memory and time consumption over a section of code.
 *
 * Each kind of measurement is stopped only when the corresponding global
 * switch ($_migrate_track_timer, $_migrate_track_memory) is set.
 *
 * @param $name
 *  The name of the measurement.
 */
function migrate_instrument_stop($name) {
  global $_migrate_track_memory, $_migrate_track_timer;
  if ($_migrate_track_timer) {
    timer_stop($name);
  }
  if ($_migrate_track_memory) {
    migrate_memory_stop($name);
  }
}

/**
 * Collect overall documentation on implemented migrations.
 *
 * Invokes hook_migrate_overview() in all modules and strings the results
 * together.
 *
 * @return
 *  The hook results concatenated, each one followed by a single space. An
 *  empty string if there are no results.
 */
function migrate_overview() {
  $pieces = module_invoke_all('migrate_overview');
  if (empty($pieces)) {
    return '';
  }
  // Every piece, including the last one, is followed by a space.
  return implode(' ', $pieces) . ' ';
}

// TODO: The functions below are D6 functions of some potential use in D7, that
// haven't been updated/integrated yet
/**
 * Implementation of hook_schema_alter().
 */
/*
function migrate_schema_alter(&$schema) {
  // Check for table existence - at install time, hook_schema_alter() may be called
  // before our install hook.
  if (db_table_exists('migrate_content_sets')) {
    $result = db_query("SELECT * FROM {migrate_content_sets}");
    while ($content_set = db_fetch_object($result)) {
      $maptablename = migrate_map_table_name($content_set->mcsid);
      $msgtablename = migrate_message_table_name($content_set->mcsid);

      // Get the proper field definition for the sourcekey
      $view = views_get_view($content_set->view_name);
      if (!$view) {
        drupal_set_message(t('View !view does not exist - either (re)create this view, or
          remove the migrate content set using it.', array('!view' => $content_set->view_name)));
        continue;
      }
      // Must do this to load the database
      $view->init_query();

      // TODO: For now, PK must be in base_table
      if (isset($view->base_database)) {
        $tabledb = $view->base_database;
      }
      else {
        $tabledb = 'default';
      }
      $tablename = $view->base_table;

      $sourceschema = _migrate_inspect_schema($tablename, $tabledb);

      // If the PK of the content set is defined, make sure we have a mapping table
      $sourcekey = $content_set->sourcekey;
      if ($sourcekey) {
        $sourcefield = $sourceschema['fields'][$sourcekey];
        if (!$sourcefield) {
          // strip base table name if views prepended it
          $baselen = drupal_strlen($tablename);
          if (!strncasecmp($sourcekey, $tablename . '_', $baselen + 1)) {
            $sourcekey = drupal_substr($sourcekey, $baselen + 1);
          }
          $sourcefield = $sourceschema['fields'][$sourcekey];
        }
        // We don't want serial fields to behave serially, so change to int
        if ($sourcefield['type'] == 'serial') {
          $sourcefield['type'] = 'int';
        }
        $schema[$maptablename] = _migrate_map_table_schema($sourcefield);
        $schema[$maptablename]['name'] = $maptablename;
        $schema[$msgtablename] = _migrate_message_table_schema($sourcefield);
        $schema[$msgtablename]['name'] = $msgtablename;
      }
    }
  }
/*
 * Translate URIs from an old site to the new one
 * Requires adding RewriteRules to .htaccess. For example, if the URLs
 * for news articles had the form
 * http://example.com/issues/news/[OldID].html, use this rule:
 *
 * RewriteRule ^issues/news/([0-9]+).html$ /migrate/xlat/node/$1 [L]
 *
 * @param $contenttype
 *  Content type to translate (e.g., 'node', 'user', etc.)
 * @param $oldid
 *  Primary key from input view
 */
function migrate_xlat($contenttype, $oldid) {
  // NOTE(review): this is one of the D6-era functions the TODO above lists as
  // not yet updated for D7. Only the missing closing brace was restored; the
  // drupal_goto() arguments and migrate_invoke_all() still need checking.
  if ($contenttype && $oldid) {
    // Map the old site's ID to the Drupal-side ID, if one is recorded.
    $newid = _migrate_xlat_get_new_id($contenttype, $oldid);
    if ($newid) {
      // Redirect permanently (301) to the first URI returned for the new ID.
      $uri = migrate_invoke_all("xlat_$contenttype", $newid);
      drupal_goto($uri[0], NULL, NULL, 301);
    }
  }
}

/*
 * Helper function to translate an ID from a source file to the corresponding
 * Drupal-side ID (nid, uid, etc.)
 * Note that the result may be ambiguous - for example, if you are importing
 * nodes from different content sets, they might have overlapping source IDs.
 *
 * @param $contenttype
 *  Content type to translate (e.g., 'node', 'user', etc.)
 * @param $oldid
 *  Primary key from input view
 * @return
 *  Drupal-side ID of the object
 */
function _migrate_xlat_get_new_id($contenttype, $oldid) {
  // NOTE(review): this is one of the D6-era functions the TODO above lists as
  // not yet updated for D7 (db_fetch_object(), db_result(), '%s'
  // placeholders). Only the missing closing braces were restored.

  // Map table names by content set ID, remembered across calls.
  static $maptables = array();

  $result = db_query("SELECT mcsid
                      FROM {migrate_content_sets}
                      WHERE contenttype='%s'",
                     $contenttype);
  // Try each content set of this type in turn; the first map table that knows
  // the source ID wins.
  while ($row = db_fetch_object($result)) {
    if (!isset($maptables[$row->mcsid])) {
      $maptables[$row->mcsid] = migrate_map_table_name($row->mcsid);
    }
    $sql = "SELECT destid
            FROM {" . $maptables[$row->mcsid] . "}
            WHERE sourceid='%s'";
    $id = db_result(db_query($sql, $oldid));
    if ($id) {
      return $id;
    }
  }
  return NULL;
}

define('MIGRATE_API_VERSION', 2);

/*
 * Implementation of hook_migrate_api().
 *
 * @return
 *  Array with a 'path' element and an 'integration modules' element, the
 *  latter keyed by module name with a translated 'description' for each.
 */
function migrate_migrate_api() {
  // NOTE(review): no 'api' element is declared here, while
  // migrate_get_module_apis() only accepts implementations that set one.
  // Lines may have been lost from this function - confirm against version
  // control.
  $api = array(
    'path' => 'modules',
    'integration modules' => array(
      'comment' => array(
        'description' => t('Core migration support for the comment module'),
      ),
      'node' => array(
        'description' => t('Core migration support for the node module'),
      ),
      'profile' => array(
        'description' => t('Core migration support for the profile module'),
      ),
      'taxonomy' => array(
        'description' => t('Core migration support for the taxonomy module'),
      ),
      'user' => array(
        'description' => t('Core migration support for the user module'),
      ),
    ),
  );
  return $api;
}

// ------------------------------------------------------------------
// Include file helpers - @merlinofchaos: borrowing heavily from views.module

/**
 * Get a list of modules that support the current migrate API.
 *
 * Calls every hook_migrate_api() implementation, keeps those declaring an
 * accepted 'api' version, and fills in defaults for 'path' and
 * 'integration modules'. Rejected implementations are reported with
 * drupal_set_message(). The result is statically cached.
 *
 * @param $reset
 *  If TRUE, discard the cached list and rebuild it.
 *
 * @return
 *  Array of API info arrays, keyed by module name.
 */
function migrate_get_module_apis($reset = FALSE) {
  static $cache = NULL;
  if ($reset) {
    $cache = NULL;
  }
  if (!isset($cache)) {
    $cache = array();
    foreach (module_implements('migrate_api') as $module) {
      $function = $module . '_migrate_api';
      $info = $function();
      // NOTE(review): the accepted version is the literal 1.000, while the
      // rejection message below reports MIGRATE_API_VERSION. Confirm which
      // version is intended.
      if (isset($info['api']) && $info['api'] == 1.000) {
        // Make the include path relative to the Drupal root.
        if (isset($info['path'])) {
          $info['path'] = drupal_get_path('module', $module) . '/' . $info['path'];
        }
        else {
          $info['path'] = drupal_get_path('module', $module);
        }
        // By default a module integrates only itself.
        if (!isset($info['integration modules'])) {
          $info['integration modules'] = array($module => array());
        }
        $settings = variable_get('migrate_integration_settings', NULL);
        foreach ($info['integration modules'] as $intmod_name => $intmod_details) {
          // If the module was just entered as a string without details, we have to fix.
          if (!is_array($intmod_details)) {
            unset($info['integration modules'][$intmod_name]);
            $intmod_name = $intmod_details;
            $intmod_details = array();
          }

          $default_details = array(
            'description' => t('Support for the @intmod module.', array('@intmod' => $intmod_name)),
            'status' => TRUE,
          );

          // Allow override of defaults.
          $info['integration modules'][$intmod_name] = $intmod_details + $default_details;
          // Overwrite default status if set.
          if (isset($settings[$module][$intmod_name])) {
            $info['integration modules'][$intmod_name]['status'] = $settings[$module][$intmod_name];
          }
        }
        $cache[$module] = $info;
      }
      else {
        // The implementation may not declare a version at all; don't read an
        // undefined element when reporting it.
        $modversion = isset($info['api']) ? $info['api'] : NULL;
        drupal_set_message(t('%function supports Migrate API version %modversion,
           Migrate module API version is %version - migration support not loaded.',
           array('%function' => $function, '%modversion' => $modversion,
                 '%version' => MIGRATE_API_VERSION)));
      }
    }
  }

  return $cache;
}