<?php
/**
* @file
* API and drush commands to support migration of data from external sources
* into a Drupal installation.
*/
// TODO:
// Continue hook_schema_alter() for map & message tables?
// Views hooks for map/message tables
// xlat support?
// Integration API support?
// Documentation
// Tests
/**
 * Retrieve a list of all active migrations, ordered by dependencies. To be
 * recognized, a class must be non-abstract, derived from MigrationBase, and
 * the class name must end in "Migration".
 *
 * @param $refresh
 *  The list is statically cached - if TRUE, this forces the list to be rebuilt.
 *
 * @return
 *  Array of migration objects, keyed by the class name, after modules have had
 *  a chance to alter it through hook_migrations_alter().
 */
function migrate_migrations($refresh = FALSE) {
  // First, get a list of all Migration classes as a starting point. PHP errors
  // are turned into exceptions while the classes are discovered, so we can deal
  // with migrations that have parameters and thus can't be instantiated here.
  set_error_handler('migrate_exception_error_handler');
  try {
    $migrations = _migrate_class_list('%Migration', 'MigrationBase', $refresh);
  }
  catch (Exception $e) {
    // Discovery can throw; never leave our handler installed for the caller.
    restore_error_handler();
    throw $e;
  }
  // Pop our handler off the stack rather than pushing the previous one again.
  restore_error_handler();

  // Then, let modules add instances of dynamic migrations, and perhaps remove
  // the classes underlying them
  drupal_alter('migrations', $migrations);
  return $migrations;
}
/**
 * PHP error handler which rethrows every reported error as an ErrorException.
 *
 * @param $errno
 *  Level of the error raised; stored as the severity of the exception.
 * @param $errstr
 *  The error message.
 * @param $errfile
 *  Name of the file the error was raised in.
 * @param $errline
 *  Line number the error was raised at.
 *
 * @throws ErrorException
 *  Always - this handler never returns normally.
 */
function migrate_exception_error_handler($errno, $errstr, $errfile, $errline) {
  throw new ErrorException($errstr, 0, $errno, $errfile, $errline);
}
/**
 * Invoke any available handlers attached to a given destination type.
 * If any handlers have dependencies defined, they will be invoked after
 * the specified handlers.
 *
 * @param $destination
 *  Destination type ('Node', 'User', etc.) - generally the same string as
 *  the destination class name without the MigrateDestination prefix.
 * @param $method
 *  Method name such as 'prepare' (called at the beginning of an import operation)
 *  or 'complete' (called at the end of an import operation).
 * @param ...
 *  Parameters to be passed to the handler.
 *
 * @return
 *  Array of handler results: array results are merged recursively, any other
 *  non-NULL result is appended.
 */
function migrate_handler_invoke_all($destination, $method) {
  // Everything after $destination and $method is forwarded to the handlers.
  $args = array_slice(func_get_args(), 2);
  $return = array();
  $class_list = _migrate_class_list('%Handler', 'MigrateDestinationHandler');
  foreach ($class_list as $class_name => $handler) {
    if ($handler->handlesType($destination) && method_exists($handler, $method)) {
      $result = call_user_func_array(array($handler, $method), $args);
      if (isset($result) && is_array($result)) {
        $return = array_merge_recursive($return, $result);
      }
      elseif (isset($result)) {
        $return[] = $result;
      }
    }
  }
  // NOTE(review): the end of this function was missing from the reviewed copy;
  // the closing braces and return mirror migrate_field_handler_invoke_all().
  return $return;
}
/**
 * Run the prepare() method of every field handler registered for a field type
 * and gather what the handlers return.
 * If any handlers have dependencies defined, they will be invoked after
 * the specified handlers.
 *
 * @param $entity
 *  The object we are building up before calling example_save().
 * @param $field_info
 *  Array of info on the field, from field_info_field(). Its 'type' element
 *  selects the handlers to run.
 * @param $instance
 *  Array of info in the field instance, from field_info_instances().
 * @param $values
 *  Array of incoming values, to be transformed into the appropriate structure
 *  for the field type.
 *
 * @return
 *  Array of handler results: array results are merged recursively, any other
 *  non-NULL result is appended.
 */
function migrate_field_handler_invoke_all(stdClass $entity, array $field_info, array $instance,
  array $values) {
  $collected = array();
  $field_type = $field_info['type'];
  foreach (_migrate_class_list('%FieldHandler', 'MigrateFieldHandler') as $handler_class => $field_handler) {
    // Skip handlers for other field types, or without a prepare() method.
    if (!$field_handler->handlesType($field_type) || !method_exists($field_handler, 'prepare')) {
      continue;
    }

    // Instrument each handler call under "<class>->prepare".
    $measurement = $handler_class . '->prepare';
    migrate_instrument_start($measurement);
    $prepared = call_user_func(array($field_handler, 'prepare'),
      $entity, $field_info, $instance, $values);
    migrate_instrument_stop($measurement);

    if (is_array($prepared)) {
      $collected = array_merge_recursive($collected, $prepared);
    }
    elseif ($prepared !== NULL) {
      $collected[] = $prepared;
    }
  }
  return $collected;
}
/**
* For a given class name pattern and parent class, identify and instantiate singleton
* objects for any non-abstract classes with names matching the pattern and derived
* from the parent, returning an array of the objects indexed by class name. The
* array will be ordered such that any classes with dependencies are listed after
* the classes they are dependent on.
*
* TODO: Is there a way to do this without depending on naming conventions and searching
* the registry? I.e., find all subclasses of a given class including potential
* autoloads?
*
* @param $class_pattern
* SQL LIKE pattern for class names to match (e.g., '%Migration').
* @param $parent_class
* Name of a class from which results will be derived.
* @param $refresh
* The list is statically cached - if TRUE, this forces the list to be rebuilt.
* @return
* Array of objects, keyed by the class name.
*/
function _migrate_class_list($class_pattern, $parent_class, $refresh = FALSE) {
static $class_lists = array();
if (!isset($class_lists[$class_pattern]) || $refresh) {
$class_lists[$class_pattern] = array();
// Discover matching class names registered with Drupal
Mike Ryan
committed
$result = db_select('registry', 'r')
->fields('r', array('name'))
->condition('type', 'class')
->condition('name', $class_pattern, 'LIKE')
->orderBy('name')
->execute();
foreach ($result as $record) {
// Validate it's an implemented subclass of the parent class
$class = new ReflectionClass($record->name);
if (!$class->isAbstract() && $class->isSubclassOf($parent_class)) {
Mike Ryan
committed
if ($parent_class == 'MigrationBase') {
Mike Ryan
committed
if (class_exists($record->name)) {
$machine_name = $record->name;
}
else {
$machine_name = substr($record->name, 0, strlen($record->name) - strlen('Migration'));
}
Mike Ryan
committed
$object = Migration::getInstance($machine_name);
// If the constructor has required parameters, this may fail. We will
// silently ignore - it is up to the implementor of such a class to
// instantiate it in hook_migrations_alter().
try {
$object = new $record->name;
}
catch (Exception $e) {
}
if (isset($object)) {
$dependencies = $object->getDependencies();
if (count($dependencies) > 0) {
// Set classes with dependencies aside for reordering
$dependent_classes[$record->name] = $object;
$required_classes += $dependencies;
}
else {
// No dependencies, just add
$class_lists[$class_pattern][$record->name] = $object;
}
// Validate that each depended-on class at least exists
foreach ($required_classes as $class_name) {
Mike Ryan
committed
if (!class_exists($class_name)) {
$class_name .= 'Migration';
}
if ((!isset($dependent_classes[$class_name])) && !isset($class_lists[$class_pattern][$class_name])) {
throw new MigrateException(t('Dependency on non-existent class !class - make sure ' .
'you have added the file defining !class to the .info file.',
array('!class' => $class_name)));
}
}
// Scan modules with dependencies - we'll take 20 passes at it before
// giving up
$iterations = 0;
while (count($dependent_classes) > 0) {
if ($iterations++ > 20) {
$class_names = implode(',', array_keys($dependent_classes));
throw new MigrateException(t('Failure to sort class list - most likely due ' .
'to circular dependencies involving !class_names.',
array('!class_names' => $class_names)));
}
foreach ($dependent_classes as $name => $object) {
$ready = TRUE;
// Scan all the dependencies for this class and make sure they're all
// in the final list
foreach ($object->getDependencies() as $dependency) {
Mike Ryan
committed
if (!class_exists($dependency)) {
$dependency .= 'Migration';
}
if (!isset($class_lists[$class_pattern][$dependency])) {
Moshe Weitzman
committed
}
}
if ($ready) {
// Yes they are! Move this class to the final list
Moshe Weitzman
committed
}
}
}
Moshe Weitzman
committed
}
/**
 * Resource functions modeled on Drupal's timer functions
 */
/**
 * Implementation of hook_watchdog().
 * Find the migration that is currently running and notify it.
 *
 * @param array $log_entry
 *  The watchdog entry. Its 'message' element is saved on the current
 *  migration; when 'variables' is an array the message is first passed
 *  through t() with those variables.
 */
function migrate_watchdog($log_entry) {
  if ($migration = Migration::currentMigration()) {
    // 'variables' is not always an array (watchdog() callers may pass NULL for
    // an already-translated message), and t() only accepts an array there.
    if (isset($log_entry['variables']) && is_array($log_entry['variables'])) {
      $message = t($log_entry['message'], $log_entry['variables']);
    }
    else {
      $message = $log_entry['message'];
    }
    // TODO: set appropriate level
    $migration->saveMessage($message);
  }
}
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
/**
 * Begin (or resume) a named memory measurement. Starting and stopping the same
 * name repeatedly accumulates the measured differences.
 *
 * @param $name
 *  The name of the memory measurement.
 */
function migrate_memory_start($name) {
  global $_migrate_memory;

  // Remember the current usage as the baseline for this measurement.
  $_migrate_memory[$name]['start'] = memory_get_usage();

  // Keep track of how many times this measurement has been started.
  if (isset($_migrate_memory[$name]['count'])) {
    $_migrate_memory[$name]['count']++;
  }
  else {
    $_migrate_memory[$name]['count'] = 1;
  }
}
/**
 * Read the current memory value without recording the change.
 *
 * @param $name
 *  The name of the memory measurement.
 * @return
 *  The bytes accumulated so far for this measurement, plus the change since
 *  the last start if the measurement is currently running. 0 if nothing has
 *  been measured under this name.
 */
function migrate_memory_read($name) {
  global $_migrate_memory;

  // Bytes recorded by earlier start/stop cycles; 0 when there are none, which
  // also avoids reading an undefined index for an unknown name.
  $bytes = isset($_migrate_memory[$name]['bytes']) ? $_migrate_memory[$name]['bytes'] : 0;

  // A running measurement also contributes what it has used since its start.
  if (isset($_migrate_memory[$name]['start'])) {
    $bytes += memory_get_usage() - $_migrate_memory[$name]['start'];
  }
  return $bytes;
}
/**
 * Stop the memory counter with the specified name.
 *
 * @param $name
 *  The name of the memory measurement.
 * @return
 *  A memory array. The array contains the number of times the memory has been
 *  started and stopped (count) and the accumulated memory difference value in
 *  bytes. NULL if no measurement exists under this name.
 */
function migrate_memory_stop($name) {
  global $_migrate_memory;

  if (isset($_migrate_memory[$name])) {
    if (isset($_migrate_memory[$name]['start'])) {
      // Fold the usage since the last start into the accumulated total.
      $stop = memory_get_usage();
      $diff = $stop - $_migrate_memory[$name]['start'];
      if (isset($_migrate_memory[$name]['bytes'])) {
        $_migrate_memory[$name]['bytes'] += $diff;
      }
      else {
        $_migrate_memory[$name]['bytes'] = $diff;
      }
      // The measurement is no longer running.
      unset($_migrate_memory[$name]['start']);
    }
    // NOTE(review): the end of this function was missing from the reviewed
    // copy; this return follows the documented contract above - confirm
    // against the repository.
    return $_migrate_memory[$name];
  }
  return NULL;
}
/**
 * Start measuring time and (optionally) memory consumption over a section of code.
 * Note that the memory consumption measurement is generally not useful in
 * lower areas of the code, where data is being generated that will be freed
 * by the next call to the same area. For example, measuring the memory
 * consumption of db_query is not going to be helpful.
 *
 * @param $name
 *  The name of the measurement.
 * @param $include_memory
 *  Measure both memory and timers. Defaults to FALSE (timers only).
 */
function migrate_instrument_start($name, $include_memory = FALSE) {
  // The tracking switches are globals, read the same way as in
  // migrate_instrument_stop(). Without this declaration both would be
  // undefined locals and nothing would ever be measured.
  global $_migrate_track_memory, $_migrate_track_timer;
  if ($_migrate_track_memory && $include_memory) {
    migrate_memory_start($name);
  }
  if ($_migrate_track_timer) {
    timer_start($name);
  }
}
/**
 * Stop measuring both memory and time consumption over a section of code.
 * Each kind of measurement is only stopped when its global tracking switch
 * ($_migrate_track_timer, $_migrate_track_memory) is on.
 *
 * @param $name
 *  The name of the measurement.
 */
function migrate_instrument_stop($name) {
  global $_migrate_track_memory, $_migrate_track_timer;
  if ($_migrate_track_timer) {
    timer_stop($name);
  }
  if ($_migrate_track_memory) {
    migrate_memory_stop($name);
  }
}
/**
 * Collect the results of hook_migrate_overview() into a single string of
 * overall documentation on implemented migrations.
 *
 * @return
 *  Every hook result followed by a single space, concatenated in the order
 *  returned; an empty string when there are no results.
 */
function migrate_overview() {
  $fragments = module_invoke_all('migrate_overview');
  if (empty($fragments)) {
    return '';
  }
  // Each fragment is followed by one space, including the last one.
  return implode(' ', $fragments) . ' ';
}
// TODO: The functions below are Drupal 6 functions of some potential use in
// Drupal 7 that have not been updated or integrated yet.
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
/**
* Implementation of hook_schema_alter().
*/
/*
function migrate_schema_alter(&$schema) {
// Check for table existence - at install time, hook_schema_alter() may be called
// before our install hook.
if (db_table_exists('migrate_content_sets')) {
$result = db_query("SELECT * FROM {migrate_content_sets}");
while ($content_set = db_fetch_object($result)) {
$maptablename = migrate_map_table_name($content_set->mcsid);
$msgtablename = migrate_message_table_name($content_set->mcsid);
// Get the proper field definition for the sourcekey
$view = views_get_view($content_set->view_name);
if (!$view) {
drupal_set_message(t('View !view does not exist - either (re)create this view, or
remove the migrate content set using it.', array('!view' => $content_set->view_name)));
continue;
}
// Must do this to load the database
$view->init_query();
// TODO: For now, PK must be in base_table
if (isset($view->base_database)) {
$tabledb = $view->base_database;
}
else {
$tabledb = 'default';
}
$tablename = $view->base_table;
$sourceschema = _migrate_inspect_schema($tablename, $tabledb);
// If the PK of the content set is defined, make sure we have a mapping table
$sourcekey = $content_set->sourcekey;
if ($sourcekey) {
$sourcefield = $sourceschema['fields'][$sourcekey];
if (!$sourcefield) {
// strip base table name if views prepended it
$baselen = drupal_strlen($tablename);
if (!strncasecmp($sourcekey, $tablename . '_', $baselen + 1)) {
$sourcekey = drupal_substr($sourcekey, $baselen + 1);
}
$sourcefield = $sourceschema['fields'][$sourcekey];
}
// We don't want serial fields to behave serially, so change to int
if ($sourcefield['type'] == 'serial') {
$sourcefield['type'] = 'int';
}
$schema[$maptablename] = _migrate_map_table_schema($sourcefield);
$schema[$maptablename]['name'] = $maptablename;
$schema[$msgtablename] = _migrate_message_table_schema($sourcefield);
$schema[$msgtablename]['name'] = $msgtablename;
}
}
}
}
*/
/*
* Translate URIs from an old site to the new one
* Requires adding RewriteRules to .htaccess. For example, if the URLs
* for news articles had the form
* http://example.com/issues/news/[OldID].html, use this rule:
*
* RewriteRule ^issues/news/([0-9]+).html$ /migrate/xlat/node/$1 [L]
*
* @param $contenttype
* Content type to translate (e.g., 'node', 'user', etc.)
* @param $oldid
* Primary key from input view
*/
function migrate_xlat($contenttype, $oldid) {
  // Nothing to translate without both a content type and a source ID.
  if ($contenttype && $oldid) {
    $newid = _migrate_xlat_get_new_id($contenttype, $oldid);
    if ($newid) {
      $uri = migrate_invoke_all("xlat_$contenttype", $newid);
      // Only redirect when a handler actually supplied a destination.
      if (!empty($uri[0])) {
        // Drupal 7 argument order: path, options array, HTTP response code.
        // 301 marks the old URL as permanently moved.
        drupal_goto($uri[0], array(), 301);
      }
    }
  }
}
/*
* Helper function to translate an ID from a source file to the corresponding
* Drupal-side ID (nid, uid, etc.)
* Note that the result may be ambiguous - for example, if you are importing
* nodes from different content sets, they might have overlapping source IDs.
*
* @param $contenttype
* Content type to translate (e.g., 'node', 'user', etc.)
* @param $oldid
* Primary key from input view
* @return
* Drupal-side ID of the object
*/
function _migrate_xlat_get_new_id($contenttype, $oldid) {
  // Map table names already looked up, keyed by content set ID (mcsid).
  static $maptables = array();

  // Drupal 7 database API: named placeholders and an iterable result set
  // replace the Drupal 6 '%s' placeholders and db_fetch_object().
  $result = db_query("SELECT mcsid
                      FROM {migrate_content_sets}
                      WHERE contenttype = :contenttype",
                     array(':contenttype' => $contenttype));
  foreach ($result as $row) {
    if (!isset($maptables[$row->mcsid])) {
      $maptables[$row->mcsid] = migrate_map_table_name($row->mcsid);
    }
    // The table name cannot be a placeholder, so it is concatenated; it comes
    // from migrate_map_table_name(), not from the request.
    $sql = "SELECT destid
            FROM {" . $maptables[$row->mcsid] . "}
            WHERE sourceid = :sourceid";
    // fetchField() replaces the Drupal 6 db_result().
    $id = db_query($sql, array(':sourceid' => $oldid))->fetchField();
    if ($id) {
      // The first content set with a mapping for this source ID wins.
      return $id;
    }
  }
  // No content set of this type maps the given source ID.
  return NULL;
}
// Version of the Migrate API implemented by this module.
define('MIGRATE_API_VERSION', 2);
/*
* Implementation of hook_migrate_api().
function migrate_migrate_api() {
$api = array(
'path' => 'modules',
'integration modules' => array(
'comment' => array(
'description' => t('Core migration support for the comment module'),
'node' => array(
'description' => t('Core migration support for the node module'),
'profile' => array(
'description' => t('Core migration support for the profile module'),
),
'taxonomy' => array(
'description' => t('Core migration support for the taxonomy module'),
),
'user' => array(
'description' => t('Core migration support for the user module'),
// ------------------------------------------------------------------
// Include file helpers - @merlinofchoas: borrowing heavily from views.module
/**
* Get a list of modules that support the current migrate API.
*/
function migrate_get_module_apis($reset = FALSE) {
static $cache = NULL;
if ($reset) {
$cache = NULL;
}
if (!isset($cache)) {
$cache = array();
foreach (module_implements('migrate_api') as $module) {
$function = $module . '_migrate_api';
$info = $function();
if (isset($info['api']) && $info['api'] == 1.000) {
if (isset($info['path'])) {
$info['path'] = drupal_get_path('module', $module) . '/' . $info['path'];
}
else {
$info['path'] = drupal_get_path('module', $module);
}
if (!isset($info['integration modules'])) {
$info['integration modules'] = array($module => array());
}
$settings = variable_get('migrate_integration_settings', NULL);
foreach ($info['integration modules'] as $intmod_name => $intmod_details) {
// If the module was just entered as a string without details, we have to fix.
if (!is_array($intmod_details)) {
unset($info['integration modules'][$intmod_name]);
$intmod_name = $intmod_details;
$intmod_details = array();
}
$default_details = array(
'description' => t('Support for the @intmod module.', array('@intmod' => $intmod_name)),
'status' => TRUE,
);
// Allow override of defaults.
$info['integration modules'][$intmod_name] = $intmod_details + $default_details;
// Overwrite default status if set.
if (isset($settings[$module][$intmod_name])) {
$info['integration modules'][$intmod_name]['status'] = $settings[$module][$intmod_name];
}
}
$cache[$module] = $info;
}
else {
drupal_set_message(t('%function supports Migrate API version %modversion,
Migrate module API version is %version - migration support not loaded.',
array('%function' => $function, '%modversion' => $info['api'],
'%version' => MIGRATE_API_VERSION)));
}
}
}