summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG.txt 1
-rwxr-xr-x migrate.info 1
-rw-r--r-- migrate_example/wine.inc 78
-rw-r--r-- migrate_example/wine.install.inc 1
-rw-r--r-- migrate_example/xml/producers.xml 17
-rw-r--r-- plugins/sources/multiitems.inc 214
-rw-r--r-- plugins/sources/xml.inc 276
7 files changed, 587 insertions, 1 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 2e6ce3a..380e7bb 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -3,6 +3,7 @@ Next release
============
Features and enhancements
+- #1138096 - Added MigrateSourceMultiItems class for self-contained XML sources.
- #1101586 - Add shortcut methods for adding several field mappings at once.
- #1101592 - Replace --itemlimit with --limit, supporting time limits as well.
diff --git a/migrate.info b/migrate.info
index 68648a0..137ed12 100755
--- a/migrate.info
+++ b/migrate.info
@@ -28,6 +28,7 @@ files[] = plugins/destinations/table_copy.inc
files[] = plugins/sources/csv.inc
files[] = plugins/sources/json.inc
files[] = plugins/sources/list.inc
+files[] = plugins/sources/multiitems.inc
files[] = plugins/sources/sql.inc
files[] = plugins/sources/sqlmap.inc
files[] = plugins/sources/mssql.inc
diff --git a/migrate_example/wine.inc b/migrate_example/wine.inc
index e235f4f..48441d9 100644
--- a/migrate_example/wine.inc
+++ b/migrate_example/wine.inc
@@ -310,6 +310,84 @@ class WineProducerXMLMigration extends XMLMigration {
}
}
+/**
+ * TIP: Example of a migration fed by a single XML file that carries both the
+ * item IDs and the item data - each item's ID is simply one of its child
+ * elements. The sample data lives in the xml directory, in producers.xml,
+ * which holds every ID and all producer data used by this example.
+ *
+ * Note that a migration built on an XML source must be derived from
+ * XMLMigration rather than from Migration.
+ */
+class WineProducerMultiXMLMigration extends XMLMigration {
+  public function __construct() {
+    parent::__construct();
+    $this->description = t('XML feed (multi items) of wine producers of the world');
+    $this->dependencies = array('WineRegion', 'WineUser');
+
+    // Source "fields" cannot be discovered automatically from an XML feed, so
+    // the ones available for mapping are listed by hand.
+    $source_fields = array(
+      'name' => t('Producer name'),
+      'description' => t('Description of producer'),
+      'authorid' => t('Numeric ID of the author'),
+      'region' => t('Name of region'),
+    );
+
+    // The source key is the ID read out of each item in the XML file; it is
+    // what identifies an individual item in the map table.
+    $source_key_schema = array(
+      'sourceid' => array(
+        'type' => 'varchar',
+        'length' => 4,
+        'not null' => TRUE,
+      ),
+    );
+    $this->map = new MigrateSQLMap(
+      $this->machineName,
+      $source_key_schema,
+      MigrateDestinationNode::getKeySchema()
+    );
+
+    // A URL works here just as well as a local file path.
+    $xml_directory = drupal_get_path('module', 'migrate_example') . '/xml/';
+    $producers_url = $xml_directory . 'producers.xml';
+
+    // MigrateSourceMultiItems is the source to use whenever the list of IDs
+    // and the data of every item come out of the same file, and an item is
+    // normally spread over several lines of that file - for instance many
+    // items inside one XML or one JSON file, each containing its own ID.
+
+    // Location of an item, relative to the document.
+    $producer_xpath = '/producers/producer';
+
+    // Location of the ID, relative to the item xpath; the two are assembled
+    // into the full path /producers/producer/sourceid.
+    $producer_id_xpath = 'sourceid';
+
+    $producer_items = new MigrateItemsXML($producers_url, $producer_xpath, $producer_id_xpath);
+    $this->source = new MigrateSourceMultiItems($producer_items, $source_fields);
+
+    $this->destination = new MigrateDestinationNode('migrate_example_producer');
+
+    // TIP: With XML sources, besides the source field name handed to
+    // addFieldMapping (the name the value is stored under in the data row as
+    // it travels through the migration), each mapping also states the xpath
+    // that pulls the value out of the XML.
+    // TIP: Field xpaths start at the last element of the item xpath, because
+    // every item's chunk of XML is processed on its own.
+    // (ex. xpath=name is equivalent to a full xpath of /producers/producer/name)
+    $this->addFieldMapping('title', 'name')->xpath('name');
+    $this->addFieldMapping('uid', 'authorid')
+      ->xpath('authorid')
+      ->sourceMigration('WineUser')
+      ->defaultValue(1);
+    $this->addFieldMapping('Migrate Example Wine Regions', 'region')->xpath('region');
+    $this->addFieldMapping('body', 'description')->xpath('description');
+  }
+}
+
+
// TODO: Add node_reference field pointing to producer
class WineWineMigration extends AdvancedExampleMigration {
public function __construct() {
diff --git a/migrate_example/wine.install.inc b/migrate_example/wine.install.inc
index aa3e0f6..31c249d 100644
--- a/migrate_example/wine.install.inc
+++ b/migrate_example/wine.install.inc
@@ -57,6 +57,7 @@ function migrate_example_wine_disable() {
Migration::deregisterMigration('WineBestWith');
Migration::deregisterMigration('WineProducer');
Migration::deregisterMigration('WineProducerXML');
+ Migration::deregisterMigration('WineProducerMultiXML');
Migration::deregisterMigration('WineWine');
Migration::deregisterMigration('WineComment');
MigrationBase::deregisterMigration('WineFinish');
diff --git a/migrate_example/xml/producers.xml b/migrate_example/xml/producers.xml
new file mode 100644
index 0000000..8c39f24
--- /dev/null
+++ b/migrate_example/xml/producers.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<producers>
+ <producer>
+ <sourceid>0002</sourceid>
+ <name>Blue Sky Winery</name>
+ <description>Makers of Warm Sun Blush</description>
+ <authorid>1</authorid>
+ <region>Redwood Valley</region>
+ </producer>
+ <producer>
+ <sourceid>0003</sourceid>
+ <name>Meriam Winery</name>
+ <description>Makers of Extra Dry Chardonnay</description>
+ <authorid>9</authorid>
+ <region>Redwood Valley</region>
+ </producer>
+</producers>
diff --git a/plugins/sources/multiitems.inc b/plugins/sources/multiitems.inc
new file mode 100644
index 0000000..01b71b1
--- /dev/null
+++ b/plugins/sources/multiitems.inc
@@ -0,0 +1,214 @@
+<?php
+
+/**
+ * @file
+ * Support for migration from sources where data spans multiple lines
+ * (ex. xml, json) and IDs for the items are part of each item and multiple
+ * items reside in a single file.
+ */
+
+/**
+ * Base class for sources that keep many items, each carrying its own ID, in
+ * one place (e.g., MigrateItemsXML extends MigrateItems to read both the IDs
+ * and the item data from a single XML document). A subclass has to be able to
+ * list the IDs available for migration and to hand back the data of any one
+ * item given its ID.
+ */
+abstract class MigrateItems {
+  /**
+   * Nothing to initialize at this level; present so that subclasses can call
+   * parent::__construct().
+   */
+  public function __construct() {
+  }
+
+  /**
+   * Describe, as a string, where the items come from (a URL, a file
+   * directory, etc.).
+   *
+   * @return string
+   */
+  abstract public function __toString();
+
+  /**
+   * Produce the unique IDs of the items on offer. Each ID must be usable as
+   * the argument of getItem() to fetch the data for that single item.
+   *
+   * @return array
+   */
+  abstract public function getIdList();
+
+  /**
+   * Report how many IDs are available to be migrated.
+   *
+   * @param boolean $refresh
+   *
+   * @return int
+   */
+  abstract public function count($refresh = FALSE);
+
+  /**
+   * Produce the object representing the source item with the given ID.
+   *
+   * @param mixed $id
+   *
+   * @return stdClass
+   */
+  abstract public function getItem($id);
+}
+
+
+/**
+ * Implementation of MigrateSource, providing the semantics of iterating over
+ * IDs provided by a MigrateItems and retrieving data from a MigrateItems.
+ *
+ * MigrateItemsXML (plugins/sources/xml.inc) is the MigrateItems
+ * implementation that provides a list of IDs and retrieves parsed XML given
+ * an ID from that list.
+ */
+class MigrateSourceMultiItems extends MigrateSource {
+  /**
+   * MigrateItems object used to obtain the list of IDs and source for
+   * all objects.
+   *
+   * @var MigrateItems
+   */
+  protected $itemsClass;
+
+  /**
+   * List of available source fields.
+   *
+   * @var array
+   */
+  protected $fields = array();
+
+  /**
+   * IDs not yet visited in the current iteration. Filled by rewind() and
+   * consumed from the front by next().
+   *
+   * @var array
+   */
+  protected $idList = array();
+
+  /**
+   * Keep the current source ID handy while iterating.
+   *
+   * @var string
+   */
+  protected $id;
+
+  /**
+   * Simple initialization.
+   *
+   * @param MigrateItems $items_class
+   *  Object supplying both the ID list and the data for each ID.
+   * @param array $fields
+   *  Keys: machine names of the source fields.
+   *  Values: human-friendly descriptions of the fields.
+   */
+  public function __construct(MigrateItems $items_class, $fields = array()) {
+    parent::__construct();
+
+    $this->itemsClass = $items_class;
+    $this->fields = $fields;
+  }
+
+  /**
+   * Return a string representing the source.
+   *
+   * @return string
+   */
+  public function __toString() {
+    return (string) $this->itemsClass;
+  }
+
+  /**
+   * Returns a list of fields available to be mapped from the source query.
+   * Since we can't reliably figure out what "fields" are in the source,
+   * it's up to the implementing Migration constructor to fill them in.
+   *
+   * @return array
+   *  Keys: machine names of the fields (to be passed to addFieldMapping)
+   *  Values: Human-friendly descriptions of the fields.
+   */
+  public function fields() {
+    return $this->fields;
+  }
+
+  /**
+   * It's the items class that knows how many records are available, so ask it.
+   *
+   * @param boolean $refresh
+   *
+   * @return int
+   */
+  public function count($refresh = FALSE) {
+    return $this->itemsClass->count($refresh);
+  }
+
+  /**
+   * Implementation of Iterator::rewind() - called before beginning a foreach loop.
+   *
+   * Builds the list of IDs to visit - from the migration's 'idlist' option
+   * when one is set, otherwise from the items class - and then loads the
+   * first row.
+   */
+  public function rewind() {
+    $migration = Migration::currentMigration();
+    migrate_instrument_start('MigrateSourceMultiItems rewind');
+    $this->numProcessed = 0;
+    $idlist = $migration->getOption('idlist');
+    if ($idlist) {
+      $this->idList = explode(',', $idlist);
+    }
+    else {
+      // getIdList() may return NULL when the source cannot be read or holds
+      // no IDs; normalize to an empty list so next() always has an array.
+      $ids = $this->itemsClass->getIdList();
+      $this->idList = is_array($ids) ? $ids : array();
+    }
+    migrate_instrument_stop('MigrateSourceMultiItems rewind');
+    // Load up the first row
+    $this->next();
+  }
+
+  /**
+   * Implementation of Iterator::next() - called at the bottom of the loop implicitly,
+   * as well as explicitly from rewind().
+   *
+   * On return, currentRow/currentKey describe the next row to migrate, or are
+   * both NULL when the ID list is exhausted or the item limit was reached.
+   */
+  public function next() {
+    $migration = Migration::currentMigration();
+    migrate_instrument_start('MigrateSourceMultiItems next');
+    $this->currentRow = NULL;
+    $this->currentKey = NULL;
+
+    $itemlimit = $migration->getItemLimit();
+
+    // Get next item (next ID not already in the map, unless needs_update=1).
+    // The loop is driven by the list itself rather than by the truthiness of
+    // the shifted value, so an empty ID is skipped instead of ending the
+    // iteration early.
+    while (!empty($this->idList)) {
+      $this->id = array_shift($this->idList);
+      // Skip empty IDs
+      if (empty($this->id)) {
+        continue;
+      }
+      // Enforce the itemlimit. Leave through the bottom of the method so the
+      // instrumentation timer started above is always stopped.
+      if ($itemlimit && $this->numProcessed >= $itemlimit) {
+        break;
+      }
+      // Check the map - if it's already mapped, and not marked for update, skip it
+      $map_row = $migration->getMap()->getRowBySource(array($this->id));
+      if ($map_row && $map_row['needs_update'] == 0) {
+        continue;
+      }
+      // TODO: Also check message table (non-informational entries here indicate failed items, we don't
+      // want to run through them again)
+      $this->currentRow = $this->itemsClass->getItem($this->id);
+      if (!$this->currentRow) {
+        $this->currentRow = NULL;
+        $this->currentKey = NULL;
+        continue;
+      }
+
+      $this->currentKey = array($this->id);
+      // Save the ID using the map source key - it will be used for mapping
+      $sourceKey = $migration->getMap()->getSourceKey();
+      $key_name = key($sourceKey);
+      $this->currentRow->$key_name = $this->id;
+      // Add map info to the row, if present
+      if ($map_row) {
+        foreach ($map_row as $field => $value) {
+          $field = 'migrate_map_' . $field;
+          $this->currentRow->$field = $value;
+        }
+      }
+      // Allow the Migration to prepare this row. prepareRow() can return boolean
+      // FALSE to stop processing this row. To add/modify fields on the
+      // result, modify $row by reference.
+      $return = TRUE;
+      if (method_exists($migration, 'prepareRow')) {
+        $return = $migration->prepareRow($this->currentRow);
+      }
+
+      if ($return !== FALSE) {
+        // OK, got a valid one, break out and return
+        $this->numProcessed++;
+        break;
+      }
+
+      // The row was rejected: forget it, so that it is not left behind as the
+      // current row if no later ID yields one.
+      $this->currentRow = NULL;
+      $this->currentKey = NULL;
+    }
+
+    // No current row means no current ID either.
+    if ($this->currentRow === NULL) {
+      $this->id = NULL;
+    }
+
+    migrate_instrument_stop('MigrateSourceMultiItems next');
+  }
+}
+
diff --git a/plugins/sources/xml.inc b/plugins/sources/xml.inc
index 1bd634e..d286538 100644
--- a/plugins/sources/xml.inc
+++ b/plugins/sources/xml.inc
@@ -3,8 +3,23 @@
/**
* @file
* Support for migration from XML sources.
+ *
+ * NOTE: There are two methods supported in this file.
+ *
+ * 1) List - ids are listed in an index xml file and the data for each item is
+ * stored in a separate xml file per item. Use MigrateSourceList class
+ * as the source.
+ *
+ * 2) MultiItems - ids are part of the item and all items are stored in a
+ * single xml file. Use MigrateSourceMultiItems class as the source.
+ *
+ * Both of these methods are described in more detail in the wine migration
+ * example.
*/
+/*===========================================================================*/
+/* List Method */
+/*===========================================================================*/
/**
* Implementation of MigrateList, for retrieving a list of IDs to be migrated
* from an XML document.
@@ -52,7 +67,7 @@ class MigrateListXML extends MigrateList {
foreach (libxml_get_errors() as $error) {
$migration->showMessage($error);
}
- return array();
+ return NULL;
}
}
@@ -269,3 +284,262 @@ abstract class XMLMigration extends Migration {
}
}
}
+
+/*===========================================================================*/
+/* MultiItems Method */
+/*===========================================================================*/
+/**
+ * Implementation of MigrateItems, for providing a list of IDs and for
+ * retrieving a parsed XML document given an ID from this list.
+ *
+ * Both the ID list and the items are parsed from the same XML document and
+ * cached on the object, so the document is read again only when a refresh is
+ * requested or nothing has been cached yet.
+ */
+class MigrateItemsXML extends MigrateItems {
+  /**
+   * A URL pointing to an XML document containing the ids and data.
+   *
+   * @var string
+   */
+  protected $xmlUrl;
+
+  /**
+   * xpath identifying the element used for each item
+   *
+   * @var string
+   */
+  protected $itemXpath;
+
+  /**
+   * @return string
+   *  The xpath identifying the element used for each item.
+   */
+  public function getItemXpath() {
+    return $this->itemXpath;
+  }
+
+  /**
+   * xpath identifying the subelement under itemXpath that holds the id for
+   * each item.
+   *
+   * @var string
+   */
+  protected $itemIDXpath;
+
+  /**
+   * @return string
+   *  The xpath, relative to an item, of the element holding the item's ID.
+   */
+  public function getIDXpath() {
+    return $this->itemIDXpath;
+  }
+
+  /**
+   * Cached list of IDs; NULL until IDs have been parsed successfully.
+   *
+   * @var array
+   */
+  protected $cache_ids = NULL;
+
+  /**
+   * Cached items keyed by ID; NULL until items have been parsed successfully.
+   *
+   * @var array
+   */
+  protected $cache_items = NULL;
+
+  /**
+   * @param string $xml_url
+   *  URL (or file path) of the XML document holding IDs and data.
+   * @param string $item_xpath
+   *  xpath, relative to the document, selecting the item elements.
+   * @param string $itemID_xpath
+   *  xpath, relative to an item element, selecting its ID.
+   */
+  public function __construct($xml_url, $item_xpath='item', $itemID_xpath='id') {
+    parent::__construct();
+    $this->xmlUrl = $xml_url;
+    $this->itemXpath = $item_xpath;
+    $this->itemIDXpath = $itemID_xpath;
+
+    // Suppress errors during parsing, so we can pick them up after
+    libxml_use_internal_errors(TRUE);
+  }
+
+  /**
+   * Our public face is the URL we're getting items from
+   *
+   * @return string
+   */
+  public function __toString() {
+    return 'url = ' . $this->xmlUrl . ' | item xpath = ' . $this->itemXpath .
+      ' | item ID xpath = ' . $this->itemIDXpath;
+  }
+
+  /**
+   * Load the XML at the given URL, and return an array of the IDs found
+   * within it. Previously cached IDs are returned without reading the
+   * document again.
+   *
+   * @return array
+   *  The IDs, or NULL when the document cannot be loaded or holds no IDs.
+   */
+  public function getIdList() {
+    if ($this->cache_ids !== NULL) {
+      return $this->cache_ids;
+    }
+    migrate_instrument_start("Retrieve $this->xmlUrl");
+    $xml = simplexml_load_file($this->xmlUrl);
+    migrate_instrument_stop("Retrieve $this->xmlUrl");
+    // Compare against FALSE explicitly: a successfully loaded but empty root
+    // element also evaluates as false, and that is not a load failure.
+    if ($xml === FALSE) {
+      $this->reportLoadFailure();
+      return NULL;
+    }
+    return $this->getIDsFromXML($xml);
+  }
+
+  /**
+   * Given an XML object, parse out the IDs for processing and return them as
+   * an array. The location of the IDs in the XML are based on the item xpath
+   * and item ID xpath set in the constructor.
+   *    eg, xpath = itemXpath . '/' . itemIDXpath
+   * IDs are cached. The list of IDs are returned from the cache except when
+   * this is the first call (ie, cache is NULL) OR the refresh parameter is
+   * TRUE.
+   *
+   * @param SimpleXMLElement $xml
+   * @param boolean $refresh
+   *
+   * @return array
+   *  The IDs as strings, or NULL when the xpath matches nothing.
+   */
+  protected function getIDsFromXML(SimpleXMLElement $xml, $refresh=FALSE) {
+    if ($refresh !== TRUE && $this->cache_ids !== NULL) {
+      return $this->cache_ids;
+    }
+
+    $this->cache_ids = NULL;
+    $result = $xml->xpath($this->itemXpath . '/' . $this->itemIDXpath);
+    if (!$result) {
+      return NULL;
+    }
+
+    $ids = array();
+    foreach ($result as $id) {
+      $ids[] = (string) $id;
+    }
+    $this->cache_ids = $ids;
+    return $ids;
+  }
+
+  /**
+   * Return a count of all available IDs from the source listing.
+   *
+   * @param boolean $refresh
+   *  TRUE to re-read the document instead of counting the cached IDs.
+   *
+   * @return int
+   *  Number of IDs; 0 when the document cannot be loaded or holds no IDs.
+   */
+  public function count($refresh = FALSE) {
+    if ($refresh !== TRUE && $this->cache_ids !== NULL) {
+      return count($this->cache_ids);
+    }
+    $xml = simplexml_load_file($this->xmlUrl);
+    if ($xml === FALSE) {
+      return 0;
+    }
+    // getIDsFromXML() returns NULL when nothing matches; never count() that.
+    $ids = $this->getIDsFromXML($xml, $refresh);
+    return is_array($ids) ? count($ids) : 0;
+  }
+
+  /**
+   * Load the XML at the given URL, and return an array of the Items found
+   * within it. Previously cached items are returned without reading the
+   * document again, so fetching items one at a time through getItem() does
+   * not re-parse the document for every item.
+   *
+   * @return array
+   *  Items keyed by ID, or NULL when the document cannot be loaded or holds
+   *  no items.
+   */
+  public function getAllItems() {
+    if ($this->cache_items !== NULL) {
+      return $this->cache_items;
+    }
+    $xml = simplexml_load_file($this->xmlUrl);
+    if ($xml === FALSE) {
+      $this->reportLoadFailure();
+      return NULL;
+    }
+    return $this->getItemsFromXML($xml);
+  }
+
+  /**
+   * Given an XML object, parse out the items for processing and return them as
+   * an array. The location of the items in the XML are based on the item xpath
+   * set in the constructor. Items are cached. The list of items are returned
+   * from the cache except when this is the first call (ie, cache is NULL) OR
+   * the refresh parameter is TRUE.
+   *
+   * Items are cached as an array of key=ID and value=stdclass object with
+   * attribute xml containing the xml SimpleXMLElement object of the item.
+   * Items for which no ID can be found are left out, since they could never
+   * be requested by ID.
+   *
+   * @param SimpleXMLElement $xml
+   * @param boolean $refresh
+   *
+   * @return array
+   *  Items keyed by ID, or NULL when the item xpath matches nothing.
+   */
+  public function getItemsFromXML(SimpleXMLElement $xml, $refresh=FALSE) {
+    if ($refresh !== TRUE && $this->cache_items !== NULL) {
+      return $this->cache_items;
+    }
+
+    $this->cache_items = NULL;
+    $result = $xml->xpath($this->itemXpath);
+    if (!$result) {
+      return NULL;
+    }
+
+    $items = array();
+    foreach ($result as $item_xml) {
+      $id = $this->getItemID($item_xml);
+      if ($id === NULL) {
+        continue;
+      }
+      $item = new stdclass;
+      $item->xml = $item_xml;
+      $items[$id] = $item;
+    }
+    $this->cache_items = $items;
+    return $items;
+  }
+
+  /**
+   * Get the item ID from the itemXML based on itemIDXpath.
+   *
+   * @param SimpleXMLElement $itemXML
+   *
+   * @return string
+   *  The ID, or NULL when the item has no element at itemIDXpath.
+   */
+  protected function getItemID($itemXML){
+    return $this->getElementValue($itemXML, $this->itemIDXpath);
+  }
+
+  /**
+   * Get an element from the itemXML based on an xpath.
+   *
+   * @param SimpleXMLElement $itemXML
+   * @param string $xpath
+   *  xpath evaluated relative to $itemXML.
+   *
+   * @return string
+   *  String value of the first match, or NULL when there is none.
+   */
+  protected function getElementValue($itemXML,$xpath){
+    $value = NULL;
+    if ($itemXML) {
+      $result = $itemXML->xpath($xpath);
+      if ($result) {
+        $value = (string) $result[0];
+      }
+    }
+    return $value;
+  }
+
+  /**
+   * Implementors are expected to return an object representing a source item.
+   * Items are cached as an array of key=ID and value=stdclass object with
+   * attribute xml containing the xml SimpleXMLElement object of the item.
+   *
+   * When no item exists for the ID, an error message (including any pending
+   * libxml errors) is saved against the ID in the migration's map.
+   *
+   * @param mixed $id
+   *
+   * @return stdClass
+   *  The item, or NULL when the ID is empty or no such item exists.
+   */
+  public function getItem($id) {
+    // Make sure we actually have an ID
+    if (empty($id)) {
+      return NULL;
+    }
+    $items = $this->getAllItems();
+    // $items is NULL when the document could not be read, and the ID may
+    // simply be absent from it - check both before indexing.
+    if (is_array($items) && isset($items[$id])) {
+      return $items[$id];
+    }
+
+    $migration = Migration::currentMigration();
+    $message = t('Loading of item with ID !id failed:', array('!id' => $id));
+    foreach (libxml_get_errors() as $error) {
+      $message .= "\n" . $error->message;
+    }
+    $migration->getMap()->saveMessage(
+      array($id), $message, MigrationBase::MESSAGE_ERROR);
+    libxml_clear_errors();
+    return NULL;
+  }
+
+  /**
+   * Show the current migration a message saying the document failed to load,
+   * followed by the text of each pending libxml error. The libxml error
+   * buffer is left untouched so that getItem() can still attach the errors
+   * to the item it was asked for.
+   */
+  protected function reportLoadFailure() {
+    $migration = Migration::currentMigration();
+    $migration->showMessage(t('Loading of !xmlUrl failed:',
+        array('!xmlUrl' => $this->xmlUrl)));
+    foreach (libxml_get_errors() as $error) {
+      // $error is a LibXMLError object; pass its text, not the object.
+      $migration->showMessage(trim($error->message));
+    }
+  }
+}