summaryrefslogtreecommitdiffstats
path: root/core/lib/Drupal/Component/Gettext/PoStreamReader.php
diff options
context:
space:
mode:
Diffstat (limited to 'core/lib/Drupal/Component/Gettext/PoStreamReader.php')
-rw-r--r--core/lib/Drupal/Component/Gettext/PoStreamReader.php589
1 files changed, 589 insertions, 0 deletions
diff --git a/core/lib/Drupal/Component/Gettext/PoStreamReader.php b/core/lib/Drupal/Component/Gettext/PoStreamReader.php
new file mode 100644
index 0000000..bd916a7
--- /dev/null
+++ b/core/lib/Drupal/Component/Gettext/PoStreamReader.php
@@ -0,0 +1,589 @@
+<?php
+
+/**
+ * @file
+ * Definition of Drupal\Component\Gettext\PoStreamReader.
+ */
+
+namespace Drupal\Component\Gettext;
+
+use Drupal\Component\Gettext\PoReaderInterface;
+use Drupal\Component\Gettext\PoStreamInterface;
+use Drupal\Component\Gettext\PoHeader;
+
+/**
+ * Implements Gettext PO stream reader.
+ *
+ * The PO file format parsing is implemented according to the documentation at
+ * http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files
+ */
+class PoStreamReader implements PoStreamInterface, PoReaderInterface {
+
+ /**
+ * Source line number of the stream being parsed.
+ *
+ * @var int
+ */
+ private $_line_number = 0;
+
+ /**
+ * Parser context for the stream reader state machine.
+ *
+ * Possible contexts are:
+ * - 'COMMENT' (#)
+ * - 'MSGID' (msgid)
+ * - 'MSGID_PLURAL' (msgid_plural)
+ * - 'MSGCTXT' (msgctxt)
+ * - 'MSGSTR' (msgstr or msgstr[])
+ * - 'MSGSTR_ARR' (msgstr_arg)
+ *
+ * @var string
+ */
+ private $_context = 'COMMENT';
+
+ /**
+ * Current entry being read. Incomplete.
+ *
+ * @var array
+ */
+ private $_current_item = array();
+
+ /**
+ * Current plural index for plural translations.
+ *
+ * @var int
+ */
+ private $_current_plural_index = 0;
+
+ /**
+ * URI of the PO stream that is being read.
+ *
+ * @var string
+ */
+ private $_uri = '';
+
+ /**
+ * Language code for the PO stream being read.
+ *
+ * @var string
+ */
+ private $_langcode = NULL;
+
+ /**
+ * Size of the current PO stream.
+ *
+ * @var int
+ */
+ private $_size;
+
+ /**
+ * File handle of the current PO stream.
+ *
+ * @var resource
+ */
+ private $_fd;
+
+ /**
+ * The PO stream header.
+ *
+ * @var Drupal\Component\Gettext\PoHeader
+ */
+ private $_header;
+
+ /**
+ * Object wrapper for the last read source/translation pair.
+ *
+ * @var Drupal\Component\Gettext\PoItem
+ */
+ private $_last_item;
+
+ /**
+ * Indicator of whether the stream reading is finished.
+ *
+ * @var boolean
+ */
+ private $_finished;
+
+ /**
+ * Array of translated error strings recorded on reading this stream so far.
+ *
+ * @var array
+ */
+ private $_errors;
+
+ /**
+ * Implements Drupal\Component\Gettext\PoMetadataInterface::getLangcode().
+ */
+ public function getLangcode() {
+ return $this->_langcode;
+ }
+
+ /**
+ * Implements Drupal\Component\Gettext\PoMetadataInterface::setLangcode().
+ */
+ public function setLangcode($langcode) {
+ $this->_langcode = $langcode;
+ }
+
+ /**
+ * Implements Drupal\Component\Gettext\PoMetadataInterface::getHeader().
+ */
+ public function getHeader() {
+ return $this->_header;
+ }
+
+ /**
+ * Implements Drupal\Component\Gettext\PoMetadataInterface::setHeader().
+ *
+ * Not applicable to stream reading and therefore not implemented.
+ */
+ public function setHeader(PoHeader $header) {
+ }
+
+ /**
+ * Implements Drupal\Component\Gettext\PoStreamInterface::getURI().
+ */
+ public function getURI() {
+ return $this->_uri;
+ }
+
+ /**
+ * Implements Drupal\Component\Gettext\PoStreamInterface::setURI().
+ */
+ public function setURI($uri) {
+ $this->_uri = $uri;
+ }
+
+ /**
+ * Implements Drupal\Component\Gettext\PoStreamInterface::open().
+ *
+ * Opens the stream and reads the header. The stream is ready for reading
+ * items after.
+ *
+ * @throws Exception
+ * If the URI is not yet set.
+ */
+ public function open() {
+ if (!empty($this->_uri)) {
+ $this->_fd = fopen($this->_uri, 'rb');
+ $this->_size = ftell($this->_fd);
+ $this->readHeader();
+ }
+ else {
+ throw new Exception('Cannot open stream without URI set.');
+ }
+ }
+
+ /**
+ * Implements Drupal\Component\Gettext\PoStreamInterface::close().
+ *
+ * @throws Exception
+ * If the stream is not open.
+ */
+ public function close() {
+ if ($this->_fd) {
+ fclose($this->_fd);
+ }
+ else {
+ throw new Exception('Cannot close stream that is not open.');
+ }
+ }
+
+ /**
+ * Implements Drupal\Component\Gettext\PoReaderInterface::readItem().
+ */
+ public function readItem() {
+ // Clear out the last item.
+ $this->_last_item = NULL;
+
+ // Read until finished with the stream or a complete item was identified.
+ while (!$this->_finished && is_null($this->_last_item)) {
+ $this->readLine();
+ }
+
+ return $this->_last_item;
+ }
+
+ /**
+ * Read the header from the PO stream.
+ *
+ * The header is a special case PoItem, using the empty string as source and
+ * key-value pairs as translation. We just reuse the item reader logic to
+ * read the header.
+ */
+ private function readHeader() {
+ $item = $this->readItem();
+ $header = new PoHeader;
+ $header->setFromString(trim($item->getTranslation()));
+ $this->_header = $header;
+ }
+
+ /**
+ * Reads a line from the PO stream and stores data internally.
+ *
+ * Expands $this->_current_item based on new data for the current item. If
+ * this line ends the current item, it is saved with setItemFromArray() with
+ * data from $this->_current_item.
+ *
+ * An internal state machine is maintained in this reader using $this->_context
+ * as the reading state. PO items are inbetween COMMENT states (when items have
+ * at least one line or comment inbetween them or indicated by MSGSTR or
+ * MSGSTR_ARR followed immediately by an MSGID or MSGCTXT (when items closely
+ * follow each other).
+ *
+ * @return
+ * FALSE if an error was logged, NULL otherwise. The errors are considered
+ * non-blocking, so reading can continue, while the errors are collected
+ * for later presentation.
+ */
+ private function readLine() {
+ // Read a line and set the stream finished indicator if it was not
+ // possible anymore.
+ $line = fgets($this->_fd);
+ $this->_finished = ($line === FALSE);
+
+ if (!$this->_finished) {
+
+ if ($this->_line_number == 0) {
+ // The first line might come with a UTF-8 BOM, which should be removed.
+ $line = str_replace("\xEF\xBB\xBF", '', $line);
+ // Current plurality for 'msgstr[]'.
+ $this->_current_plural_index = 0;
+ }
+
+ // Track the line number for error reporting.
+ $this->_line_number++;
+
+ // Initialize common values for error logging.
+ $log_vars = array(
+ '%uri' => $this->getURI(),
+ '%line' => $this->_line_number,
+ );
+ $t = get_t();
+
+ // Trim away the linefeed. \\n might appear at the end of the string if
+ // another line continuing the same string follows. We can remove that.
+ $line = trim(strtr($line, array("\\\n" => "")));
+
+ if (!strncmp('#', $line, 1)) {
+ // Lines starting with '#' are comments.
+
+ if ($this->_context == 'COMMENT') {
+ // Already in comment context, add to current comment.
+ $this->_current_item['#'][] = substr($line, 1);
+ }
+ elseif (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
+ // We are currently in string context, save current item.
+ $this->setItemFromArray($this->_current_item);
+
+ // Start a new entry for the comment.
+ $this->_current_item = array();
+ $this->_current_item['#'][] = substr($line, 1);
+
+ $this->_context = 'COMMENT';
+ return;
+ }
+ else {
+ // A comment following any other context is a syntax error.
+ $this->_errors[] = $t('The translation stream %uri contains an error: "msgstr" was expected but not found on line %line.', $log_vars);
+ return FALSE;
+ }
+ return;
+ }
+ elseif (!strncmp('msgid_plural', $line, 12)) {
+ // A plural form for the current source string.
+
+ if ($this->_context != 'MSGID') {
+ // A plural form can only be added to an msgid directly.
+ $this->_errors[] = $t('The translation stream %uri contains an error: "msgid_plural" was expected but not found on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ // Remove 'msgid_plural' and trim away whitespace.
+ $line = trim(substr($line, 12));
+
+ // Only the plural source string is left, parse it.
+ $quoted = $this->parseQuoted($line);
+ if ($quoted === FALSE) {
+ // The plural form must be wrapped in quotes.
+ $this->_errors[] = $t('The translation stream %uri contains a syntax error on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ // Append the plural source to the current entry.
+ if (is_string($this->_current_item['msgid'])) {
+ // The first value was stored as string. Now we know the context is
+ // plural, it is converted to array.
+ $this->_current_item['msgid'] = array($this->_current_item['msgid']);
+ }
+ $this->_current_item['msgid'][] = $quoted;
+
+ $this->_context = 'MSGID_PLURAL';
+ return;
+ }
+ elseif (!strncmp('msgid', $line, 5)) {
+ // Starting a new message.
+
+ if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
+ // We are currently in string context, save current item.
+ $this->setItemFromArray($this->_current_item);
+
+ // Start a new context for the msgid.
+ $this->_current_item = array();
+ }
+ elseif ($this->_context == 'MSGID') {
+ // We are currently already in the context, meaning we passed an id with no data.
+ $this->_errors[] = $t('The translation stream %uri contains an error: "msgid" is unexpected on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ // Remove 'msgid' and trim away whitespace.
+ $line = trim(substr($line, 5));
+
+ // Only the message id string is left, parse it.
+ $quoted = $this->parseQuoted($line);
+ if ($quoted === FALSE) {
+ // The message id must be wrapped in quotes.
+ $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgid" on line %line.', $log_vars, $log_vars);
+ return FALSE;
+ }
+
+ $this->_current_item['msgid'] = $quoted;
+ $this->_context = 'MSGID';
+ return;
+ }
+ elseif (!strncmp('msgctxt', $line, 7)) {
+ // Starting a new context.
+
+ if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
+ // We are currently in string context, save current item.
+ $this->setItemFromArray($this->_current_item);
+ $this->_current_item = array();
+ }
+ elseif (!empty($this->_current_item['msgctxt'])) {
+ // A context cannot apply to another context.
+ $this->_errors[] = $t('The translation stream %uri contains an error: "msgctxt" is unexpected on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ // Remove 'msgctxt' and trim away whitespaces.
+ $line = trim(substr($line, 7));
+
+ // Only the msgctxt string is left, parse it.
+ $quoted = $this->parseQuoted($line);
+ if ($quoted === FALSE) {
+ // The context string must be quoted.
+ $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgctxt" on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ $this->_current_item['msgctxt'] = $quoted;
+
+ $this->_context = 'MSGCTXT';
+ return;
+ }
+ elseif (!strncmp('msgstr[', $line, 7)) {
+ // A message string for a specific plurality.
+
+ if (($this->_context != 'MSGID') &&
+ ($this->_context != 'MSGCTXT') &&
+ ($this->_context != 'MSGID_PLURAL') &&
+ ($this->_context != 'MSGSTR_ARR')) {
+ // Plural message strings must come after msgid, msgxtxt,
+ // msgid_plural, or other msgstr[] entries.
+ $this->_errors[] = $t('The translation stream %uri contains an error: "msgstr[]" is unexpected on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ // Ensure the plurality is terminated.
+ if (strpos($line, ']') === FALSE) {
+ $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ // Extract the plurality.
+ $frombracket = strstr($line, '[');
+ $this->_current_plural_index = substr($frombracket, 1, strpos($frombracket, ']') - 1);
+
+ // Skip to the next whitespace and trim away any further whitespace,
+ // bringing $line to the message text only.
+ $line = trim(strstr($line, " "));
+
+ $quoted = $this->parseQuoted($line);
+ if ($quoted === FALSE) {
+ // The string must be quoted.
+ $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
+ return FALSE;
+ }
+ if (!isset($this->_current_item['msgstr']) || !is_array($this->_current_item['msgstr'])) {
+ $this->_current_item['msgstr'] = array();
+ }
+
+ $this->_current_item['msgstr'][$this->_current_plural_index] = $quoted;
+
+ $this->_context = 'MSGSTR_ARR';
+ return;
+ }
+ elseif (!strncmp("msgstr", $line, 6)) {
+ // A string pair for an msgidid (with optional context).
+
+ if (($this->_context != 'MSGID') && ($this->_context != 'MSGCTXT')) {
+ // Strings are only valid within an id or context scope.
+ $this->_errors[] = $t('The translation stream %uri contains an error: "msgstr" is unexpected on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ // Remove 'msgstr' and trim away away whitespaces.
+ $line = trim(substr($line, 6));
+
+ // Only the msgstr string is left, parse it.
+ $quoted = $this->parseQuoted($line);
+ if ($quoted === FALSE) {
+ // The string must be quoted.
+ $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgstr" on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ $this->_current_item['msgstr'] = $quoted;
+
+ $this->_context = 'MSGSTR';
+ return;
+ }
+ elseif ($line != '') {
+ // Anything that is not a token may be a continuation of a previous token.
+
+ $quoted = $this->parseQuoted($line);
+ if ($quoted === FALSE) {
+ // This string must be quoted.
+ $this->_errors[] = $t('The translation stream %uri contains an error: string continuation expected on line %line.', $log_vars);
+ return FALSE;
+ }
+
+ // Append the string to the current item.
+ if (($this->_context == 'MSGID') || ($this->_context == 'MSGID_PLURAL')) {
+ if (is_array($this->_current_item['msgid'])) {
+ // Add string to last array element for plural sources.
+ $last_index = count($this->_current_item['msgid']) - 1;
+ $this->_current_item['msgid'][$last_index] .= $quoted;
+ }
+ else {
+ // Singular source, just append the string.
+ $this->_current_item['msgid'] .= $quoted;
+ }
+ }
+ elseif ($this->_context == 'MSGCTXT') {
+ // Multiline context name.
+ $this->_current_item['msgctxt'] .= $quoted;
+ }
+ elseif ($this->_context == 'MSGSTR') {
+ // Multiline translation string.
+ $this->_current_item['msgstr'] .= $quoted;
+ }
+ elseif ($this->_context == 'MSGSTR_ARR') {
+ // Multiline plural translation string.
+ $this->_current_item['msgstr'][$this->_current_plural_index] .= $quoted;
+ }
+ else {
+ // No valid context to append to.
+ $this->_errors[] = $t('The translation stream %uri contains an error: unexpected string on line %line.', $log_vars);
+ return FALSE;
+ }
+ return;
+ }
+ }
+
+ // Empty line read or EOF of PO stream, close out the last entry.
+ if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
+ $this->setItemFromArray($this->_current_item);
+ $this->_current_item = array();
+ }
+ elseif ($this->_context != 'COMMENT') {
+ $this->_errors[] = $t('The translation stream %uri ended unexpectedly at line %line.', $log_vars);
+ return FALSE;
+ }
+ }
+
+ /**
+ * Store the parsed values as a PoItem object.
+ */
+ public function setItemFromArray($value) {
+ $plural = FALSE;
+
+ $comments = '';
+ if (isset($value['#'])) {
+ $comments = $this->shortenComments($value['#']);
+ }
+
+ if (is_array($value['msgstr'])) {
+ // Sort plural variants by their form index.
+ ksort($value['msgstr']);
+ $plural = TRUE;
+ }
+
+ $item = new PoItem();
+ $item->setContext(isset($value['msgctxt']) ? $value['msgctxt'] : '');
+ $item->setSource($value['msgid']);
+ $item->setTranslation($value['msgstr']);
+ $item->setPlural($plural);
+ $item->setComment($comments);
+ $item->setLangcode($this->_langcode);
+
+ $this->_last_item = $item;
+
+ $this->_context = 'COMMENT';
+ }
+
+ /**
+ * Parses a string in quotes.
+ *
+ * @param $string
+ * A string specified with enclosing quotes.
+ *
+ * @return
+ * The string parsed from inside the quotes.
+ */
+ function parseQuoted($string) {
+ if (substr($string, 0, 1) != substr($string, -1, 1)) {
+ // Start and end quotes must be the same.
+ return FALSE;
+ }
+ $quote = substr($string, 0, 1);
+ $string = substr($string, 1, -1);
+ if ($quote == '"') {
+ // Double quotes: strip slashes.
+ return stripcslashes($string);
+ }
+ elseif ($quote == "'") {
+ // Simple quote: return as-is.
+ return $string;
+ }
+ else {
+ // Unrecognized quote.
+ return FALSE;
+ }
+ }
+
+ /**
+ * Generates a short, one-string version of the passed comment array.
+ *
+ * @param $comment
+ * An array of strings containing a comment.
+ *
+ * @return
+ * Short one-string version of the comment.
+ */
+ private function shortenComments($comment) {
+ $comm = '';
+ while (count($comment)) {
+ $test = $comm . substr(array_shift($comment), 1) . ', ';
+ if (strlen($comm) < 130) {
+ $comm = $test;
+ }
+ else {
+ break;
+ }
+ }
+ return trim(substr($comm, 0, -2));
+ }
+
+}