diff options
Diffstat (limited to 'core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php')
-rw-r--r-- | core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php | 517 |
1 files changed, 347 insertions, 170 deletions
diff --git a/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php b/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php index bbbec47..3915e32 100644 --- a/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php +++ b/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php @@ -5,8 +5,8 @@ * * LICENSE * - * Copyright (c) 2009-2012 Nicholas J Humfrey. - * Copyright (c) 1997-2006 Aduna (http://www.aduna-software.com/) + * Copyright (c) 2009-2013 Nicholas J Humfrey. + * Copyright (c) 1997-2013 Aduna (http://www.aduna-software.com/) * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,24 +33,36 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package EasyRdf - * @copyright Copyright (c) 2009-2012 Nicholas J Humfrey + * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey * Copyright (c) 1997-2006 Aduna (http://www.aduna-software.com/) * @license http://www.opensource.org/licenses/bsd-license.php - * @version $Id$ */ /** * Class to parse Turtle with no external dependancies. * - * http://www.w3.org/TR/turtle/ - * + * It is a translation from Java to PHP of the Sesame Turtle Parser: + * http://bit.ly/TurtleParser + * + * Lasted updated against version: + * ecda6a15a200a2fc6a062e2e43081257c3ccd4e6 (Mon Jul 29 12:05:58 2013) + * * @package EasyRdf - * @copyright Copyright (c) 2009-2012 Nicholas J Humfrey - * Copyright (c) 1997-2006 Aduna (http://www.aduna-software.com/) + * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey + * Copyright (c) 1997-2013 Aduna (http://www.aduna-software.com/) * @license http://www.opensource.org/licenses/bsd-license.php */ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples { + protected $data; + protected $namespaces; + protected $subject; + protected $predicate; + protected $object; + + protected $line; + protected $column; + /** * Constructor * @@ -80,13 +92,13 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } $this->data = $data; - $this->len = strlen($data); - $this->pos = 0; - $this->namespaces = array(); $this->subject = null; $this->predicate = null; $this->object = null; + + $this->line = 1; + $this->column = 1; $this->resetBnodeMap(); @@ -106,15 +118,29 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples */ protected function parseStatement() { - $c = $this->peek(); - if ($c == '@') { - $this->parseDirective(); + $directive = ''; + while (true) { + $c = $this->read(); + if ($c == -1 || self::isWhitespace($c)) { + $this->unread($c); + break; + } else { + $directive .= $c; + } + } + + if (preg_match("/^(@|prefix$|base$)/i", $directive)) { + $this->parseDirective($directive); $this->skipWSC(); - $this->verifyCharacter($this->read(), "."); + // SPARQL BASE and PREFIX lines do not end in . + if ($directive[0] == "@") { + $this->verifyCharacterOrFail($this->read(), "."); + } } else { + $this->unread($directive); $this->parseTriples(); $this->skipWSC(); - $this->verifyCharacter($this->read(), "."); + $this->verifyCharacterOrFail($this->read(), "."); } } @@ -122,30 +148,24 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples * Parse a directive [3] * @ignore */ - protected function parseDirective() + protected function parseDirective($directive) { - // Verify that the first characters form the string "prefix" - $this->verifyCharacter($this->read(), "@"); - - $directive = ''; - - $c = $this->read(); - while ($c != -1 && !self::isWhitespace($c)) { - $directive .= $c; - $c = $this->read(); - } - - if ($directive == "prefix") { + $directive = strtolower($directive); + if ($directive == "prefix" || $directive == '@prefix') { $this->parsePrefixID(); - } elseif ($directive == "base") { + } elseif ($directive == "base" || $directive == '@base') { $this->parseBase(); - } elseif (strlen($directive) == 0) { - throw new EasyRdf_Exception( - "Turtle Parse Error: directive name is missing, expected @prefix or @base" + } elseif (mb_strlen($directive) == 0) { + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: directive name is missing, expected @prefix or @base", + $this->line, + $this->column ); } else { - throw new EasyRdf_Exception( - "Turtle Parse Error: unknown directive \"@$directive\"" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unknown directive \"$directive\"", + $this->line, + $this->column ); } } @@ -169,8 +189,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } elseif (self::isWhitespace($c)) { break; } elseif ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading prefix id" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading prefix id", + $this->line, + $this->column ); } @@ -178,7 +200,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } $this->skipWSC(); - $this->verifyCharacter($this->read(), ":"); + $this->verifyCharacterOrFail($this->read(), ":"); $this->skipWSC(); // Read the namespace URI @@ -206,9 +228,37 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples */ protected function parseTriples() { - $this->parseSubject(); - $this->skipWSC(); - $this->parsePredicateObjectList(); + $c = $this->peek(); + + // If the first character is an open bracket we need to decide which of + // the two parsing methods for blank nodes to use + if ($c == '[') { + $c = $this->read(); + $this->skipWSC(); + $c = $this->peek(); + if ($c == ']') { + $c = $this->read(); + $this->subject = $this->createBNode(); + $this->skipWSC(); + $this->parsePredicateObjectList(); + } else { + $this->unread('['); + $this->subject = $this->parseImplicitBlank(); + } + $this->skipWSC(); + $c = $this->peek(); + + // if this is not the end of the statement, recurse into the list of + // predicate and objects, using the subject parsed above as the subject + // of the statement. + if ($c != '.') { + $this->parsePredicateObjectList(); + } + } else { + $this->parseSubject(); + $this->skipWSC(); + $this->parsePredicateObjectList(); + } $this->subject = null; $this->predicate = null; @@ -233,6 +283,9 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples if ($c == '.' || $c == ']') { break; + } elseif ($c == ';') { + // empty predicateObjectList, skip to next + continue; } $this->predicate = $this->parsePredicate(); @@ -275,8 +328,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples if ($value['type'] == 'uri' or $value['type'] == 'bnode') { $this->subject = $value; } else { - throw new EasyRdf_Exception( - "Turtle Parse Error: illegal subject type: ".$value['type'] + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: illegal subject type: ".$value['type'], + $this->line, + $this->column ); } } @@ -312,8 +367,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples if ($predicate['type'] == 'uri') { return $predicate; } else { - throw new EasyRdf_Exception( - "Turtle Parse Error: Illegal predicate value: " . $predicate + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: Illegal predicate type: " . $predicate['type'], + $this->line, + $this->column ); } } @@ -351,12 +408,9 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples */ protected function parseImplicitBlank() { - $this->verifyCharacter($this->read(), "["); + $this->verifyCharacterOrFail($this->read(), "["); - $bnode = array( - 'type' => 'bnode', - 'value' => $this->graph->newBNodeId() - ); + $bnode = $this->createBNode(); $c = $this->read(); if ($c != ']') { @@ -377,7 +431,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples $this->skipWSC(); // Read closing bracket - $this->verifyCharacter($this->read(), "]"); + $this->verifyCharacterOrFail($this->read(), "]"); // Restore previous subject and predicate $this->subject = $oldSubject; @@ -393,7 +447,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples */ protected function parseCollection() { - $this->verifyCharacter($this->read(), "("); + $this->verifyCharacterOrFail($this->read(), "("); $c = $this->skipWSC(); if ($c == ')') { @@ -404,10 +458,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples 'value' => EasyRdf_Namespace::get('rdf') . 'nil' ); } else { - $listRoot = array( - 'type' => 'bnode', - 'value' => $this->graph->newBNodeId() - ); + $listRoot = $this->createBNode(); // Remember current subject and predicate $oldSubject = $this->subject; @@ -425,10 +476,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples while ($this->skipWSC() != ')') { // Create another list node and link it to the previous - $newNode = array( - 'type' => 'bnode', - 'value' => $this->graph->newBNodeId() - ); + $newNode = $this->createBNode(); $this->addTriple( $bNode['value'], @@ -481,31 +529,34 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } elseif ($c == '_') { // node ID, e.g. _:n1 return $this->parseNodeID(); - } elseif ($c == '"' or $c == "'") { + } elseif ($c == '"' || $c == "'") { // quoted literal, e.g. "foo" or """foo""" or 'foo' or '''foo''' - return $this->parseQuotedLiteral($c); + return $this->parseQuotedLiteral(); } elseif (ctype_digit($c) || $c == '.' || $c == '+' || $c == '-') { // integer or double, e.g. 123 or 1.2e3 return $this->parseNumber(); } elseif ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading value" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading value", + $this->line, + $this->column ); } else { - throw new EasyRdf_Exception( - "Turtle Parse Error: expected an RDF value here, found '$c'" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: expected an RDF value here, found '$c'", + $this->line, + $this->column ); } } /** * Parses a quoted string, optionally followed by a language tag or datatype. - * @param string $quote The type of quote to use (either ' or ") * @ignore */ - protected function parseQuotedLiteral($quote) + protected function parseQuotedLiteral() { - $label = $this->parseQuotedString($quote); + $label = $this->parseQuotedString(); // Check for presence of a language tag or datatype $c = $this->peek(); @@ -517,20 +568,35 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples $lang = ''; $c = $this->read(); if ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading language" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading language", + $this->line, + $this->column ); } elseif (!self::isLanguageStartChar($c)) { - throw new EasyRdf_Exception( - "Turtle Parse Error: expected a letter, found '$c'" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: expected a letter, found '$c'", + $this->line, + $this->column ); } $lang .= $c; $c = $this->read(); - while (self::isLanguageChar($c)) { - $lang .= $c; + while (!self::isWhitespace($c)) { + if ($c == '.' || $c == ';' || $c == ',' || $c == ')' || $c == ']' || $c == -1) { + break; + } + if (self::isLanguageChar($c)) { + $lang .= $c; + } else { + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: illegal language tag char: '$c'", + $this->line, + $this->column + ); + } $c = $this->read(); } @@ -545,7 +611,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples $this->read(); // next character should be another '^' - $this->verifyCharacter($this->read(), "^"); + $this->verifyCharacterOrFail($this->read(), "^"); // Read datatype $datatype = $this->parseValue(); @@ -556,8 +622,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples 'datatype' => $datatype['value'] ); } else { - throw new EasyRdf_Exception( - "Turtle Parse Error: illegal datatype value: $datatype" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: illegal datatype type: " . $datatype['type'], + $this->line, + $this->column ); } } else { @@ -570,29 +638,30 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples /** * Parses a quoted string, which is either a "normal string" or a """long string""". - * @param string $quote The type of quote to use (either ' or ") * @ignore */ - protected function parseQuotedString($quote) + protected function parseQuotedString() { $result = null; + $c1 = $this->read(); + // First character should be ' or " - $this->verifyCharacter($this->read(), $quote); + $this->verifyCharacterOrFail($c1, "\"\'"); // Check for long-string, which starts and ends with three double quotes $c2 = $this->read(); $c3 = $this->read(); - if ($c2 == $quote && $c3 == $quote) { + if ($c2 == $c1 && $c3 == $c1) { // Long string - $result = $this->parseLongString($quote); + $result = $this->parseLongString($c2); } else { // Normal string $this->unread($c3); $this->unread($c2); - $result = $this->parseString($quote); + $result = $this->parseString($c1); } // Unescape any escape sequences @@ -600,23 +669,25 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } /** - * Parses a "normal string". This method assumes that the first double quote + * Parses a "normal string". This method requires that the opening character * has already been parsed. - * @param string $quote The type of quote to use (either ' or ") + * @param string $closingCharacter The type of quote to use (either ' or ") * @ignore */ - protected function parseString($quote) + protected function parseString($closingCharacter) { $str = ''; while (true) { $c = $this->read(); - if ($c == $quote) { + if ($c == $closingCharacter) { break; } elseif ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading string" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading string", + $this->line, + $this->column ); } @@ -626,8 +697,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples // This escapes the next character, which might be a ' or a " $c = $this->read(); if ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading string" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading string", + $this->line, + $this->column ); } $str .= $c; @@ -638,12 +711,12 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } /** - * Parses a """long string""". This method assumes that the first three - * double quotes have already been parsed. - * @param string $quote The type of quote to use (either ' or ") + * Parses a """long string""". This method requires that the first three + * characters have already been parsed. + * @param string $closingCharacter The type of quote to use (either ' or ") * @ignore */ - protected function parseLongString($quote) + protected function parseLongString($closingCharacter) { $str = ''; $doubleQuoteCount = 0; @@ -652,10 +725,12 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples $c = $this->read(); if ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading long string" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading long string", + $this->line, + $this->column ); - } elseif ($c == $quote) { + } elseif ($c == $closingCharacter) { $doubleQuoteCount++; } else { $doubleQuoteCount = 0; @@ -667,15 +742,17 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples // This escapes the next character, which might be a ' or " $c = $this->read(); if ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading long string" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading long string", + $this->line, + $this->column ); } $str .= $c; } } - return substr($str, 0, -3); + return mb_substr($str, 0, -3); } /** @@ -701,29 +778,39 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } if ($c == '.' || $c == 'e' || $c == 'E') { - // We're parsing a decimal or a double - $datatype = EasyRdf_Namespace::get('xsd').'decimal'; - // read optional fractional digits if ($c == '.') { - $value .= $c; - $c = $this->read(); - while (ctype_digit($c)) { + + if (self::isWhitespace($this->peek())) { + // We're parsing an integer that did not have a space before the + // period to end the statement + } else { $value .= $c; $c = $this->read(); - } - - if (strlen($value) == 1) { - // We've only parsed a '.' - throw new EasyRdf_Exception( - "Turtle Parse Error: object for statement missing" - ); + while (ctype_digit($c)) { + $value .= $c; + $c = $this->read(); + } + + if (mb_strlen($value) == 1) { + // We've only parsed a '.' + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: object for statement missing", + $this->line, + $this->column + ); + } + + // We're parsing a decimal or a double + $datatype = EasyRdf_Namespace::get('xsd').'decimal'; } } else { - if (strlen($value) == 0) { + if (mb_strlen($value) == 0) { // We've only parsed an 'e' or 'E' - throw new EasyRdf_Exception( - "Turtle Parse Error: object for statement missing" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: object for statement missing", + $this->line, + $this->column ); } } @@ -740,8 +827,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } if (!ctype_digit($c)) { - throw new EasyRdf_Exception( - "Turtle Parse Error: Exponent value missing" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: exponent value missing", + $this->line, + $this->column ); } @@ -775,7 +864,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples $uri = ''; // First character should be '<' - $this->verifyCharacter($this->read(), "<"); + $this->verifyCharacterOrFail($this->read(), "<"); // Read up to the next '>' character while (true) { @@ -784,8 +873,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples if ($c == '>') { break; } elseif ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading URI" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading URI", + $this->line, + $this->column ); } @@ -795,8 +886,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples // This escapes the next character, which might be a '>' $c = $this->read(); if ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading URI" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading URI", + $this->line, + $this->column ); } $uri .= $c; @@ -822,13 +915,17 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples // First character should be a ':' or a letter $c = $this->read(); if ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while readying value" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while readying value", + $this->line, + $this->column ); } if ($c != ':' && !self::isPrefixStartChar($c)) { - throw new EasyRdf_Exception( - "Turtle Parse Error: expected a ':' or a letter, found '$c'" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: expected a ':' or a letter, found '$c'", + $this->line, + $this->column ); } @@ -836,10 +933,13 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples if ($c == ':') { // qname using default namespace - $namespace = $this->namespaces[""]; - if ($namespace == null) { - throw new EasyRdf_Exception( - "Turtle Parse Error: default namespace used but not defined" + if (isset($this->namespaces[''])) { + $namespace = $this->namespaces['']; + } else { + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: default namespace used but not defined", + $this->line, + $this->column ); } } else { @@ -865,13 +965,15 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } } - $this->verifyCharacter($c, ":"); + $this->verifyCharacterOrFail($c, ":"); if (isset($this->namespaces[$prefix])) { $namespace = $this->namespaces[$prefix]; } else { - throw new EasyRdf_Exception( - "Turtle Parse Error: namespace prefix '$prefix' used but not defined" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: namespace prefix '$prefix' used but not defined", + $this->line, + $this->column ); } } @@ -880,11 +982,19 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples $localName = ''; $c = $this->read(); if (self::isNameStartChar($c)) { - $localName .= $c; + if ($c == '\\') { + $localName .= $this->readLocalEscapedChar(); + } else { + $localName .= $c; + } $c = $this->read(); while (self::isNameChar($c)) { - $localName .= $c; + if ($c == '\\') { + $localName .= $this->readLocalEscapedChar(); + } else { + $localName .= $c; + } $c = $this->read(); } } @@ -899,6 +1009,21 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples ); } + protected function readLocalEscapedChar() + { + $c = $this->read(); + + if (self::isLocalEscapedChar($c)) { + return $c; + } else { + throw new EasyRdf_Parser_Exception( + "found '" . $c . "', expected one of: " . implode(', ', self::$localEscapedChars), + $this->line, + $this->column + ); + } + } + /** * Parses a blank node ID, e.g: _:node1 * @ignore @@ -906,18 +1031,22 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples protected function parseNodeID() { // Node ID should start with "_:" - $this->verifyCharacter($this->read(), "_"); - $this->verifyCharacter($this->read(), ":"); + $this->verifyCharacterOrFail($this->read(), "_"); + $this->verifyCharacterOrFail($this->read(), ":"); // Read the node ID $c = $this->read(); if ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file while reading node id" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file while reading node id", + $this->line, + $this->column ); } elseif (!self::isNameStartChar($c)) { - throw new EasyRdf_Exception( - "Turtle Parse Error: expected a letter, found '$c'" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: expected a letter, found '$c'", + $this->line, + $this->column ); } @@ -952,11 +1081,13 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples * exception if this is not the case. * @ignore */ - protected function verifyCharacter($c, $expected) + protected function verifyCharacterOrFail($c, $expected) { if ($c == -1) { - throw new EasyRdf_Exception( - "Turtle Parse Error: unexpected end of file" + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: unexpected end of file", + $this->line, + $this->column ); } elseif (strpbrk($c, $expected) === false) { $msg = 'expected '; @@ -968,7 +1099,11 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples } $msg .= ", found '$c'"; - throw new EasyRdf_Exception("Turtle Parse Error: $msg"); + throw new EasyRdf_Parser_Exception( + "Turtle Parse Error: $msg", + $this->line, + $this->column + ); } } @@ -981,7 +1116,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples $c = $this->read(); while (self::isWhitespace($c) || $c == '#') { if ($c == '#') { - $this->skipLine(); + $this->processComment(); } $c = $this->read(); @@ -995,10 +1130,12 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples * Consumes characters from reader until the first EOL has been read. * @ignore */ - protected function skipLine() + protected function processComment() { + $comment = ''; $c = $this->read(); while ($c != -1 && $c != "\r" && $c != "\n") { + $comment .= $c; $c = $this->read(); } @@ -1019,9 +1156,16 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples */ protected function read() { - if ($this->pos < $this->len) { - $c = $this->data[$this->pos]; - $this->pos++; + if (!empty($this->data)) { + $c = mb_substr($this->data, 0, 1); + // Keep tracks of which line we are on (0A = Line Feed) + if ($c == "\x0A") { + $this->line += 1; + $this->column = 1; + } else { + $this->column += 1; + } + $this->data = mb_substr($this->data, 1); return $c; } else { return -1; @@ -1035,8 +1179,8 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples */ protected function peek() { - if ($this->pos < $this->len) { - return $this->data[$this->pos]; + if (!empty($this->data)) { + return mb_substr($this->data, 0, 1); } else { return -1; } @@ -1047,13 +1191,20 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples * Steps back, restoring the previous character read() to the input buffer * @ignore */ - protected function unread() + protected function unread($c) { - if ($this->pos > 0) { - $this->pos--; - } else { - throw new EasyRdf_Exception("Turtle Parse Error: unread error"); - } + # FIXME: deal with unreading new lines + $this->column -= mb_strlen($c); + $this->data = $c . $this->data; + } + + /** @ignore */ + protected function createBNode() + { + return array( + 'type' => 'bnode', + 'value' => $this->graph->newBNodeId() + ); } /** @@ -1063,7 +1214,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples public static function isWhitespace($c) { // Whitespace character are space, tab, newline and carriage return: - return $c == " " || $c == "\t" || $c == "\r" || $c == "\n"; + return $c == "\x20" || $c == "\x09" || $c == "\x0A" || $c == "\x0D"; } /** @ignore */ @@ -1090,7 +1241,13 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples /** @ignore */ public static function isNameStartChar($c) { - return $c == '_' || self::isPrefixStartChar($c); + return + $c == '\\' || + $c == '_' || + $c == ':' || + $c == '%' || + ctype_digit($c) || + self::isPrefixStartChar($c); } /** @ignore */ @@ -1099,17 +1256,37 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples $o = ord($c); return self::isNameStartChar($c) || + $o >= 0x30 && $o <= 0x39 || # 0-9 $c == '-' || - $o >= 0x30 && $o <= 0x39 || # numeric $o == 0x00B7 || $o >= 0x0300 && $o <= 0x036F || $o >= 0x203F && $o <= 0x2040; } /** @ignore */ + private static $localEscapedChars = array( + '_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', + '*', '+', ',', ';', '=', '/', '?', '#', '@', '%' + ); + + /** @ignore */ + public static function isLocalEscapedChar($c) + { + return in_array($c, self::$localEscapedChars); + } + + /** @ignore */ public static function isPrefixChar($c) { - return self::isNameChar($c); + $o = ord($c); + return + $c == '_' || + $o >= 0x30 && $o <= 0x39 || # 0-9 + self::isPrefixStartChar($c) || + $c == '-' || + $o == 0x00B7 || + $c >= 0x0300 && $c <= 0x036F || + $c >= 0x203F && $c <= 0x2040; } /** @ignore */ @@ -1117,8 +1294,8 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples { $o = ord($c); return - $o >= 0x41 && $o <= 0x5a || - $o >= 0x61 && $o <= 0x7a; + $o >= 0x41 && $o <= 0x5a || # A-Z + $o >= 0x61 && $o <= 0x7a; # a-z } /** @ignore */ @@ -1126,9 +1303,9 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples { $o = ord($c); return - $o >= 0x41 && $o <= 0x5a || # A-Z - $o >= 0x61 && $o <= 0x7a || # a-z - $o >= 0x30 && $o <= 0x39 || # 0-9 + $o >= 0x41 && $o <= 0x5a || # A-Z + $o >= 0x61 && $o <= 0x7a || # a-z + $o >= 0x30 && $o <= 0x39 || # 0-9 $c == '-'; } } |