summaryrefslogtreecommitdiffstats
path: root/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php
diff options
context:
space:
mode:
Diffstat (limited to 'core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php')
-rw-r--r--core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php517
1 files changed, 347 insertions, 170 deletions
diff --git a/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php b/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php
index bbbec47..3915e32 100644
--- a/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php
+++ b/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Turtle.php
@@ -5,8 +5,8 @@
*
* LICENSE
*
- * Copyright (c) 2009-2012 Nicholas J Humfrey.
- * Copyright (c) 1997-2006 Aduna (http://www.aduna-software.com/)
+ * Copyright (c) 2009-2013 Nicholas J Humfrey.
+ * Copyright (c) 1997-2013 Aduna (http://www.aduna-software.com/)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,24 +33,36 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package EasyRdf
- * @copyright Copyright (c) 2009-2012 Nicholas J Humfrey
+ * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
* Copyright (c) 1997-2006 Aduna (http://www.aduna-software.com/)
* @license http://www.opensource.org/licenses/bsd-license.php
- * @version $Id$
*/
/**
* Class to parse Turtle with no external dependancies.
*
- * http://www.w3.org/TR/turtle/
- *
+ * It is a translation from Java to PHP of the Sesame Turtle Parser:
+ * http://bit.ly/TurtleParser
+ *
+ * Lasted updated against version:
+ * ecda6a15a200a2fc6a062e2e43081257c3ccd4e6 (Mon Jul 29 12:05:58 2013)
+ *
* @package EasyRdf
- * @copyright Copyright (c) 2009-2012 Nicholas J Humfrey
- * Copyright (c) 1997-2006 Aduna (http://www.aduna-software.com/)
+ * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
+ * Copyright (c) 1997-2013 Aduna (http://www.aduna-software.com/)
* @license http://www.opensource.org/licenses/bsd-license.php
*/
class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
{
+ protected $data;
+ protected $namespaces;
+ protected $subject;
+ protected $predicate;
+ protected $object;
+
+ protected $line;
+ protected $column;
+
/**
* Constructor
*
@@ -80,13 +92,13 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
}
$this->data = $data;
- $this->len = strlen($data);
- $this->pos = 0;
-
$this->namespaces = array();
$this->subject = null;
$this->predicate = null;
$this->object = null;
+
+ $this->line = 1;
+ $this->column = 1;
$this->resetBnodeMap();
@@ -106,15 +118,29 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
*/
protected function parseStatement()
{
- $c = $this->peek();
- if ($c == '@') {
- $this->parseDirective();
+ $directive = '';
+ while (true) {
+ $c = $this->read();
+ if ($c == -1 || self::isWhitespace($c)) {
+ $this->unread($c);
+ break;
+ } else {
+ $directive .= $c;
+ }
+ }
+
+ if (preg_match("/^(@|prefix$|base$)/i", $directive)) {
+ $this->parseDirective($directive);
$this->skipWSC();
- $this->verifyCharacter($this->read(), ".");
+ // SPARQL BASE and PREFIX lines do not end in .
+ if ($directive[0] == "@") {
+ $this->verifyCharacterOrFail($this->read(), ".");
+ }
} else {
+ $this->unread($directive);
$this->parseTriples();
$this->skipWSC();
- $this->verifyCharacter($this->read(), ".");
+ $this->verifyCharacterOrFail($this->read(), ".");
}
}
@@ -122,30 +148,24 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
* Parse a directive [3]
* @ignore
*/
- protected function parseDirective()
+ protected function parseDirective($directive)
{
- // Verify that the first characters form the string "prefix"
- $this->verifyCharacter($this->read(), "@");
-
- $directive = '';
-
- $c = $this->read();
- while ($c != -1 && !self::isWhitespace($c)) {
- $directive .= $c;
- $c = $this->read();
- }
-
- if ($directive == "prefix") {
+ $directive = strtolower($directive);
+ if ($directive == "prefix" || $directive == '@prefix') {
$this->parsePrefixID();
- } elseif ($directive == "base") {
+ } elseif ($directive == "base" || $directive == '@base') {
$this->parseBase();
- } elseif (strlen($directive) == 0) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: directive name is missing, expected @prefix or @base"
+ } elseif (mb_strlen($directive) == 0) {
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: directive name is missing, expected @prefix or @base",
+ $this->line,
+ $this->column
);
} else {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unknown directive \"@$directive\""
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unknown directive \"$directive\"",
+ $this->line,
+ $this->column
);
}
}
@@ -169,8 +189,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
} elseif (self::isWhitespace($c)) {
break;
} elseif ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading prefix id"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading prefix id",
+ $this->line,
+ $this->column
);
}
@@ -178,7 +200,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
}
$this->skipWSC();
- $this->verifyCharacter($this->read(), ":");
+ $this->verifyCharacterOrFail($this->read(), ":");
$this->skipWSC();
// Read the namespace URI
@@ -206,9 +228,37 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
*/
protected function parseTriples()
{
- $this->parseSubject();
- $this->skipWSC();
- $this->parsePredicateObjectList();
+ $c = $this->peek();
+
+ // If the first character is an open bracket we need to decide which of
+ // the two parsing methods for blank nodes to use
+ if ($c == '[') {
+ $c = $this->read();
+ $this->skipWSC();
+ $c = $this->peek();
+ if ($c == ']') {
+ $c = $this->read();
+ $this->subject = $this->createBNode();
+ $this->skipWSC();
+ $this->parsePredicateObjectList();
+ } else {
+ $this->unread('[');
+ $this->subject = $this->parseImplicitBlank();
+ }
+ $this->skipWSC();
+ $c = $this->peek();
+
+ // if this is not the end of the statement, recurse into the list of
+ // predicate and objects, using the subject parsed above as the subject
+ // of the statement.
+ if ($c != '.') {
+ $this->parsePredicateObjectList();
+ }
+ } else {
+ $this->parseSubject();
+ $this->skipWSC();
+ $this->parsePredicateObjectList();
+ }
$this->subject = null;
$this->predicate = null;
@@ -233,6 +283,9 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
if ($c == '.' || $c == ']') {
break;
+ } elseif ($c == ';') {
+ // empty predicateObjectList, skip to next
+ continue;
}
$this->predicate = $this->parsePredicate();
@@ -275,8 +328,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
if ($value['type'] == 'uri' or $value['type'] == 'bnode') {
$this->subject = $value;
} else {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: illegal subject type: ".$value['type']
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: illegal subject type: ".$value['type'],
+ $this->line,
+ $this->column
);
}
}
@@ -312,8 +367,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
if ($predicate['type'] == 'uri') {
return $predicate;
} else {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: Illegal predicate value: " . $predicate
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: Illegal predicate type: " . $predicate['type'],
+ $this->line,
+ $this->column
);
}
}
@@ -351,12 +408,9 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
*/
protected function parseImplicitBlank()
{
- $this->verifyCharacter($this->read(), "[");
+ $this->verifyCharacterOrFail($this->read(), "[");
- $bnode = array(
- 'type' => 'bnode',
- 'value' => $this->graph->newBNodeId()
- );
+ $bnode = $this->createBNode();
$c = $this->read();
if ($c != ']') {
@@ -377,7 +431,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
$this->skipWSC();
// Read closing bracket
- $this->verifyCharacter($this->read(), "]");
+ $this->verifyCharacterOrFail($this->read(), "]");
// Restore previous subject and predicate
$this->subject = $oldSubject;
@@ -393,7 +447,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
*/
protected function parseCollection()
{
- $this->verifyCharacter($this->read(), "(");
+ $this->verifyCharacterOrFail($this->read(), "(");
$c = $this->skipWSC();
if ($c == ')') {
@@ -404,10 +458,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
'value' => EasyRdf_Namespace::get('rdf') . 'nil'
);
} else {
- $listRoot = array(
- 'type' => 'bnode',
- 'value' => $this->graph->newBNodeId()
- );
+ $listRoot = $this->createBNode();
// Remember current subject and predicate
$oldSubject = $this->subject;
@@ -425,10 +476,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
while ($this->skipWSC() != ')') {
// Create another list node and link it to the previous
- $newNode = array(
- 'type' => 'bnode',
- 'value' => $this->graph->newBNodeId()
- );
+ $newNode = $this->createBNode();
$this->addTriple(
$bNode['value'],
@@ -481,31 +529,34 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
} elseif ($c == '_') {
// node ID, e.g. _:n1
return $this->parseNodeID();
- } elseif ($c == '"' or $c == "'") {
+ } elseif ($c == '"' || $c == "'") {
// quoted literal, e.g. "foo" or """foo""" or 'foo' or '''foo'''
- return $this->parseQuotedLiteral($c);
+ return $this->parseQuotedLiteral();
} elseif (ctype_digit($c) || $c == '.' || $c == '+' || $c == '-') {
// integer or double, e.g. 123 or 1.2e3
return $this->parseNumber();
} elseif ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading value"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading value",
+ $this->line,
+ $this->column
);
} else {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: expected an RDF value here, found '$c'"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: expected an RDF value here, found '$c'",
+ $this->line,
+ $this->column
);
}
}
/**
* Parses a quoted string, optionally followed by a language tag or datatype.
- * @param string $quote The type of quote to use (either ' or ")
* @ignore
*/
- protected function parseQuotedLiteral($quote)
+ protected function parseQuotedLiteral()
{
- $label = $this->parseQuotedString($quote);
+ $label = $this->parseQuotedString();
// Check for presence of a language tag or datatype
$c = $this->peek();
@@ -517,20 +568,35 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
$lang = '';
$c = $this->read();
if ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading language"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading language",
+ $this->line,
+ $this->column
);
} elseif (!self::isLanguageStartChar($c)) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: expected a letter, found '$c'"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: expected a letter, found '$c'",
+ $this->line,
+ $this->column
);
}
$lang .= $c;
$c = $this->read();
- while (self::isLanguageChar($c)) {
- $lang .= $c;
+ while (!self::isWhitespace($c)) {
+ if ($c == '.' || $c == ';' || $c == ',' || $c == ')' || $c == ']' || $c == -1) {
+ break;
+ }
+ if (self::isLanguageChar($c)) {
+ $lang .= $c;
+ } else {
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: illegal language tag char: '$c'",
+ $this->line,
+ $this->column
+ );
+ }
$c = $this->read();
}
@@ -545,7 +611,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
$this->read();
// next character should be another '^'
- $this->verifyCharacter($this->read(), "^");
+ $this->verifyCharacterOrFail($this->read(), "^");
// Read datatype
$datatype = $this->parseValue();
@@ -556,8 +622,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
'datatype' => $datatype['value']
);
} else {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: illegal datatype value: $datatype"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: illegal datatype type: " . $datatype['type'],
+ $this->line,
+ $this->column
);
}
} else {
@@ -570,29 +638,30 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
/**
* Parses a quoted string, which is either a "normal string" or a """long string""".
- * @param string $quote The type of quote to use (either ' or ")
* @ignore
*/
- protected function parseQuotedString($quote)
+ protected function parseQuotedString()
{
$result = null;
+ $c1 = $this->read();
+
// First character should be ' or "
- $this->verifyCharacter($this->read(), $quote);
+ $this->verifyCharacterOrFail($c1, "\"\'");
// Check for long-string, which starts and ends with three double quotes
$c2 = $this->read();
$c3 = $this->read();
- if ($c2 == $quote && $c3 == $quote) {
+ if ($c2 == $c1 && $c3 == $c1) {
// Long string
- $result = $this->parseLongString($quote);
+ $result = $this->parseLongString($c2);
} else {
// Normal string
$this->unread($c3);
$this->unread($c2);
- $result = $this->parseString($quote);
+ $result = $this->parseString($c1);
}
// Unescape any escape sequences
@@ -600,23 +669,25 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
}
/**
- * Parses a "normal string". This method assumes that the first double quote
+ * Parses a "normal string". This method requires that the opening character
* has already been parsed.
- * @param string $quote The type of quote to use (either ' or ")
+ * @param string $closingCharacter The type of quote to use (either ' or ")
* @ignore
*/
- protected function parseString($quote)
+ protected function parseString($closingCharacter)
{
$str = '';
while (true) {
$c = $this->read();
- if ($c == $quote) {
+ if ($c == $closingCharacter) {
break;
} elseif ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading string"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading string",
+ $this->line,
+ $this->column
);
}
@@ -626,8 +697,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
// This escapes the next character, which might be a ' or a "
$c = $this->read();
if ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading string"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading string",
+ $this->line,
+ $this->column
);
}
$str .= $c;
@@ -638,12 +711,12 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
}
/**
- * Parses a """long string""". This method assumes that the first three
- * double quotes have already been parsed.
- * @param string $quote The type of quote to use (either ' or ")
+ * Parses a """long string""". This method requires that the first three
+ * characters have already been parsed.
+ * @param string $closingCharacter The type of quote to use (either ' or ")
* @ignore
*/
- protected function parseLongString($quote)
+ protected function parseLongString($closingCharacter)
{
$str = '';
$doubleQuoteCount = 0;
@@ -652,10 +725,12 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
$c = $this->read();
if ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading long string"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading long string",
+ $this->line,
+ $this->column
);
- } elseif ($c == $quote) {
+ } elseif ($c == $closingCharacter) {
$doubleQuoteCount++;
} else {
$doubleQuoteCount = 0;
@@ -667,15 +742,17 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
// This escapes the next character, which might be a ' or "
$c = $this->read();
if ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading long string"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading long string",
+ $this->line,
+ $this->column
);
}
$str .= $c;
}
}
- return substr($str, 0, -3);
+ return mb_substr($str, 0, -3);
}
/**
@@ -701,29 +778,39 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
}
if ($c == '.' || $c == 'e' || $c == 'E') {
- // We're parsing a decimal or a double
- $datatype = EasyRdf_Namespace::get('xsd').'decimal';
-
// read optional fractional digits
if ($c == '.') {
- $value .= $c;
- $c = $this->read();
- while (ctype_digit($c)) {
+
+ if (self::isWhitespace($this->peek())) {
+ // We're parsing an integer that did not have a space before the
+ // period to end the statement
+ } else {
$value .= $c;
$c = $this->read();
- }
-
- if (strlen($value) == 1) {
- // We've only parsed a '.'
- throw new EasyRdf_Exception(
- "Turtle Parse Error: object for statement missing"
- );
+ while (ctype_digit($c)) {
+ $value .= $c;
+ $c = $this->read();
+ }
+
+ if (mb_strlen($value) == 1) {
+ // We've only parsed a '.'
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: object for statement missing",
+ $this->line,
+ $this->column
+ );
+ }
+
+ // We're parsing a decimal or a double
+ $datatype = EasyRdf_Namespace::get('xsd').'decimal';
}
} else {
- if (strlen($value) == 0) {
+ if (mb_strlen($value) == 0) {
// We've only parsed an 'e' or 'E'
- throw new EasyRdf_Exception(
- "Turtle Parse Error: object for statement missing"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: object for statement missing",
+ $this->line,
+ $this->column
);
}
}
@@ -740,8 +827,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
}
if (!ctype_digit($c)) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: Exponent value missing"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: exponent value missing",
+ $this->line,
+ $this->column
);
}
@@ -775,7 +864,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
$uri = '';
// First character should be '<'
- $this->verifyCharacter($this->read(), "<");
+ $this->verifyCharacterOrFail($this->read(), "<");
// Read up to the next '>' character
while (true) {
@@ -784,8 +873,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
if ($c == '>') {
break;
} elseif ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading URI"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading URI",
+ $this->line,
+ $this->column
);
}
@@ -795,8 +886,10 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
// This escapes the next character, which might be a '>'
$c = $this->read();
if ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading URI"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading URI",
+ $this->line,
+ $this->column
);
}
$uri .= $c;
@@ -822,13 +915,17 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
// First character should be a ':' or a letter
$c = $this->read();
if ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while readying value"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while readying value",
+ $this->line,
+ $this->column
);
}
if ($c != ':' && !self::isPrefixStartChar($c)) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: expected a ':' or a letter, found '$c'"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: expected a ':' or a letter, found '$c'",
+ $this->line,
+ $this->column
);
}
@@ -836,10 +933,13 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
if ($c == ':') {
// qname using default namespace
- $namespace = $this->namespaces[""];
- if ($namespace == null) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: default namespace used but not defined"
+ if (isset($this->namespaces[''])) {
+ $namespace = $this->namespaces[''];
+ } else {
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: default namespace used but not defined",
+ $this->line,
+ $this->column
);
}
} else {
@@ -865,13 +965,15 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
}
}
- $this->verifyCharacter($c, ":");
+ $this->verifyCharacterOrFail($c, ":");
if (isset($this->namespaces[$prefix])) {
$namespace = $this->namespaces[$prefix];
} else {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: namespace prefix '$prefix' used but not defined"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: namespace prefix '$prefix' used but not defined",
+ $this->line,
+ $this->column
);
}
}
@@ -880,11 +982,19 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
$localName = '';
$c = $this->read();
if (self::isNameStartChar($c)) {
- $localName .= $c;
+ if ($c == '\\') {
+ $localName .= $this->readLocalEscapedChar();
+ } else {
+ $localName .= $c;
+ }
$c = $this->read();
while (self::isNameChar($c)) {
- $localName .= $c;
+ if ($c == '\\') {
+ $localName .= $this->readLocalEscapedChar();
+ } else {
+ $localName .= $c;
+ }
$c = $this->read();
}
}
@@ -899,6 +1009,21 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
);
}
+ protected function readLocalEscapedChar()
+ {
+ $c = $this->read();
+
+ if (self::isLocalEscapedChar($c)) {
+ return $c;
+ } else {
+ throw new EasyRdf_Parser_Exception(
+ "found '" . $c . "', expected one of: " . implode(', ', self::$localEscapedChars),
+ $this->line,
+ $this->column
+ );
+ }
+ }
+
/**
* Parses a blank node ID, e.g: _:node1
* @ignore
@@ -906,18 +1031,22 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
protected function parseNodeID()
{
// Node ID should start with "_:"
- $this->verifyCharacter($this->read(), "_");
- $this->verifyCharacter($this->read(), ":");
+ $this->verifyCharacterOrFail($this->read(), "_");
+ $this->verifyCharacterOrFail($this->read(), ":");
// Read the node ID
$c = $this->read();
if ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file while reading node id"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file while reading node id",
+ $this->line,
+ $this->column
);
} elseif (!self::isNameStartChar($c)) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: expected a letter, found '$c'"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: expected a letter, found '$c'",
+ $this->line,
+ $this->column
);
}
@@ -952,11 +1081,13 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
* exception if this is not the case.
* @ignore
*/
- protected function verifyCharacter($c, $expected)
+ protected function verifyCharacterOrFail($c, $expected)
{
if ($c == -1) {
- throw new EasyRdf_Exception(
- "Turtle Parse Error: unexpected end of file"
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: unexpected end of file",
+ $this->line,
+ $this->column
);
} elseif (strpbrk($c, $expected) === false) {
$msg = 'expected ';
@@ -968,7 +1099,11 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
}
$msg .= ", found '$c'";
- throw new EasyRdf_Exception("Turtle Parse Error: $msg");
+ throw new EasyRdf_Parser_Exception(
+ "Turtle Parse Error: $msg",
+ $this->line,
+ $this->column
+ );
}
}
@@ -981,7 +1116,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
$c = $this->read();
while (self::isWhitespace($c) || $c == '#') {
if ($c == '#') {
- $this->skipLine();
+ $this->processComment();
}
$c = $this->read();
@@ -995,10 +1130,12 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
* Consumes characters from reader until the first EOL has been read.
* @ignore
*/
- protected function skipLine()
+ protected function processComment()
{
+ $comment = '';
$c = $this->read();
while ($c != -1 && $c != "\r" && $c != "\n") {
+ $comment .= $c;
$c = $this->read();
}
@@ -1019,9 +1156,16 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
*/
protected function read()
{
- if ($this->pos < $this->len) {
- $c = $this->data[$this->pos];
- $this->pos++;
+ if (!empty($this->data)) {
+ $c = mb_substr($this->data, 0, 1);
+ // Keep tracks of which line we are on (0A = Line Feed)
+ if ($c == "\x0A") {
+ $this->line += 1;
+ $this->column = 1;
+ } else {
+ $this->column += 1;
+ }
+ $this->data = mb_substr($this->data, 1);
return $c;
} else {
return -1;
@@ -1035,8 +1179,8 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
*/
protected function peek()
{
- if ($this->pos < $this->len) {
- return $this->data[$this->pos];
+ if (!empty($this->data)) {
+ return mb_substr($this->data, 0, 1);
} else {
return -1;
}
@@ -1047,13 +1191,20 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
* Steps back, restoring the previous character read() to the input buffer
* @ignore
*/
- protected function unread()
+ protected function unread($c)
{
- if ($this->pos > 0) {
- $this->pos--;
- } else {
- throw new EasyRdf_Exception("Turtle Parse Error: unread error");
- }
+ # FIXME: deal with unreading new lines
+ $this->column -= mb_strlen($c);
+ $this->data = $c . $this->data;
+ }
+
+ /** @ignore */
+ protected function createBNode()
+ {
+ return array(
+ 'type' => 'bnode',
+ 'value' => $this->graph->newBNodeId()
+ );
}
/**
@@ -1063,7 +1214,7 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
public static function isWhitespace($c)
{
// Whitespace character are space, tab, newline and carriage return:
- return $c == " " || $c == "\t" || $c == "\r" || $c == "\n";
+ return $c == "\x20" || $c == "\x09" || $c == "\x0A" || $c == "\x0D";
}
/** @ignore */
@@ -1090,7 +1241,13 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
/** @ignore */
public static function isNameStartChar($c)
{
- return $c == '_' || self::isPrefixStartChar($c);
+ return
+ $c == '\\' ||
+ $c == '_' ||
+ $c == ':' ||
+ $c == '%' ||
+ ctype_digit($c) ||
+ self::isPrefixStartChar($c);
}
/** @ignore */
@@ -1099,17 +1256,37 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
$o = ord($c);
return
self::isNameStartChar($c) ||
+ $o >= 0x30 && $o <= 0x39 || # 0-9
$c == '-' ||
- $o >= 0x30 && $o <= 0x39 || # numeric
$o == 0x00B7 ||
$o >= 0x0300 && $o <= 0x036F ||
$o >= 0x203F && $o <= 0x2040;
}
/** @ignore */
+ private static $localEscapedChars = array(
+ '_', '~', '.', '-', '!', '$', '&', '\'', '(', ')',
+ '*', '+', ',', ';', '=', '/', '?', '#', '@', '%'
+ );
+
+ /** @ignore */
+ public static function isLocalEscapedChar($c)
+ {
+ return in_array($c, self::$localEscapedChars);
+ }
+
+ /** @ignore */
public static function isPrefixChar($c)
{
- return self::isNameChar($c);
+ $o = ord($c);
+ return
+ $c == '_' ||
+ $o >= 0x30 && $o <= 0x39 || # 0-9
+ self::isPrefixStartChar($c) ||
+ $c == '-' ||
+ $o == 0x00B7 ||
+ $c >= 0x0300 && $c <= 0x036F ||
+ $c >= 0x203F && $c <= 0x2040;
}
/** @ignore */
@@ -1117,8 +1294,8 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
{
$o = ord($c);
return
- $o >= 0x41 && $o <= 0x5a ||
- $o >= 0x61 && $o <= 0x7a;
+ $o >= 0x41 && $o <= 0x5a || # A-Z
+ $o >= 0x61 && $o <= 0x7a; # a-z
}
/** @ignore */
@@ -1126,9 +1303,9 @@ class EasyRdf_Parser_Turtle extends EasyRdf_Parser_Ntriples
{
$o = ord($c);
return
- $o >= 0x41 && $o <= 0x5a || # A-Z
- $o >= 0x61 && $o <= 0x7a || # a-z
- $o >= 0x30 && $o <= 0x39 || # 0-9
+ $o >= 0x41 && $o <= 0x5a || # A-Z
+ $o >= 0x61 && $o <= 0x7a || # a-z
+ $o >= 0x30 && $o <= 0x39 || # 0-9
$c == '-';
}
}