diff --git a/README.md b/README.md index 9313d87a..7c8dc278 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,9 @@ the 1.1 release of RDF.rb: Notably, {RDF::Queryable#query} and {RDF::Query#execute} are now completely symmetric; this allows an implementation of {RDF::Queryable} to optimize queries using implementation-specific logic, allowing for substantial performance improvements when executing BGP queries. +## Differences between RDF 1.1 and RDF 1.2 +* {RDF::Literal} has an optional `direction` property for directional language-tagged strings. + ## Tutorials * [Getting data from the Semantic Web using Ruby and RDF.rb](https://semanticweb.org/wiki/Getting_data_from_the_Semantic_Web_%28Ruby%29) @@ -400,6 +403,7 @@ from BNode identity (i.e., they each entail the other) * [Ruby](https://ruby-lang.org/) (>= 2.6) * [LinkHeader][] (>= 0.0.8) +* [bcp47_spec][] ( ~> 0.2) * Soft dependency on [RestClient][] (>= 2.1) ## Installation @@ -481,8 +485,10 @@ This is free and unencumbered public domain software. For more information, see or the accompanying {file:UNLICENSE} file. [RDF]: https://www.w3.org/RDF/ -[N-Triples]: https://www.w3.org/TR/n-triples/ -[N-Quads]: https://www.w3.org/TR/n-quads/ +[LinkHeader]: https://github.com/asplake/link_header +[bcp47_spec]: https://github.com/dadah89/bcp47_spec +[N-Triples]: https://www.w3.org/TR/rdf-n-triples/ +[N-Quads]: https://www.w3.org/TR/rdf-n-quads/ [YARD]: https://yardoc.org/ [YARD-GS]: https://rubydoc.info/docs/yard/file/docs/GettingStarted.md [PDD]: https://unlicense.org/#unlicensing-contributions @@ -496,6 +502,7 @@ see or the accompanying {file:UNLICENSE} file. 
[SPARQL doc]: https://ruby-rdf.github.io/sparql [RDF 1.0]: https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/ [RDF 1.1]: https://www.w3.org/TR/rdf11-concepts/ +[RDF 1.2]: https://www.w3.org/TR/rdf12-concepts/ [SPARQL 1.1]: https://www.w3.org/TR/sparql11-query/ [RDF.rb]: https://ruby-rdf.github.io/ [RDF::DO]: https://ruby-rdf.github.io/rdf-do diff --git a/etc/n-triples.ebnf b/etc/n-triples.ebnf index 1b413c43..e973cb91 100644 --- a/etc/n-triples.ebnf +++ b/etc/n-triples.ebnf @@ -1,6 +1,40 @@ -[1] ntriplesDoc ::= triple? (EOL triple)* EOL? -[2] triple ::= subject predicate object '.' -[3] subject ::= IRIREF | BLANK_NODE_LABEL -[4] predicate ::= IRIREF -[5] object ::= IRIREF | BLANK_NODE_LABEL | literal -[6] literal ::= STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG)? +ntriplesDoc ::= triple? (EOL triple)* EOL? +triple ::= subject predicate object '.' +subject ::= IRIREF | BLANK_NODE_LABEL | quotedTriple +predicate ::= IRIREF +object ::= IRIREF | BLANK_NODE_LABEL | literal | quotedTriple +literal ::= STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG )? +quotedTriple ::= '<<' subject predicate object '>>' + +@terminals + +IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' +BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? 
+LANGTAG ::= "@" [a-zA-Z]+ ( "-" [a-zA-Z0-9]+ )* ('--' ('ltr'|'rtl'))? +STRING_LITERAL_QUOTE ::= '"' ( [^#x22#x5C#xA#xD] | ECHAR | UCHAR )* '"' +UCHAR ::= ( "\u" HEX HEX HEX HEX ) + | ( "\U" HEX HEX HEX HEX HEX HEX HEX HEX ) +ECHAR ::= ("\" [tbnrf"']) +PN_CHARS_BASE ::= ([A-Z] + | [a-z] + | [#x00C0-#x00D6] + | [#x00D8-#x00F6] + | [#x00F8-#x02FF] + | [#x0370-#x037D] + | [#x037F-#x1FFF] + | [#x200C-#x200D] + | [#x2070-#x218F] + | [#x2C00-#x2FEF] + | [#x3001-#xD7FF] + | [#xF900-#xFDCF] + | [#xFDF0-#xFFFD] + | [#x10000-#xEFFFF]) +PN_CHARS_U ::= PN_CHARS_BASE | '_' +PN_CHARS ::= (PN_CHARS_U + | "-" + | [0-9] + | #x00B7 + | [#x0300-#x036F] + | [#x203F-#x2040]) +HEX ::= ([0-9] | [A-F] | [a-f]) +EOL ::= [#xD#xA]+ diff --git a/lib/rdf/model/literal.rb b/lib/rdf/model/literal.rb index 9760afd6..28e06f04 100644 --- a/lib/rdf/model/literal.rb +++ b/lib/rdf/model/literal.rb @@ -1,4 +1,7 @@ # -*- encoding: utf-8 -*- + +require 'bcp47_spec' + module RDF ## # An RDF literal. @@ -9,7 +12,9 @@ module RDF # # Specific typed literals may have behavior different from the default implementation. See the following defined sub-classes for specific documentation. Additional sub-classes may be defined, and will interoperate by defining `DATATYPE` and `GRAMMAR` constants, in addition other required overrides of RDF::Literal behavior. # - # In RDF 1.1, all literals are typed, including plain literals and language tagged literals. Internally, plain literals are given the `xsd:string` datatype and language tagged literals are given the `rdf:langString` datatype. Creating a plain literal, without a datatype or language, will automatically provide the `xsd:string` datatype; similar for language tagged literals. Note that most serialization formats will remove this datatype. Code which depends on a literal having the `xsd:string` datatype being different from a plain literal (formally, without a datatype) may break. 
However note that the `#has\_datatype?` will continue to return `false` for plain or language-tagged literals. + # In RDF 1.1, all literals are typed, including plain literals and language-tagged strings. Internally, plain literals are given the `xsd:string` datatype and language-tagged strings are given the `rdf:langString` datatype. Creating a plain literal, without a datatype or language, will automatically provide the `xsd:string` datatype; similar for language-tagged strings. Note that most serialization formats will remove this datatype. Code which depends on a literal having the `xsd:string` datatype being different from a plain literal (formally, without a datatype) may break. However note that the `#has\_datatype?` will continue to return `false` for plain or language-tagged strings. + # + # RDF 1.2 adds **directional language-tagged strings** which are effectively a subclass of **language-tagged strings** containing an additional **direction** component with value either **ltr** or **rtl** for Left-to-Right or Right-to-Left. This determines the general direction of a string when presented in a user agent, where it might be in conflict with the inherent direction of the leading Unicode code points. Directional language-tagged strings are given the `rdf:dirLangString` datatype. # # * {RDF::Literal::Boolean} # * {RDF::Literal::Date} @@ -23,16 +28,23 @@ module RDF # value = RDF::Literal.new("Hello, world!") # value.plain? #=> true` # - # @example Creating a language-tagged literal (1) + # @example Creating a language-tagged string (1) # value = RDF::Literal.new("Hello!", language: :en) # value.language? 
#=> true # value.language #=> :en # - # @example Creating a language-tagged literal (2) + # @example Creating a language-tagged string (2) # RDF::Literal.new("Wazup?", language: :"en-US") # RDF::Literal.new("Hej!", language: :sv) # RDF::Literal.new("¡Hola!", language: :es) # + # @example Creating a directional language-tagged string + # value = RDF::Literal.new("Hello!", language: :en, direction: :ltr) + # value.language? #=> true + # value.language #=> :en + # value.direction? #=> true + # value.direction #=> :ltr + # # @example Creating an explicitly datatyped literal # value = RDF::Literal.new("2009-12-31", datatype: RDF::XSD.date) # value.datatype? #=> true @@ -105,8 +117,14 @@ def self.datatyped_class(uri) ## # @private - def self.new(value, language: nil, datatype: nil, lexical: nil, validate: false, canonicalize: false, **options) - raise ArgumentError, "datatype with language must be rdf:langString" if language && (datatype || RDF.langString).to_s != RDF.langString.to_s + def self.new(value, language: nil, datatype: nil, direction: nil, lexical: nil, validate: false, canonicalize: false, **options) + if language && direction + raise ArgumentError, "datatype with language and direction must be rdf:dirLangString" if (datatype || RDF.dirLangString).to_s != RDF.dirLangString.to_s + elsif language + raise ArgumentError, "datatype with language must be rdf:langString" if (datatype || RDF.langString).to_s != RDF.langString.to_s + else + raise ArgumentError, "datatype not compatible with language or direction" if language || direction + end klass = case when !self.equal?(RDF::Literal) @@ -128,7 +146,7 @@ def self.new(value, language: nil, datatype: nil, lexical: nil, validate: false, end end literal = klass.allocate - literal.send(:initialize, value, language: language, datatype: datatype, **options) + literal.send(:initialize, value, language: language, datatype: datatype, direction: direction, **options) literal.validate! if validate literal.canonicalize! 
if canonicalize literal @@ -137,18 +155,24 @@ def self.new(value, language: nil, datatype: nil, lexical: nil, validate: false, TRUE = RDF::Literal.new(true) FALSE = RDF::Literal.new(false) ZERO = RDF::Literal.new(0) + XSD_STRING = RDF::URI("http://www.w3.org/2001/XMLSchema#string") - # @return [Symbol] The language tag (optional). + # @return [Symbol] The language-tag (optional). Implies `datatype` is `rdf:langString`. attr_accessor :language + # @return [Symbol] The base direction (optional). Implies `datatype` is `rdf:dirLangString`. + attr_accessor :direction + # @return [URI] The XML Schema datatype URI (optional). attr_accessor :datatype ## - # Literals without a datatype are given either xsd:string or rdf:langString - # depending on if there is language + # Literals without a datatype are given either `xsd:string`, `rdf:langString`, or `rdf:dirLangString`, + # depending on if there is `language` and/or `direction`. # # @param [Object] value + # @param [Symbol] direction (nil) + # Initial text direction. 
# @param [Symbol] language (nil) # Language is downcased to ensure proper matching # @param [String] lexical (nil) @@ -163,16 +187,24 @@ def self.new(value, language: nil, datatype: nil, lexical: nil, validate: false, # @see http://www.w3.org/TR/rdf11-concepts/#section-Graph-Literal # @see http://www.w3.org/TR/rdf11-concepts/#section-Datatypes # @see #to_s - def initialize(value, language: nil, datatype: nil, lexical: nil, validate: false, canonicalize: false, **options) + def initialize(value, language: nil, datatype: nil, direction: nil, lexical: nil, validate: false, canonicalize: false, **options) @object = value.freeze @string = lexical if lexical @string = value if !defined?(@string) && value.is_a?(String) @string = @string.encode(Encoding::UTF_8).freeze if instance_variable_defined?(:@string) @object = @string if instance_variable_defined?(:@string) && @object.is_a?(String) @language = language.to_s.downcase.to_sym if language + @direction = direction.to_s.downcase.to_sym if direction @datatype = RDF::URI(datatype).freeze if datatype @datatype ||= self.class.const_get(:DATATYPE) if self.class.const_defined?(:DATATYPE) - @datatype ||= instance_variable_defined?(:@language) && @language ? RDF.langString : RDF::URI("http://www.w3.org/2001/XMLSchema#string") + @datatype ||= if instance_variable_defined?(:@language) && @language && + instance_variable_defined?(:@direction) && @direction + RDF.dirLangString + elsif instance_variable_defined?(:@language) && @language + RDF.langString + else + XSD_STRING + end end ## @@ -202,8 +234,8 @@ def literal? 
# # Compatibility of two arguments is defined as: # * The arguments are simple literals or literals typed as xsd:string - # * The arguments are plain literals with identical language tags - # * The first argument is a plain literal with language tag and the second argument is a simple literal or literal typed as xsd:string + # * The arguments are plain literals with identical language-tags and directions + # * The first argument is a plain literal with language-tag and the second argument is a simple literal or literal typed as xsd:string # # @example # compatible?("abc" "b") #=> true @@ -224,11 +256,11 @@ def compatible?(other) return false unless other.literal? && plain? && other.plain? # * The arguments are simple literals or literals typed as xsd:string - # * The arguments are plain literals with identical language tags - # * The first argument is a plain literal with language tag and the second argument is a simple literal or literal typed as xsd:string - language? ? - (language == other.language || other.datatype == RDF::URI("http://www.w3.org/2001/XMLSchema#string")) : - other.datatype == RDF::URI("http://www.w3.org/2001/XMLSchema#string") + # * The arguments are plain literals with identical language-tags + # * The first argument is a plain literal with language-tag and the second argument is a simple literal or literal typed as xsd:string + language? || direction? ? 
+ (language == other.language && direction == other.direction || other.datatype == XSD_STRING) : + other.datatype == XSD_STRING end ## @@ -236,7 +268,7 @@ def compatible?(other) # # @return [Integer] def hash - @hash ||= [to_s, datatype, language].hash + @hash ||= [to_s, datatype, language, direction].compact.hash end @@ -270,6 +302,7 @@ def eql?(other) self.value_hash == other.value_hash && self.value.eql?(other.value) && self.language.to_s.eql?(other.language.to_s) && + self.direction.to_s.eql?(other.direction.to_s) && self.datatype.eql?(other.datatype)) end @@ -290,7 +323,10 @@ def ==(other) case when self.eql?(other) true - when self.language? && self.language.to_s == other.language.to_s + when self.direction? && self.direction == other.direction + # Literals with directions can compare if languages and directions are identical + self.value_hash == other.value_hash && self.value == other.value + when self.language? && self.language == other.language # Literals with languages can compare if languages are identical self.value_hash == other.value_hash && self.value == other.value when self.simple? && other.simple? @@ -342,14 +378,18 @@ def <=>(other) ## # Returns `true` if this is a plain literal. A plain literal - # may have a language, but may not have a datatype. For + # may have a language and direction, but may not have a datatype. For # all practical purposes, this includes xsd:string literals # too. # # @return [Boolean] `true` or `false` # @see http://www.w3.org/TR/rdf-concepts/#dfn-plain-literal def plain? - [RDF.langString, RDF::URI("http://www.w3.org/2001/XMLSchema#string")].include?(datatype) + [ + RDF.langString, + RDF.dirLangString, + XSD_STRING + ].include?(datatype) end ## @@ -359,19 +399,28 @@ def plain? # @return [Boolean] `true` or `false` # @see http://www.w3.org/TR/sparql11-query/#simple_literal def simple? 
- datatype == RDF::URI("http://www.w3.org/2001/XMLSchema#string") + datatype == XSD_STRING end ## - # Returns `true` if this is a language-tagged literal. + # Returns `true` if this is a language-tagged string. # # @return [Boolean] `true` or `false` - # @see http://www.w3.org/TR/rdf-concepts/#dfn-plain-literal + # @see https://www.w3.org/TR/rdf-concepts/#dfn-language-tagged-string def language? - datatype == RDF.langString + [RDF.langString, RDF.dirLangString].include?(datatype) end alias_method :has_language?, :language? + ## + # Returns `true` if this is a directional language-tagged string. + # + # @return [Boolean] `true` or `false` + # @see https://www.w3.org/TR/rdf-concepts/#dfn-dir-lang-string + def direction? + datatype == RDF.dirLangString + end + ## # Returns `true` if this is a datatyped literal. # @@ -380,7 +429,7 @@ def language? # @return [Boolean] `true` or `false` # @see http://www.w3.org/TR/rdf-concepts/#dfn-typed-literal def datatype? - !plain? && !language? + !plain? && !language? && !direction? end alias_method :has_datatype?, :datatype? alias_method :typed?, :datatype? @@ -393,10 +442,13 @@ def datatype? # @return [Boolean] `true` or `false` # @since 0.2.1 def valid? - return false if language? && language.to_s !~ /^[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*$/ + BCP47.parse(language.to_s) if language? + return false if direction? && !%i{ltr rtl}.include?(direction) return false if datatype? && datatype.invalid? grammar = self.class.const_get(:GRAMMAR) rescue nil grammar.nil? 
|| value.match?(grammar) + rescue BCP47::InvalidLanguageTag + false end ## @@ -536,12 +588,12 @@ def inspect ## # @overload #to_str - # This method is implemented when the datatype is `xsd:string` or `rdf:langString` + # This method is implemented when the datatype is `xsd:string`, `rdf:langString`, or `rdf:dirLangString` # @return [String] def method_missing(name, *args) case name when :to_str - return to_s if @datatype == RDF.langString || @datatype == RDF::URI("http://www.w3.org/2001/XMLSchema#string") + return to_s if [RDF.langString, RDF.dirLangString, XSD_STRING].include?(@datatype) end super end @@ -549,7 +601,7 @@ def method_missing(name, *args) def respond_to_missing?(name, include_private = false) case name when :to_str - return true if @datatype == RDF.langString || @datatype == RDF::URI("http://www.w3.org/2001/XMLSchema#string") + return true if [RDF.langString, RDF.dirLangString, XSD_STRING].include?(@datatype) end super end diff --git a/lib/rdf/ntriples.rb b/lib/rdf/ntriples.rb index ba6198fa..f74a29bb 100644 --- a/lib/rdf/ntriples.rb +++ b/lib/rdf/ntriples.rb @@ -15,7 +15,7 @@ module RDF # # "rdf" . # - # ## RDFStar (RDF*) + # ## Quoted Triples # # Supports statements as resources using `<>`. 
# diff --git a/lib/rdf/ntriples/reader.rb b/lib/rdf/ntriples/reader.rb index 03e0eb29..c0c4c0c5 100644 --- a/lib/rdf/ntriples/reader.rb +++ b/lib/rdf/ntriples/reader.rb @@ -60,24 +60,16 @@ class Reader < RDF::Reader U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]").freeze IRI_RANGE = Regexp.compile("[[^<>\"{}\|\^`\\\\]&&[^\\x00-\\x20]]").freeze - # 163s PN_CHARS_BASE = /[A-Z]|[a-z]|#{U_CHARS1}/.freeze - # 164s PN_CHARS_U = /_|#{PN_CHARS_BASE}/.freeze - # 166s PN_CHARS = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/.freeze - # 159s ECHAR = /\\[tbnrf"'\\]/.freeze - # 18 + IRIREF = /<((?:#{IRI_RANGE}|#{UCHAR})*)>/.freeze - # 141s BLANK_NODE_LABEL = /_:((?:[0-9]|#{PN_CHARS_U})(?:(?:#{PN_CHARS}|\.)*#{PN_CHARS})?)/.freeze - # 144s - LANGTAG = /@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)/.freeze - # 22 + LANGTAG = /@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*(?:--[a-zA-Z0-9]+)?)/.freeze STRING_LITERAL_QUOTE = /"((?:[^\"\\\n\r]|#{ECHAR}|#{UCHAR})*)"/.freeze - # RDF* ST_START = /^<>/.freeze @@ -299,8 +291,9 @@ def read_literal if literal_str = match(LITERAL_PLAIN) literal_str = self.class.unescape(literal_str) literal = case - when language = match(LANGTAG) - RDF::Literal.new(literal_str, language: language) + when lang_dir = match(LANGTAG) + language, direction = lang_dir.split('--') + RDF::Literal.new(literal_str, language: language, direction: direction) when datatype = match(/^(\^\^)/) # FIXME RDF::Literal.new(literal_str, datatype: read_uriref || fail_object) else @@ -310,6 +303,10 @@ def read_literal literal.canonicalize! if canonicalize? 
literal end + rescue ArgumentError + v = literal_str + v += "@#{lang_dir}" if lang_dir + log_error("Invalid Literal (found: \"#{v}\")", lineno: lineno, token: "#v", exception: RDF::ReaderError) end ## diff --git a/lib/rdf/ntriples/writer.rb b/lib/rdf/ntriples/writer.rb index c155bd74..4b3b6411 100644 --- a/lib/rdf/ntriples/writer.rb +++ b/lib/rdf/ntriples/writer.rb @@ -312,6 +312,7 @@ def format_literal(literal, **options) # Note, escaping here is more robust than in Term text = quoted(escaped(literal.value)) text << "@#{literal.language}" if literal.language? + text << "--#{literal.direction}" if literal.direction? text << "^^<#{uri_for(literal.datatype)}>" if literal.datatype? text else diff --git a/lib/rdf/vocab/rdfv.rb b/lib/rdf/vocab/rdfv.rb index 31f82e0b..03560665 100644 --- a/lib/rdf/vocab/rdfv.rb +++ b/lib/rdf/vocab/rdfv.rb @@ -92,6 +92,10 @@ module RDF # # @return [RDF::Vocabulary::Term] # # @attr_reader :langString # + # # The datatype of directional language-tagged string values. + # # @return [RDF::Vocabulary::Term] + # # @attr_reader :dirLangString + # # # RDF/XML node element. 
# # @return [RDF::Vocabulary::Term] # # @attr_reader :Description @@ -283,6 +287,13 @@ def name; "RDF"; end "http://www.w3.org/2000/01/rdf-schema#seeAlso": %(http://www.w3.org/TR/rdf11-concepts/#section-Graph-Literal).freeze, subClassOf: "http://www.w3.org/2000/01/rdf-schema#Literal".freeze, type: "http://www.w3.org/2000/01/rdf-schema#Datatype".freeze + term :dirLangString, + comment: %(The datatype of directional language-tagged string values).freeze, + label: "dirLangString".freeze, + isDefinedBy: %(http://www.w3.org/1999/02/22-rdf-syntax-ns#).freeze, + "http://www.w3.org/2000/01/rdf-schema#seeAlso": %(http://www.w3.org/TR/rdf11-concepts/#section-Graph-Literal).freeze, + subClassOf: "http://www.w3.org/2000/01/rdf-schema#Literal".freeze, + type: "http://www.w3.org/2000/01/rdf-schema#Datatype".freeze # Extra definitions term :Description, diff --git a/rdf.gemspec b/rdf.gemspec index a6f3daf9..432e04bd 100755 --- a/rdf.gemspec +++ b/rdf.gemspec @@ -30,6 +30,7 @@ Gem::Specification.new do |gem| gem.required_ruby_version = '>= 2.6' gem.requirements = [] gem.add_runtime_dependency 'link_header', '~> 0.0', '>= 0.0.8' + gem.add_runtime_dependency 'bcp47_spec', '~> 0.2' gem.add_development_dependency 'rdf-spec', '~> 3.2' gem.add_development_dependency 'rdf-turtle', '~> 3.2' gem.add_development_dependency 'rdf-vocab', '~> 3.2' diff --git a/spec/model_literal_spec.rb b/spec/model_literal_spec.rb index 2aa3ce01..de00da2b 100644 --- a/spec/model_literal_spec.rb +++ b/spec/model_literal_spec.rb @@ -1,4 +1,5 @@ # coding: utf-8 +# frozen_string_literal: true require_relative 'spec_helper' require 'rdf/spec/literal' require 'rdf/xsd' @@ -8,15 +9,19 @@ def self.literal(selector) case selector - when :empty then [''.freeze] - when :plain then ['Hello'.freeze] - when :empty_lang then [''.freeze, {language: :en}] - when :plain_lang then ['Hello'.freeze, {language: :en}] + when :empty then [''] + when :plain then ['Hello'] + when :empty_lang then ['', {language: :en}] + when 
:plain_lang then ['Hello', {language: :en}] # langString language: must not contain spaces - when :wrong_lang then ['WrongLang'.freeze, {language: "en f"}] + when :wrong_lang then ['WrongLang', {language: "en f"}] # langString language: must be non-empty valid language - when :unset_lang then ['NoLanguage'.freeze, {datatype: RDF::langString}] - when :string then ['String'.freeze, {datatype: RDF::XSD.string}] + when :unset_lang then ['NoLanguage', {datatype: RDF.langString}] + when :lang_dir then ['Hello', {language: :en, direction: :ltr}] + when :wrong_dir then ['Hello', {language: :en, direction: "center-out"}] + when :dir_no_lang then ['Hello', {direction: :ltr}] + when :unset_dir then ['NoDir', {language: :en, datatype: RDF.dirLangString}] + when :string then ['String', {datatype: RDF::XSD.string}] when :false then [false] when :true then [true] when :int then [123] @@ -35,9 +40,9 @@ def self.literals(*selector) selector.inject([]) do |ary, sel| ary += case sel when :all_simple then %i(empty plain string).map {|s| literal(s)} - when :all_plain_lang then %i(empty_lang plain_lang).map {|s| literal(s)} + when :all_plain_lang then %i(empty_lang plain_lang lang_dir).map {|s| literal(s)} when :all_native then %i(false true int long decimal double time date datetime).map {|s| literal(s)} - when :all_invalid_lang then %i(wrong_lang unset_lang).map {|s| literal(s)} + when :all_invalid_lang then %i(wrong_lang unset_lang wrong_dir).map {|s| literal(s)} when :all_plain then literals(:all_simple, :all_plain_lang) else literals(:all_plain, :all_native) end @@ -2327,6 +2332,7 @@ def self.literals(*selector) { "language with xsd:string" => {value: "foo", language: "en", datatype: RDF::XSD.string}, "language with xsd:date" => {value: "foo", language: "en", datatype: RDF::XSD.date}, + "direction without language" => {value: "foo", direction: "ltr"} }.each do |name, opts| it "raises error for #{name}" do expect {RDF::Literal.new(opts.delete(:value), **opts)}.to 
raise_error(ArgumentError) diff --git a/spec/ntriples_spec.rb b/spec/ntriples_spec.rb index 1622cdf1..3728b03f 100644 --- a/spec/ntriples_spec.rb +++ b/spec/ntriples_spec.rb @@ -310,6 +310,22 @@ end end + context 'parse language/direction' do + { + "language" => ' "Hello"@en .', + "direction" => ' "Hello"@en--ltr .', + }.each_pair do |name, triple| + specify "test #{name}" do + stmt = reader.new(triple).first + if name.include?('dir') + expect(stmt.object.datatype).to eql RDF.dirLangString + else + expect(stmt.object.datatype).to eql RDF.langString + end + end + end + end + context 'should parse a value that was written without passing through the writer encoding' do [ %( "Procreation Metaphors in S\xC3\xA9an \xC3\x93 R\xC3\xADord\xC3\xA1in's Poetry" .), @@ -353,8 +369,9 @@ "XML Literals as Datatyped Literals (8)" => ' "a\n\nc"^^ .', "XML Literals as Datatyped Literals (9)" => ' "chat"^^ .', - "Plain literals with languages (1)" => ' "chat"@fr .', - "Plain literals with languages (2)" => ' "chat"@en .', + "Literals with languages (1)" => ' "chat"@fr .', + "Literals with languages (2)" => ' "chat"@en .', + #"Literals with language and direction" => ' "chat"@en--ltr .', "Typed Literals" => ' "abc"^^ .', "Plain lieral with embedded quote" => %q( "From \\"Voyage dans l’intérieur de l’Amérique du Nord, executé pendant les années 1832, 1833 et 1834, par le prince Maximilien de Wied-Neuwied\\" (Paris & Coblenz, 1839-1843)" .), }.each_pair do |name, nt| @@ -479,6 +496,18 @@ %q( "string"@1 .), %r(Expected end of statement \(found: "@1 \."\)) ], + "xx bad lang 2" => [ + %q( "string"@cantbethislong .), + %r(Invalid Literal) + ], + "xx bad dir 1" => [ + %q( "string"@en--UTD .), + %r(Invalid Literal) + ], + "xx bad dir 2" => [ + %q( "string"@--ltr .), + %r(Expected end of statement) + ], "nt-syntax-bad-string-05" => [ %q( """abc""" .), %r(Expected end of statement \(found: .* \."\)) @@ -608,6 +637,10 @@ expect(writer.new.format_literal(RDF::Literal.new('Hello, world!', 
language: :en))).to eq '"Hello, world!"@en' end + it "should correctly format directional language-tagged literals" do + expect(writer.new.format_literal(RDF::Literal.new('Hello, world!', language: :en, direction: :ltr))).to eq '"Hello, world!"@en--ltr' + end + it "should correctly format datatyped literals" do expect(writer.new.format_literal(RDF::Literal.new(3.1415))).to eq '"3.1415"^^' end