diff --git a/.github/workflows/ruby-unit-test.yml b/.github/workflows/ruby-unit-test.yml index 906e1ad5..e228251a 100644 --- a/.github/workflows/ruby-unit-test.yml +++ b/.github/workflows/ruby-unit-test.yml @@ -12,13 +12,14 @@ jobs: strategy: fail-fast: false matrix: - ruby-version: ['2.7', '3.0'] - triplestore: ['fs', 'ag'] + goo-slice: [ '20', '100', '500' ] + ruby-version: [ '2.7', '3.0' ] + triplestore: [ 'fs', 'ag', 'vo', 'gb' ] steps: - uses: actions/checkout@v4 - name: Install Dependencies - run: sudo apt-get -y install raptor2-utils + run: sudo apt-get update && sudo apt-get -y install raptor2-utils - name: Set up Ruby uses: ruby/setup-ruby@v1 with: @@ -27,8 +28,10 @@ jobs: - name: Add config file # tempoaray workaround for the config.rb file requirement run: echo 'Goo.config do |config| end' > config/config.rb + - name: List directory contents + run: ls -R ./test/data - name: Run tests - run: bundle exec rake test:docker:${{ matrix.triplestore }} TESTOPTS="-v" + run: GOO_SLICES=${{ matrix.goo-slice }} bundle exec rake test:docker:${{ matrix.triplestore }} TESTOPTS="-v" - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 00000000..6a81b4c8 --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +2.7.8 diff --git a/Gemfile b/Gemfile index bfbe2049..195c242d 100644 --- a/Gemfile +++ b/Gemfile @@ -6,6 +6,7 @@ gem "activesupport" gem "cube-ruby", require: "cube" gem "rake" gem "uuid" +gem "request_store" group :test do gem "minitest", '< 5.0' diff --git a/Gemfile.lock b/Gemfile.lock index b6867344..40b41992 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -84,6 +84,8 @@ GEM redis-client (>= 0.22.0) redis-client (0.22.1) connection_pool + request_store (1.6.0) + rack (>= 1.4) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) http-cookie (>= 1.0.2, < 2.0) @@ -121,6 +123,8 @@ GEM PLATFORMS x86_64-darwin-18 + x86_64-darwin-23 + x86_64-linux DEPENDENCIES activesupport @@ -131,6 +135,7 @@ DEPENDENCIES rack-accept rack-post-body-to-params rake + request_store simplecov simplecov-cobertura sinatra @@ -139,4 +144,4 @@ DEPENDENCIES uuid BUNDLED WITH - 2.3.15 + 2.4.22 diff --git a/docker-compose.yml b/docker-compose.yml index f66e7fef..6bd6cd56 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,7 +10,7 @@ services: retries: 30 solr-ut: - image: ontoportal/solr-ut:0.1.0 + image: ontoportal/solr-ut:0.0.2 ports: - 8983:8983 healthcheck: @@ -28,22 +28,23 @@ services: - AGRAPH_SUPER_PASSWORD=xyzzy shm_size: 1g ports: + # - 10035:10035 - 10000-10035:10000-10035 volumes: - agdata:/agraph/data # - ./agraph/etc:/agraph/etc command: > - bash -c "/agraph/bin/agraph-control --config /agraph/etc/agraph.cfg start - ; agtool repos create ontoportal_test --supersede - ; agtool users add anonymous - ; agtool users grant anonymous root:ontoportal_test:rw - ; tail -f /agraph/data/agraph.log" - healthcheck: - test: ["CMD-SHELL", "agtool storage-report ontoportal_test || exit 1"] - start_period: 30s - interval: 10s - timeout: 10s - retries: 10 + bash -c "/agraph/bin/agraph-control --config /agraph/etc/agraph.cfg start + ; agtool repos create ontoportal_test --supersede + ; agtool users add anonymous + ; agtool users grant anonymous root:ontoportal_test:rw + ; tail -f /agraph/data/agraph.log" + # healthcheck: + # test: ["CMD-SHELL", "curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1"] + # start_period: 10s + # interval: 10s + # timeout: 5s + # retries: 5 profiles: - ag @@ -58,5 +59,41 @@ services: && 4s-httpd -D -s-1 -p 9000 ontoportal_kb" profiles: - fs + virtuoso-ut: + image: tenforce/virtuoso:virtuoso7.2.5 + platform: linux/amd64 + environment: + - SPARQL_UPDATE=true + ports: + - 1111:1111 + - 8890:8890 + + profiles: + - vo + + graphdb: + image: ontotext/graphdb:10.3.3 + platform: linux/amd64 + privileged: true + environment: + GDB_HEAP_SIZE: 5G + GDB_JAVA_OPTS: >- + -Xms5g -Xmx5g + ports: + - 7200:7200 + - 7300:7300 + volumes: + - ./test/data/graphdb-repo-config.ttl:/opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl + - ./test/data/graphdb-test-load.nt:/opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt + + entrypoint: > + bash -c " importrdf load -f -c /opt/graphdb/dist/configs/templates/data/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/data/graphdb-test-load.nt ; graphdb -Ddefault.min.distinct.threshold=3000 " + profiles: + - gb + volumes: agdata: + + + + diff --git a/lib/goo.rb b/lib/goo.rb index 1d38a151..591d5d86 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -30,6 +30,11 @@ module Goo @@resource_options = Set.new([:persistent]).freeze + # Define the languages from which the properties values will be taken + # It choose the first language that match otherwise return all the values + @@main_languages = %w[en] + @@requested_language = nil + @@configure_flag = false @@sparql_backends = {} @@model_by_name = {} @@ -47,6 +52,52 @@ module Goo @@slice_loading_size = 500 + + + def self.log_debug_file(str) + debug_file = "./queries.txt" + File.write(debug_file, str.to_s + "\n", mode: 'a') + end + + + + def backend_4s? + sparql_backend_name.downcase.eql?("4store") + end + + def backend_ag? + sparql_backend_name.downcase.eql?("allegrograph") + end + + def backend_gb? + sparql_backend_name.downcase.eql?("graphdb") + end + + def backend_vo? + sparql_backend_name.downcase.eql?("virtuoso") + end + + + def self.main_languages + @@main_languages + end + def self.main_languages=(lang) + @@main_languages = lang + end + + def self.requested_language + @@requested_language + end + + def self.requested_language=(lang) + @@requested_language = lang + end + + def self.language_includes(lang) + lang_str = lang.to_s + main_languages.index { |l| lang_str.downcase.eql?(l) || lang_str.upcase.eql?(l)} + end + def self.add_namespace(shortcut, namespace,default=false) if !(namespace.instance_of? RDF::Vocabulary) raise ArgumentError, "Namespace must be a RDF::Vocabulary object" diff --git a/lib/goo/base/resource.rb b/lib/goo/base/resource.rb index e82265d4..4d497f8b 100644 --- a/lib/goo/base/resource.rb +++ b/lib/goo/base/resource.rb @@ -15,7 +15,7 @@ class Resource attr_reader :modified_attributes attr_reader :errors attr_reader :aggregates - attr_reader :unmapped + attr_writer :unmapped attr_reader :id @@ -42,9 +42,7 @@ def valid? self.class.attributes.each do |attr| inst_value = self.instance_variable_get("@#{attr}") attr_errors = Goo::Validators::Enforce.enforce(self,attr,inst_value) - unless attr_errors.nil? - validation_errors[attr] = attr_errors - end + validation_errors[attr] = attr_errors unless attr_errors.nil? end if !@persistent && validation_errors.length == 0 @@ -70,9 +68,7 @@ def valid? end def id=(new_id) - if !@id.nil? and @persistent - raise ArgumentError, "The id of a persistent object cannot be changed." - end + raise ArgumentError, "The id of a persistent object cannot be changed." if !@id.nil? and @persistent raise ArgumentError, "ID must be an RDF::URI" unless new_id.kind_of?(RDF::URI) @id = new_id end @@ -123,22 +119,32 @@ def missing_load_attributes def unmapped_set(attribute,value) @unmapped ||= {} - (@unmapped[attribute] ||= Set.new) << value + @unmapped[attribute] ||= Set.new + @unmapped[attribute].merge(Array(value)) unless value.nil? + end + + def unmapped_get(attribute) + @unmapped[attribute] end def unmmaped_to_array cpy = {} + @unmapped.each do |attr,v| cpy[attr] = v.to_a end @unmapped = cpy end + def unmapped(*args) + @unmapped&.transform_values do |language_values| + self.class.not_show_all_languages?(language_values, args) ? language_values.values.flatten: language_values + end + end + def delete(*args) if self.kind_of?(Goo::Base::Enum) - unless args[0] && args[0][:init_enum] - raise ArgumentError, "Enums cannot be deleted" - end + raise ArgumentError, "Enums cannot be deleted" unless args[0] && args[0][:init_enum] end raise ArgumentError, "This object is not persistent and cannot be deleted" if !@persistent @@ -146,9 +152,7 @@ def delete(*args) if !fully_loaded? missing = missing_load_attributes options_load = { models: [ self ], klass: self.class, :include => missing } - if self.class.collection_opts - options_load[:collection] = self.collection - end + options_load[:collection] = self.collection if self.class.collection_opts Goo::SPARQL::Queries.model_load(options_load) end @@ -164,9 +168,7 @@ def delete(*args) end @persistent = false @modified = true - if self.class.inmutable? && self.class.inm_instances - self.class.load_inmutable_instances - end + self.class.load_inmutable_instances if self.class.inmutable? && self.class.inm_instances return nil end @@ -174,15 +176,11 @@ def bring(*opts) opts.each do |k| if k.kind_of?(Hash) k.each do |k2,v| - if self.class.handler?(k2) - raise ArgumentError, "Unable to bring a method based attr #{k2}" - end + raise ArgumentError, "Unable to bring a method based attr #{k2}" if self.class.handler?(k2) self.instance_variable_set("@#{k2}",nil) end else - if self.class.handler?(k) - raise ArgumentError, "Unable to bring a method based attr #{k}" - end + raise ArgumentError, "Unable to bring a method based attr #{k}" if self.class.handler?(k) self.instance_variable_set("@#{k}",nil) end end @@ -197,9 +195,7 @@ def bring(*opts) def graph opts = self.class.collection_opts - if opts.nil? - return self.class.uri_type - end + return self.class.uri_type if opts.nil? col = collection if col.is_a?Array if col.length == 1 @@ -211,79 +207,14 @@ def graph return col ? col.id : nil end - def self.map_attributes(inst,equivalent_predicates=nil) - if (inst.kind_of?(Goo::Base::Resource) && inst.unmapped.nil?) || - (!inst.respond_to?(:unmapped) && inst[:unmapped].nil?) - raise ArgumentError, "Resource.map_attributes only works for :unmapped instances" - end - klass = inst.respond_to?(:klass) ? inst[:klass] : inst.class - unmapped = inst.respond_to?(:klass) ? inst[:unmapped] : inst.unmapped - list_attrs = klass.attributes(:list) - unmapped_string_keys = Hash.new - unmapped.each do |k,v| - unmapped_string_keys[k.to_s] = v - end - klass.attributes.each do |attr| - next if inst.class.collection?(attr) #collection is already there - next unless inst.respond_to?(attr) - attr_uri = klass.attribute_uri(attr,inst.collection).to_s - if unmapped_string_keys.include?(attr_uri.to_s) || - (equivalent_predicates && equivalent_predicates.include?(attr_uri)) - object = nil - if !unmapped_string_keys.include?(attr_uri) - equivalent_predicates[attr_uri].each do |eq_attr| - if object.nil? and !unmapped_string_keys[eq_attr].nil? - object = unmapped_string_keys[eq_attr].dup - else - if object.is_a?Array - if !unmapped_string_keys[eq_attr].nil? - object.concat(unmapped_string_keys[eq_attr]) - end - end - end - end - if object.nil? - inst.send("#{attr}=", - list_attrs.include?(attr) ? [] : nil, on_load: true) - next - end - else - object = unmapped_string_keys[attr_uri] - end - object = object.map { |o| o.is_a?(RDF::URI) ? o : o.object } - if klass.range(attr) - object = object.map { |o| - o.is_a?(RDF::URI) ? klass.range_object(attr,o) : o } - end - unless list_attrs.include?(attr) - object = object.first - end - if inst.respond_to?(:klass) - inst[attr] = object - else - inst.send("#{attr}=",object, on_load: true) - end - else - inst.send("#{attr}=", - list_attrs.include?(attr) ? [] : nil, on_load: true) - if inst.id.to_s == "http://purl.obolibrary.org/obo/IAO_0000415" - if attr == :definition - # binding.pry - end - end - end - end - end def collection opts = self.class.collection_opts if opts.instance_of?(Symbol) if self.class.attributes.include?(opts) value = self.send("#{opts}") - if value.nil? - raise ArgumentError, "Collection `#{opts}` is nil" - end + raise ArgumentError, "Collection `#{opts}` is nil" if value.nil? return value else raise ArgumentError, "Collection `#{opts}` is not an attribute" @@ -298,26 +229,45 @@ def add_aggregate(attribute,aggregate,value) def save(*opts) if self.kind_of?(Goo::Base::Enum) - unless opts[0] && opts[0][:init_enum] - raise ArgumentError, "Enums can only be created on initialization" - end + raise ArgumentError, "Enums can only be created on initialization" unless opts[0] && opts[0][:init_enum] end batch_file = nil - if opts && opts.length > 0 - if opts.first.is_a?(Hash) && opts.first[:batch] && opts.first[:batch].is_a?(File) + callbacks = true + if opts && opts.length > 0 && opts.first.is_a?(Hash) + if opts.first[:batch] && opts.first[:batch].is_a?(File) batch_file = opts.first[:batch] end + + callbacks = opts.first[:callbacks] end if !batch_file - if not modified? - return self - end + return self if not modified? raise Goo::Base::NotValidException, "Object is not valid. Check errors." unless valid? end + #set default values before saving + unless self.persistent? + self.class.attributes_with_defaults.each do |attr| + value = self.send("#{attr}") + if value.nil? + value = self.class.default(attr).call(self) + self.send("#{attr}=", value) + end + end + end + + #call update callback before saving + if callbacks + self.class.attributes_with_update_callbacks.each do |attr| + Goo::Validators::Enforce.enforce_callbacks(self, attr) + end + end + graph_insert, graph_delete = Goo::SPARQL::Triples.model_update_triples(self) - graph = self.graph() + graph = self.graph + + if graph_delete and graph_delete.size > 0 begin Goo.sparql_update_client.delete_data(graph_delete, graph: graph) @@ -339,7 +289,8 @@ def save(*opts) batch_file.write(lines.join("")) batch_file.flush() else - Goo.sparql_update_client.insert_data(graph_insert, graph: graph) + data = graph_insert.to_a.reduce("") { |acc, x| acc << x.to_s + " " } + Goo.sparql_data_client.execute_append_request(graph, data, "application/x-turtle") end rescue Exception => e raise e @@ -351,9 +302,7 @@ def save(*opts) @modified_attributes = Set.new @persistent = true - if self.class.inmutable? && self.class.inm_instances - self.class.load_inmutable_instances - end + self.class.load_inmutable_instances if self.class.inmutable? && self.class.inm_instances return self end @@ -391,9 +340,7 @@ def to_hash end end @unmapped.each do |attr,values| - unless all_attr_uris.include?(attr) - attr_hash[attr] = values.map { |v| v.to_s } - end + attr_hash[attr] = values.map { |v| v.to_s } unless all_attr_uris.include?(attr) end end attr_hash[:id] = @id @@ -413,13 +360,73 @@ def self.range_object(attr,id) return range_object end - def self.find(id, *options) - if !id.instance_of?(RDF::URI) && self.name_with == :id - id = RDF::URI.new(id) + + + def self.map_attributes(inst,equivalent_predicates=nil, include_languages: false) + if (inst.kind_of?(Goo::Base::Resource) && inst.unmapped.nil?) || + (!inst.respond_to?(:unmapped) && inst[:unmapped].nil?) + raise ArgumentError, "Resource.map_attributes only works for :unmapped instances" end - unless id.instance_of?(RDF::URI) - id = id_from_unique_attribute(name_with(),id) + klass = inst.respond_to?(:klass) ? inst[:klass] : inst.class + unmapped = inst.respond_to?(:klass) ? inst[:unmapped] : inst.unmapped(include_languages: include_languages) + list_attrs = klass.attributes(:list) + unmapped_string_keys = Hash.new + unmapped.each do |k,v| + unmapped_string_keys[k.to_s] = v + end + klass.attributes.each do |attr| + next if inst.class.collection?(attr) #collection is already there + next unless inst.respond_to?(attr) + attr_uri = klass.attribute_uri(attr,inst.collection).to_s + if unmapped_string_keys.include?(attr_uri.to_s) || equivalent_predicates&.include?(attr_uri) + object = nil + + if unmapped_string_keys.include?(attr_uri) + object = unmapped_string_keys[attr_uri] + else + equivalent_predicates[attr_uri].each do |eq_attr| + next if unmapped_string_keys[eq_attr].nil? + + if object.nil? + object = unmapped_string_keys[eq_attr].dup + elsif object.is_a?(Array) + object.concat(unmapped_string_keys[eq_attr]) + end + end + + if object.nil? + inst.send("#{attr}=", list_attrs.include?(attr) ? [] : nil, on_load: true) + next + end + end + + if object.is_a?(Hash) + object = object.transform_values{|values| Array(values).map{|o|o.is_a?(RDF::URI) ? o : o.object}} + else + object = object.map {|o| o.is_a?(RDF::URI) ? o : o.object} + end + + if klass.range(attr) + object = object.map { |o| + o.is_a?(RDF::URI) ? klass.range_object(attr,o) : o } + end + + object = object.first unless list_attrs.include?(attr) || include_languages + if inst.respond_to?(:klass) + inst[attr] = object + else + inst.send("#{attr}=",object, on_load: true) + end + else + inst.send("#{attr}=", + list_attrs.include?(attr) ? [] : nil, on_load: true) + end + end + end + def self.find(id, *options) + id = RDF::URI.new(id) if !id.instance_of?(RDF::URI) && self.name_with == :id + id = id_from_unique_attribute(name_with(),id) unless id.instance_of?(RDF::URI) if self.inmutable? && self.inm_instances && self.inm_instances[id] w = Goo::Base::Where.new(self) w.instance_variable_set("@result", [self.inm_instances[id]]) diff --git a/lib/goo/base/settings/settings.rb b/lib/goo/base/settings/settings.rb index 2a274454..a7008087 100644 --- a/lib/goo/base/settings/settings.rb +++ b/lib/goo/base/settings/settings.rb @@ -1,4 +1,5 @@ require 'active_support/core_ext/string' +require_relative 'yaml_settings' module Goo module Base @@ -12,8 +13,10 @@ module ClassMethods attr_reader :model_name attr_reader :attribute_uris + include YAMLScheme + def default_model_options - return {} + {} end def model(*args) @@ -34,7 +37,9 @@ def model(*args) @model_settings = default_model_options.merge(options || {}) - unless options.include?:name_with + init_yaml_scheme_settings + + unless options.include? :name_with raise ArgumentError, "The model `#{model_name}` definition should include the :name_with option" end Goo.add_model(@model_name,self) @@ -91,6 +96,16 @@ def attributes_with_defaults select{ |attr,opts| opts[:default] }).keys() end + def attributes_with_update_callbacks + (@model_settings[:attributes]. + select{ |attr,opts| opts[:onUpdate] }).keys + end + + + def update_callbacks(attr) + @model_settings[:attributes][attr][:onUpdate] + end + def default(attr) return @model_settings[:attributes][attr][:default] end @@ -185,10 +200,14 @@ def attribute(*args) attr_name = attr_name.to_sym options = options.pop options = {} if options.nil? - if options[:enforce].nil? or !options[:enforce].include?(:list) - options[:enforce] = options[:enforce] ? (options[:enforce] << :no_list) : [:no_list] - end + + options[:enforce] ||= [] + + set_data_type(options) + set_no_list_by_default(options) + @model_settings[:attributes][attr_name] = options + load_yaml_scheme_options(attr_name) shape_attribute(attr_name) namespace = attribute_namespace(attr_name) namespace = namespace || @model_settings[:namespace] @@ -232,15 +251,13 @@ def shape_attribute(attr) raise ArgumentError, "Method based attributes cannot be set" end if self.class.inverse?(attr) && !(args && args.last.instance_of?(Hash) && args.last[:on_load]) - raise ArgumentError, - "`#{attr}` is an inverse attribute. Values cannot be assigned." + raise ArgumentError, "`#{attr}` is an inverse attribute. Values cannot be assigned." end @loaded_attributes.add(attr) value = args[0] unless args.last.instance_of?(Hash) and args.last[:on_load] if self.persistent? and self.class.name_with == attr - raise ArgumentError, - "`#{attr}` attribute is used to name this resource and cannot be modified." + raise ArgumentError, "`#{attr}` attribute is used to name this resource and cannot be modified." end prev = self.instance_variable_get("@#{attr}") if !prev.nil? and !@modified_attributes.include?(attr) @@ -257,18 +274,29 @@ def shape_attribute(attr) self.instance_variable_set("@#{attr}",value) end define_method("#{attr}") do |*args| + attr_value = self.instance_variable_get("@#{attr}") + + if self.class.not_show_all_languages?(attr_value, args) + is_array = attr_value.values.first.is_a?(Array) + attr_value = attr_value.values.flatten + attr_value = attr_value.first unless is_array + end + + if self.class.handler?(attr) if @loaded_attributes.include?(attr) - return self.instance_variable_get("@#{attr}") + return attr_value end value = self.send("#{self.class.handler(attr)}") self.instance_variable_set("@#{attr}",value) @loaded_attributes << attr return value end + if (not @persistent) or @loaded_attributes.include?(attr) - return self.instance_variable_get("@#{attr}") + return attr_value else + # TODO: bug here when no labels from one of the main_lang available... (when it is called by ontologies_linked_data ontologies_submission) raise Goo::Base::AttributeNotLoaded, "Attribute `#{attr}` is not loaded for #{self.id}. Loaded attributes: #{@loaded_attributes.inspect}." end end @@ -372,6 +400,29 @@ def read_only(attributes) instance end + + def show_all_languages?(args) + args.first.is_a?(Hash) && args.first.keys.include?(:include_languages) && args.first[:include_languages] + end + + def not_show_all_languages?(values, args) + values.is_a?(Hash) && !show_all_languages?(args) + end + + private + + def set_no_list_by_default(options) + if options[:enforce].nil? or !options[:enforce].include?(:list) + options[:enforce] = options[:enforce] ? (options[:enforce] << :no_list) : [:no_list] + end + end + def set_data_type(options) + if options[:type] + options[:enforce] += Array(options[:type]) + options[:enforce].uniq! + options.delete :type + end + end end end end diff --git a/lib/goo/base/settings/yaml_settings.rb b/lib/goo/base/settings/yaml_settings.rb new file mode 100644 index 00000000..8a931b3a --- /dev/null +++ b/lib/goo/base/settings/yaml_settings.rb @@ -0,0 +1,45 @@ +require 'yaml' + +module Goo + module Base + module Settings + module YAMLScheme + attr_reader :yaml_settings + + def init_yaml_scheme_settings + scheme_file_path = @model_settings[:scheme] + @yaml_settings = read_yaml_settings_file(scheme_file_path) + end + + def attribute_yaml_settings(attr) + + return {} if yaml_settings.nil? + + yaml_settings[attr.to_sym] + end + + + + private + + def load_yaml_scheme_options(attr) + settings = attribute_settings(attr) + yaml_settings = attribute_yaml_settings(attr) + settings.merge! yaml_settings unless yaml_settings.nil? || yaml_settings.empty? + end + + def read_yaml_settings_file(scheme_file_path) + return if scheme_file_path.nil? + + yaml_contents = File.read(scheme_file_path) rescue return + + YAML.safe_load(yaml_contents, symbolize_names: true) + end + end + end + end +end + + + + diff --git a/lib/goo/base/where.rb b/lib/goo/base/where.rb index 5bc0fa8c..7aaad6ce 100644 --- a/lib/goo/base/where.rb +++ b/lib/goo/base/where.rb @@ -6,6 +6,7 @@ class Where AGGREGATE_PATTERN = Struct.new(:pattern,:aggregate) attr_accessor :where_options_load + include Goo::SPARQL::Processor def initialize(klass,*match_patterns) if Goo.queries_debug? && Thread.current[:ncbo_debug].nil? @@ -122,113 +123,7 @@ def unmmaped_predicates() end def process_query(count=false) - if Goo.queries_debug? && Thread.current[:ncbo_debug] - tstart = Time.now - query_resp = process_query_intl(count=count) - (Thread.current[:ncbo_debug][:goo_process_query] ||= []) << (Time.now - tstart) - return query_resp - end - return process_query_intl(count=count) - end - - def process_query_intl(count=false) - if @models == [] - @result = [] - return @result - end - - @include << @include_embed if @include_embed.length > 0 - - @predicates = unmmaped_predicates() - @equivalent_predicates = retrieve_equivalent_predicates() - - options_load = { models: @models, include: @include, ids: @ids, - graph_match: @pattern, klass: @klass, - filters: @filters, order_by: @order_by , - read_only: @read_only, rules: @rules, - predicates: @predicates, - no_graphs: @no_graphs, - equivalent_predicates: @equivalent_predicates } - - options_load.merge!(@where_options_load) if @where_options_load - if !@klass.collection_opts.nil? and !options_load.include?(:collection) - raise ArgumentError, "Collection needed call `#{@klass.name}`" - end - - ids = nil - if @index_key - raise ArgumentError, "Redis is not configured" unless Goo.redis_client - rclient = Goo.redis_client - cache_key = cache_key_for_index(@index_key) - raise ArgumentError, "Index not found" unless rclient.exists(cache_key) - if @page_i - if !@count - @count = rclient.llen(cache_key) - end - rstart = (@page_i -1) * @page_size - rstop = (rstart + @page_size) -1 - ids = rclient.lrange(cache_key,rstart,rstop) - else - ids = rclient.lrange(cache_key,0,-1) - end - ids = ids.map { |i| RDF::URI.new(i) } - end - - if @page_i && !@index_key - page_options = options_load.dup - page_options.delete(:include) - page_options[:include_pagination] = @include - if not @pre_count.nil? - @count = @pre_count - else - if !@count && @do_count - page_options[:count] = :count - @count = Goo::SPARQL::Queries.model_load(page_options).to_i - end - end - page_options.delete :count - page_options[:query_options] = @query_options - page_options[:page] = { page_i: @page_i, page_size: @page_size } - models_by_id = Goo::SPARQL::Queries.model_load(page_options) - options_load[:models] = models_by_id.values - - #models give the constraint - options_load.delete :graph_match - elsif count - count_options = options_load.dup - count_options.delete(:include) - count_options[:count] = :count - return Goo::SPARQL::Queries.model_load(count_options).to_i - end - - if @indexing - #do not care about include values - @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) - return @result - end - - options_load[:ids] = ids if ids - models_by_id = {} - if (@page_i && options_load[:models].length > 0) || - (!@page_i && (@count.nil? || @count > 0)) - models_by_id = Goo::SPARQL::Queries.model_load(options_load) - if @aggregate - if models_by_id.length > 0 - options_load_agg = { models: models_by_id.values, klass: @klass, - filters: @filters, read_only: @read_only, - aggregate: @aggregate, rules: @rules } - - options_load_agg.merge!(@where_options_load) if @where_options_load - Goo::SPARQL::Queries.model_load(options_load_agg) - end - end - end - unless @page_i - @result = @models ? @models : models_by_id.values - else - @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) - end - @result + process_query_call(count = count) end def disable_rules diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb index b3b4f4c4..ff51e8b7 100644 --- a/lib/goo/config/config.rb +++ b/lib/goo/config/config.rb @@ -25,7 +25,7 @@ def config(&block) @settings.redis_port ||= ENV['REDIS_PORT'] || 6379 @settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/' @settings.queries_debug ||= ENV['QUERIES_DEBUG'] || false - + @settings.slice_loading_size ||= ENV['GOO_SLICES'] || 500 puts "(GOO) >> Using RDF store (#{@settings.goo_backend_name}) #{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}" puts "(GOO) >> Using term search server at #{@settings.search_server_url}" puts "(GOO) >> Using Redis instance at #{@settings.redis_host}:#{@settings.redis_port}" diff --git a/lib/goo/search/search.rb b/lib/goo/search/search.rb index 2987b4e2..377a3429 100644 --- a/lib/goo/search/search.rb +++ b/lib/goo/search/search.rb @@ -90,6 +90,17 @@ def unindexByQuery(query, connection_name=:main) Goo.search_connection(connection_name).delete_by_query(query) end + # Get the doc that will be indexed in solr + def get_indexable_object() + # To make the code less readable the guys that wrote it managed to hide the real function called by this line + # It is "get_index_doc" in ontologies_linked_data Class.rb + doc = self.class.model_settings[:search_options][:document].call(self) + doc[:resource_id] = doc[:id].to_s + doc[:id] = get_index_id.to_s + # id: clsUri_ONTO-ACRO_submissionNumber. i.e.: http://lod.nal.usda.gov/nalt/5260_NALT_4 + doc + end + def indexCommit(attrs=nil, connection_name=:main) Goo.search_connection(connection_name).commit(:commit_attributes => attrs || {}) end diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb index 8f7ad9e1..cf958398 100644 --- a/lib/goo/sparql/client.rb +++ b/lib/goo/sparql/client.rb @@ -14,7 +14,6 @@ class Client < RSPARQL::Client "text/x-nquads" => "nquads" } - BACKEND_4STORE = "4store" def status_based_sleep_time(operation) sleep(0.5) @@ -39,16 +38,17 @@ def status_based_sleep_time(operation) end class DropGraph - def initialize(g) + def initialize(g, silent: false) @graph = g @caching_options = { :graph => @graph.to_s } + @silent = silent end def to_s - return "DROP GRAPH <#{@graph.to_s}>" + "DROP #{@silent ? 'SILENT' : ''} GRAPH <#{@graph.to_s}>" end def options #Returns the caching option - return @caching_options + @caching_options end end @@ -77,7 +77,7 @@ def bnodes_filter_file(file_path,mime_type) end def delete_data_graph(graph) - Goo.sparql_update_client.update(DropGraph.new(graph)) + Goo.sparql_update_client.update(DropGraph.new(graph, silent: Goo.backend_vo?)) end def append_triples_no_bnodes(graph,file_path,mime_type_in) @@ -184,9 +184,7 @@ def status resp end - private - - def execute_append_request(graph, data_file, mime_type_in) + def params_for_backend(graph, data_file, mime_type_in, method = :post) mime_type = "text/turtle" if mime_type_in == "text/x-nquads" @@ -194,10 +192,9 @@ def execute_append_request(graph, data_file, mime_type_in) graph = "http://data.bogus.graph/uri" end - params = {method: :post, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil} - backend_name = Goo.sparql_backend_name + params = {method: method, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil} - if backend_name == BACKEND_4STORE + if Goo.backend_4s? params[:payload] = { graph: graph.to_s, data: data_file, @@ -205,12 +202,18 @@ def execute_append_request(graph, data_file, mime_type_in) } #for some reason \\\\ breaks parsing params[:payload][:data] = params[:payload][:data].split("\n").map { |x| x.sub("\\\\","") }.join("\n") + elsif Goo.backend_vo? + params[:url] = "http://localhost:8890/sparql-graph-crud?graph=#{CGI.escape(graph.to_s)}" + params[:payload] = data_file else params[:url] << "?context=#{CGI.escape("<#{graph.to_s}>")}" params[:payload] = data_file end + params + end - RestClient::Request.execute(params) + def execute_append_request(graph, data_file, mime_type_in) + RestClient::Request.execute(params_for_backend(graph, data_file, mime_type_in)) end end end diff --git a/lib/goo/sparql/loader.rb b/lib/goo/sparql/loader.rb index 821aba26..f3dcdb3d 100644 --- a/lib/goo/sparql/loader.rb +++ b/lib/goo/sparql/loader.rb @@ -1,3 +1,4 @@ +require 'request_store' module Goo module SPARQL module Loader @@ -6,8 +7,10 @@ class << self def model_load(*options) options = options.last + set_request_lang(options) if options[:models] && options[:models].is_a?(Array) && \ (options[:models].length > Goo.slice_loading_size) + options = options.dup models = options[:models] include_options = options[:include] @@ -33,78 +36,56 @@ def model_load(*options) ## def model_load_sliced(*options) options = options.last - ids = options[:ids] klass = options[:klass] incl = options[:include] models = options[:models] - aggregate = options[:aggregate] - read_only = options[:read_only] collection = options[:collection] - count = options[:count] - include_pagination = options[:include_pagination] - equivalent_predicates = options[:equivalent_predicates] - predicates = options[:predicates] - - embed_struct, klass_struct = get_structures(aggregate, count, incl, include_pagination, klass, read_only) - raise_resource_must_persistent_error(models) if models + embed_struct, klass_struct = get_structures(options[:aggregate], options[:count] , incl, options[:include_pagination], klass, options[:read_only]) + raise_not_persistent_error(models) if models graphs = get_graphs(collection, klass) - ids, models_by_id = get_models_by_id_hash(ids, klass, klass_struct, models) + models_by_id = get_models_by_id_hash( options[:ids], klass, klass_struct, models) - query_options = {} #TODO: breaks the reasoner patterns = [[:id, RDF.type, klass.uri_type(collection)]] incl_embed = nil - unmapped = nil bnode_extraction = nil properties_to_include = [] variables = [:id] - if incl - if incl.first && incl.first.is_a?(Hash) && incl.first.include?(:bnode) + if incl && !incl.empty? + if incl.first.is_a?(Hash) && incl.first.include?(:bnode) #limitation only one level BNODE bnode_extraction, patterns, variables = get_bnode_extraction(collection, incl, klass, patterns) else variables = %i[id attributeProperty attributeObject] if incl.first == :unmapped - unmapped = true - properties_to_include = predicate_map(predicates) + properties_to_include = predicate_map(options[:predicates]) else - #make it deterministic - incl_embed = get_embed_includes(incl) - graphs, properties_to_include, query_options = get_includes(collection, graphs, incl, - klass, query_options) + graphs, properties_to_include, incl_embed = get_includes(collection, graphs, incl, klass) end end end - expand_equivalent_predicates(properties_to_include, equivalent_predicates) - query_builder = Goo::SPARQL::QueryBuilder.new options - select, aggregate_projections = query_builder.build_select_query(ids, variables, graphs, - patterns, query_options, - properties_to_include) + options[:properties_to_include] = properties_to_include + + + select, aggregate_projections = Goo::SPARQL::QueryBuilder.new(options) + .build_query(models_by_id.keys, variables, graphs, patterns) solution_mapper = Goo::SPARQL::SolutionMapper.new aggregate_projections, bnode_extraction, embed_struct, incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, - variables, ids, options + variables, options solution_mapper.map_each_solutions(select) end private - def expand_equivalent_predicates(properties_to_include, eq_p) - - return unless eq_p && !eq_p.empty? - - properties_to_include&.each do |property_attr, property| - property_uri = property[:uri] - property[:equivalents] = eq_p[property_uri.to_s].to_a.map { |p| RDF::URI.new(p) } if eq_p.include?(property_uri.to_s) - end - + def set_request_lang(options) + options[:requested_lang] = RequestStore.store[:requested_lang] end def predicate_map(predicates) @@ -126,19 +107,19 @@ def predicate_map(predicates) predicates_map end - def get_includes(collection, graphs, incl, klass, query_options) + def get_includes(collection, graphs, incl, klass) + incl_embed ,incl = get_embed_includes(incl) incl = incl.to_a incl.delete_if { |a| !a.instance_of?(Symbol) } properties_to_include = {} incl.each do |attr| graph, pattern = query_pattern(klass, attr, collection: collection) - add_rules(attr, klass, query_options) if klass.attributes(:all).include?(attr) properties_to_include[attr] = { uri: pattern[1], is_inverse: klass.inverse?(attr) } # [property_attr, property_uri , inverse: true] end graphs << graph if graph && (!klass.collection_opts || klass.inverse?(attr)) end - [graphs, properties_to_include,query_options] + [graphs, properties_to_include, incl_embed] end def get_bnode_extraction(collection, incl, klass, patterns) @@ -175,7 +156,7 @@ def get_models_by_id_hash(ids, klass, klass_struct, models) #a where without models end - return ids, models_by_id + models_by_id end def get_graphs(collection, klass) @@ -228,7 +209,7 @@ def get_structures(aggregate, count, incl, include_pagination, klass, read_only) [embed_struct, klass_struct] end - def raise_resource_must_persistent_error(models) + def raise_not_persistent_error(models) models.each do |m| if (not m.nil?) && !m.respond_to?(:klass) #read only raise ArgumentError, @@ -246,7 +227,7 @@ def get_embed_includes(incl) #variables.concat(embed_variables) incl.concat(embed_variables) end - incl_embed + [incl_embed, incl] end end diff --git a/lib/goo/sparql/mixins/query_pattern.rb b/lib/goo/sparql/mixins/query_pattern.rb index cc370795..9ee0df7d 100644 --- a/lib/goo/sparql/mixins/query_pattern.rb +++ b/lib/goo/sparql/mixins/query_pattern.rb @@ -3,9 +3,6 @@ module SPARQL module QueryPatterns - def add_rules(attr,klass,query_options) - (query_options[:rules] ||= []) << :SUBC if klass.transitive?(attr) - end def query_pattern(klass,attr,**opts) value = opts[:value] || nil diff --git a/lib/goo/sparql/mixins/solution_lang_filter.rb b/lib/goo/sparql/mixins/solution_lang_filter.rb new file mode 100644 index 00000000..3a751b13 --- /dev/null +++ b/lib/goo/sparql/mixins/solution_lang_filter.rb @@ -0,0 +1,176 @@ +module Goo + module SPARQL + module Solution + class LanguageFilter + + attr_reader :requested_lang, :unmapped, :objects_by_lang + + def initialize(requested_lang: RequestStore.store[:requested_lang], unmapped: false, list_attributes: []) + @list_attributes = list_attributes + @objects_by_lang = {} + @unmapped = unmapped + @requested_lang = get_language(requested_lang) + end + + def fill_models_with_all_languages(models_by_id) + objects_by_lang.each do |id, predicates| + model = models_by_id[id] + predicates.each do |predicate, values| + + if values.values.all? { |v| v.all? { |x| literal?(x) && x.plain?} } + pull_stored_values(model, values, predicate, @unmapped) + end + end + end + end + + + def set_model_value(model, predicate, values, value) + set_value(model, predicate, value) do + model.send("#{predicate}=", values, on_load: true) + end + end + + def set_unmapped_value(model, predicate, value) + set_value(model, predicate, value) do + return add_unmapped_to_model(model, predicate, value) + end + end + + def models_unmapped_to_array(m) + if show_all_languages? + model_group_by_lang(m) + else + m.unmmaped_to_array + end + end + + private + + + def set_value(model, predicate, value, &block) + language = object_language(value) + + if requested_lang.eql?(:ALL) || !literal?(value) || language_match?(language) + block.call + end + + if requested_lang.eql?(:ALL) || requested_lang.is_a?(Array) + language = "@none" if language.nil? || language.eql?(:no_lang) + store_objects_by_lang(model.id, predicate, value, language) + end + end + + def model_group_by_lang(model) + unmapped = model.unmapped + cpy = {} + + unmapped.each do |attr, v| + cpy[attr] = group_by_lang(v) + end + + model.unmapped = cpy + end + + def group_by_lang(values) + + return values.to_a if values.all?{|x| x.is_a?(RDF::URI) || !x.respond_to?(:language) } + + values = values.group_by { |x| x.respond_to?(:language) && x.language ? x.language.to_s.downcase : :none } + + no_lang = values[:none] || [] + return no_lang if !no_lang.empty? && no_lang.all? { |x| x.respond_to?(:plain?) && !x.plain? } + + values + end + + + def object_language(new_value) + new_value.language || :no_lang if new_value.is_a?(RDF::Literal) + end + + def language_match?(language) + # no_lang means that the object is not a literal + return true if language.eql?(:no_lang) + + return requested_lang.include?(language) if requested_lang.is_a?(Array) + + language&.upcase.eql?(requested_lang) + end + + def literal?(object) + !object_language(object).nil? + end + + def store_objects_by_lang(id, predicate, object, language) + # store objects in this format: [id][predicate][language] = [objects] + return if requested_lang.is_a?(Array) && !requested_lang.include?(language) + + language_key = language.downcase + + objects_by_lang[id] ||= {} + objects_by_lang[id][predicate] ||= {} + objects_by_lang[id][predicate][language_key] ||= [] + + objects_by_lang[id][predicate][language_key] << object + end + + + def add_unmapped_to_model(model, predicate, value) + + if model.respond_to? :klass # struct + model[:unmapped] ||= {} + model[:unmapped][predicate] ||= [] + model[:unmapped][predicate] << value unless value.nil? + else + model.unmapped_set(predicate, value) + end + end + + def pull_stored_values(model, values, predicate, unmapped) + if unmapped + add_unmapped_to_model(model, predicate, values) + else + values = values.map do |language, values_literals| + values_string = values_literals.map{|x| x.object} + values_string = values_string.first unless list_attributes?(predicate) + [language, values_string] + end.to_h + + model.send("#{predicate}=", values, on_load: true) + end + + end + + def unmapped_get(model, predicate) + if model && model.respond_to?(:klass) # struct + model[:unmapped]&.dig(predicate) + else + model.unmapped_get(predicate) + end + + end + + def list_attributes?(predicate) + @list_attributes.include?(predicate) + end + + + def show_all_languages? + @requested_lang.is_a?(Array) || @requested_lang.eql?(:ALL) + end + + def get_language(languages) + languages = portal_language if languages.nil? || languages.empty? + lang = languages.to_s.split(',').map { |l| l.upcase.to_sym } + lang.length == 1 ? lang.first : lang + end + + def portal_language + Goo.main_languages.first + end + + end + end + end +end diff --git a/lib/goo/sparql/processor.rb b/lib/goo/sparql/processor.rb new file mode 100644 index 00000000..c89778ed --- /dev/null +++ b/lib/goo/sparql/processor.rb @@ -0,0 +1,137 @@ +module Goo + module SPARQL + module Processor + def process_query_call(count=false) + if Goo.queries_debug? && Thread.current[:ncbo_debug] + start = Time.now + query_resp = process_query_intl(count=count) + (Thread.current[:ncbo_debug][:goo_process_query] ||= []) << (Time.now - start) + return query_resp + end + process_query_init(count=count) + end + + private + def process_query_init(count=false) + if @models == [] + @result = [] + return @result + end + + @include << @include_embed if @include_embed.length > 0 + + @predicates = unmmaped_predicates() + @equivalent_predicates = retrieve_equivalent_predicates() + + options_load = { models: @models, include: @include, ids: @ids, + graph_match: @pattern, klass: @klass, + filters: @filters, order_by: @order_by , + read_only: @read_only, rules: @rules, + predicates: @predicates, + no_graphs: @no_graphs, + equivalent_predicates: @equivalent_predicates } + + options_load.merge!(@where_options_load) if @where_options_load + + if !@klass.collection_opts.nil? and !options_load.include?(:collection) + raise ArgumentError, "Collection needed call `#{@klass.name}`" + end + + ids = nil + + + ids = redis_indexed_ids if use_redis_index? + + if @page_i && !use_redis_index? + page_options = options_load.dup + page_options.delete(:include) + page_options[:include_pagination] = @include + page_options[:query_options] = @query_options + + @count = run_count_query(page_options) + page_options[:page] = { page_i: @page_i, page_size: @page_size } + + models_by_id = Goo::SPARQL::Queries.model_load(page_options) + options_load[:models] = models_by_id.values + #models give the constraint + options_load.delete :graph_match + elsif count + count_options = options_load.dup + count_options.delete(:include) + return run_count_query(count_options) + end + + if @indexing + #do not care about include values + @result = Goo::Base::Page.new(@page_i,@page_size,@count,models_by_id.values) + return @result + end + + options_load[:ids] = ids if ids + models_by_id = {} + + if (@page_i && options_load[:models].nil?) || + (@page_i && options_load[:models].length > 0) || + (!@page_i && (@count.nil? || @count > 0)) + + models_by_id = Goo::SPARQL::Queries.model_load(options_load) + run_aggregate_query(models_by_id) if @aggregate && models_by_id.length > 0 + end + + if @page_i + @result = Goo::Base::Page.new(@page_i, @page_size, @count, models_by_id.values) + else + @result = @models ? @models : models_by_id.values + end + @result + end + + + def use_redis_index? + @index_key + end + + def run_aggregate_query(models_by_id) + options_load_agg = { models: models_by_id.values, klass: @klass, + filters: @filters, read_only: @read_only, + aggregate: @aggregate, rules: @rules } + options_load_agg.merge!(@where_options_load) if @where_options_load + Goo::SPARQL::Queries.model_load(options_load_agg) + end + def run_count_query(page_options) + count = 0 + if @pre_count + count = @pre_count + elsif !@count && @do_count + page_options[:count] = :count + r = Goo::SPARQL::Queries.model_load(page_options) + if r.is_a? Numeric + count = r.to_i + end + elsif @count + count = @count + end + page_options.delete :count + count + end + + def redis_indexed_ids + raise ArgumentError, "Redis is not configured" unless Goo.redis_client + rclient = Goo.redis_client + cache_key = cache_key_for_index(@index_key) + raise ArgumentError, "Index not found" unless rclient.exists(cache_key) + if @page_i + if !@count + @count = rclient.llen(cache_key) + end + rstart = (@page_i -1) * @page_size + rstop = (rstart + @page_size) -1 + ids = rclient.lrange(cache_key,rstart,rstop) + else + ids = rclient.lrange(cache_key,0,-1) + end + ids = ids.map { |i| RDF::URI.new(i) } + end + end + end +end diff --git a/lib/goo/sparql/query_builder.rb b/lib/goo/sparql/query_builder.rb index 7ef72193..1a6ea740 100644 --- a/lib/goo/sparql/query_builder.rb +++ b/lib/goo/sparql/query_builder.rb @@ -14,32 +14,31 @@ def initialize(options) @unions = options[:unions] || [] @aggregate = options[:aggregate] @collection = options[:collection] - @model_query_options = options[:query_options] @enable_rules = options[:rules] @order_by = options[:order_by] - + @internal_variables_map = {} + @equivalent_predicates = options[:equivalent_predicates] + @properties_to_include = options[:properties_to_include] @query = get_client end - def build_select_query(ids, variables, graphs, patterns, - query_options, properties_to_include) - - internal_variables = graph_match(@collection, @graph_match, graphs, @klass, patterns, query_options, @unions) + def build_query(ids, variables, graphs, patterns) + query_options = {} - aggregate_projections, aggregate_vars, - variables, optional_patterns = get_aggregate_vars(@aggregate, @collection, graphs, - @klass, @unions, variables, internal_variables) + expand_equivalent_predicates(@properties_to_include, @equivalent_predicates) - @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables) + properties_to_include = @properties_to_include + patterns = graph_match(@collection, @graph_match, graphs, @klass, patterns, query_options, @unions) variables, patterns = add_some_type_to_id(patterns, query_options, variables) - - query_filter_str, patterns, optional_patterns = - filter_query_strings(@collection, graphs, internal_variables, @klass, optional_patterns, patterns, @query_filters) + aggregate_projections, aggregate_vars, variables, optional_patterns = get_aggregate_vars(@aggregate, @collection, graphs, @klass, @unions, variables) + query_filter_str, patterns, optional_patterns, filter_variables = + filter_query_strings(@collection, graphs, @klass, optional_patterns, patterns, @query_filters) + @order_by, variables, optional_patterns = init_order_by(@count, @klass, @order_by, optional_patterns, variables,patterns, query_options, graphs) variables = [] if @count variables.delete :some_type - select_distinct(variables, aggregate_projections) + select_distinct(variables, aggregate_projections, filter_variables) .from(graphs) .where(patterns) .union_bind_in_where(properties_to_include) @@ -55,42 +54,45 @@ def build_select_query(ids, variables, graphs, patterns, @query.union(*@unions) unless @unions.empty? ids_filter(ids) if ids - order_by if @order_by # TODO test if work + + + @query.order_by(*order_by_string) if @order_by + put_query_aggregate_vars(aggregate_vars) if aggregate_vars count if @count paginate if @page - ## TODO see usage of rules and query_options - query_options.merge!(@model_query_options) if @model_query_options - query_options[:rules] = [:NONE] unless @enable_rules - query_options = nil if query_options.empty? - if query_options - query_options[:rules] = query_options[:rules]&.map { |x| x.to_s }.join('+') - else - query_options = { rules: ['NONE'] } - end - @query.options[:query_options] = query_options [@query, aggregate_projections] end def union_bind_in_where(properties) binding_as = [] - properties.each do |property_attr, property| - predicates = [property[:uri]] + (property[:equivalents] || []) - options = { - binds: [{ value: property_attr, as: :attributeProperty }] - } - subject = property[:subject] || :id - predicates.uniq.each do |predicate_uri| - pattern = if property[:is_inverse] - [:attributeObject, predicate_uri, subject] - else - [subject, predicate_uri, :attributeObject] - end - binding_as << [[pattern], options] + if Goo.backend_4s? || Goo.backend_gb? + properties.each do |property_attr, property| + predicates = [property[:uri]] + (property[:equivalents] || []) + options = { + binds: [{ value: property_attr, as: :attributeProperty }] + } + subject = property[:subject] || :id + predicates.uniq.each do |predicate_uri| + pattern = if property[:is_inverse] + [:attributeObject, predicate_uri, subject] + else + [subject, predicate_uri, :attributeObject] + end + binding_as << [[pattern], options] + end end + + else + direct_predicate, inverse_predicate = include_properties + direct_filter = direct_predicate.empty? ? [] : [{ values: direct_predicate, predicate: :attributeProperty }] + inverse_filter = inverse_predicate.empty? ? [] : [{ values: inverse_predicate, predicate: :attributeProperty }] + binding_as << [[[:id, :attributeProperty, :attributeObject]], { filters: direct_filter}] unless direct_filter.empty? + binding_as << [[[:inverseAttributeObject, :attributeProperty, :id]], { filters: inverse_filter}] unless inverse_filter.empty? end + @query.optional_union_with_bind_as(*binding_as) unless binding_as.empty? self end @@ -117,10 +119,17 @@ def put_query_aggregate_vars(aggregate_vars) self end - def order_by - order_by_str = @order_by.map { |attr, order| "#{order.to_s.upcase}(?#{attr})" } - @query.order_by(*order_by_str) - self + def order_by_string + order_variables = [] + order_str = @order_by&.map do |attr, order| + if order.is_a?(Hash) + sub_attr, order = order.first + attr = @internal_variables_map.select{ |internal_var, attr_var| attr_var.eql?({attr => sub_attr}) || attr_var.eql?(sub_attr)}.keys.last + end + order_variables << attr + "#{order.to_s.upcase}(?#{attr})" + end + order_str end def from(graphs) @@ -135,10 +144,11 @@ def from(graphs) self end - def select_distinct(variables, aggregate_projections) - + def select_distinct(variables, aggregate_patterns, filter_variables) + variables << :inverseAttributeObject if inverse_predicate? select_vars = variables.dup - reject_aggregations_from_vars(select_vars, aggregate_projections) if aggregate_projections + reject_aggregations_from_vars(select_vars, aggregate_patterns) if aggregate_patterns + select_vars = (select_vars + filter_variables).uniq if @page && Goo.backend_4s? # Fix for 4store pagination with a filter @query = @query.select(*select_vars).distinct(true) self end @@ -147,7 +157,7 @@ def ids_filter(ids) filter_id = [] ids.each do |id| - filter_id << "?id = #{id.to_ntriples.to_s}" + filter_id << "?id = #{id.to_ntriples.to_s.gsub(' ', '%20')}" end filter_id_str = filter_id.join ' || ' @query.filter filter_id_str @@ -156,25 +166,37 @@ def ids_filter(ids) private + def include_properties + direct_predicates = @properties_to_include.select { |_, property| !property[:is_inverse] }.map { |_, property| [property[:uri]] + (property[:equivalents] || []) }.flatten + inverse_predicates = @properties_to_include.select { |_, property| property[:is_inverse] }.map { |_, property| [property[:uri]] + (property[:equivalents] || []) }.flatten + [direct_predicates, inverse_predicates] + end + + def inverse_predicate? + @properties_to_include.any? { |_, property| property[:is_inverse] } + end + def patterns_for_match(klass, attr, value, graphs, patterns, unions, internal_variables, subject = :id, in_union = false, in_aggregate = false, query_options = {}, collection = nil) + new_internal_var = value if value.respond_to?(:each) || value.instance_of?(Symbol) next_pattern = value.instance_of?(Array) ? value.first : value #for filters next_pattern = { next_pattern => [] } if next_pattern.instance_of?(Symbol) - value = "internal_join_var_#{internal_variables.length}".to_sym + new_internal_var = "internal_join_var_#{internal_variables.length}".to_sym if in_aggregate - value = "#{attr}_agg_#{in_aggregate}".to_sym + new_internal_var = "#{attr}_agg_#{in_aggregate}".to_sym end - internal_variables << value + internal_variables << new_internal_var + @internal_variables_map[new_internal_var] = value.empty? ? attr : {attr => value} end - add_rules(attr, klass, query_options) + graph, pattern = - query_pattern(klass, attr, value: value, subject: subject, collection: collection) + query_pattern(klass, attr, value: new_internal_var, subject: subject, collection: collection) if pattern if !in_union patterns << pattern @@ -187,7 +209,7 @@ def patterns_for_match(klass, attr, value, graphs, patterns, unions, range = klass.range(attr) next_pattern.each do |next_attr, next_value| patterns_for_match(range, next_attr, next_value, graphs, - patterns, unions, internal_variables, subject = value, + patterns, unions, internal_variables, subject = new_internal_var, in_union, in_aggregate, collection = collection) end end @@ -210,7 +232,7 @@ def walk_pattern(klass, match_patterns, graphs, patterns, unions, end end - def get_aggregate_vars(aggregate, collection, graphs, klass, unions, variables, internal_variables) + def get_aggregate_vars(aggregate, collection, graphs, klass, unions, variables) # mdorf, 6/03/20 If aggregate projections (sub-SELECT within main SELECT) use an alias, that alias cannot appear in the main SELECT # https://github.com/ncbo/goo/issues/106 # See last sentence in https://www.w3.org/TR/sparql11-query/#aggregateExample @@ -241,8 +263,6 @@ def get_aggregate_vars(aggregate, collection, graphs, klass, unions, variables, end def graph_match(collection, graph_match, graphs, klass, patterns, query_options, unions) - internal_variables = [] - if graph_match #make it deterministic - for caching graph_match_iteration = Goo::Base::PatternIteration.new(graph_match) @@ -250,28 +270,58 @@ def graph_match(collection, graph_match, graphs, klass, patterns, query_options, internal_variables, in_aggregate = false, query_options, collection) graphs.uniq! end - internal_variables + patterns end def get_client Goo.sparql_query_client(@store) end - def init_order_by(count, klass, order_by, optional_patterns, variables) + def init_order_by(count, klass, order_by, optional_patterns, variables, patterns, query_options, graphs) order_by = nil if count if order_by order_by = order_by.first #simple ordering ... needs to use pattern inspection order_by.each do |attr, direction| - quad = query_pattern(klass, attr) - optional_patterns << quad[1] + + if direction.is_a?(Hash) + # TODO this part can be improved/refactored, the complexity was added because order by don't work + # if the pattern is in the mandatory ones (variable `patterns`) + # and optional (variable `optional_patterns`) at the same time + sub_attr, direction = direction.first + graph_match_iteration = Goo::Base::PatternIteration.new(Goo::Base::Pattern.new({attr => [sub_attr]})) + old_internal = internal_variables.dup + old_patterns = optional_patterns.dup + + walk_pattern(klass, graph_match_iteration, graphs, optional_patterns, @unions, internal_variables, in_aggregate = false, query_options, @collection) + new_variables = (internal_variables - old_internal) + internal_variables.delete(new_variables) + new_patterns = optional_patterns - old_patterns + already_existent_pattern = patterns.select{|x| x[1].eql?(new_patterns.last[1])}.first + + if already_existent_pattern + already_existent_variable = already_existent_pattern[2] + optional_patterns = old_patterns + key = @internal_variables_map.select{|key, value| key.eql?(new_variables.last)}.keys.first + @internal_variables_map[key] = (already_existent_variable || new_variables.last) if key + + #variables << already_existent_variable + else + #variables << new_variables.last + end + + else + quad = query_pattern(klass, attr) + optional_patterns << quad[1] + #variables << attr + end + #patterns << quad[1] #mdorf, 9/22/16 If an ORDER BY clause exists, the columns used in the ORDER BY should be present in the SPARQL select #variables << attr unless variables.include?(attr) end - variables = %i[id attributeProperty attributeObject] end - [order_by, variables, optional_patterns] + [order_by, variables, optional_patterns, patterns] end def sparql_op_string(op) @@ -310,38 +360,45 @@ def query_filter_sparql(klass, filter, filter_patterns, filter_graphs, end filter_var = inspected_patterns[filter_pattern_match] - if !filter_operation.value.instance_of?(Goo::Filter) - if filter_operation.operator == :unbound || filter_operation.operator == :bound - if filter_operation.operator == :unbound - filter_operations << "!BOUND(?#{filter_var.to_s})" - else - filter_operations << "BOUND(?#{filter_var.to_s})" - end + if filter_operation.value.instance_of?(Goo::Filter) + filter_operations << "#{sparql_op_string(filter_operation.operator)}" + query_filter_sparql(klass, filter_operation.value, filter_patterns, + filter_graphs, filter_operations, + internal_variables, inspected_patterns, collection) + else + case filter_operation.operator + when :unbound + filter_operations << "!BOUND(?#{filter_var.to_s})" + return :optional + + when :bound + filter_operations << "BOUND(?#{filter_var.to_s})" return :optional + when :regex + if filter_operation.value.is_a?(String) + filter_operations << "REGEX(STR(?#{filter_var.to_s}) , \"#{filter_operation.value.to_s}\", \"i\")" + end + else value = RDF::Literal.new(filter_operation.value) if filter_operation.value.is_a? String - value = RDF::Literal.new(filter_operation.value, :datatype => RDF::XSD.string) + value = RDF::Literal.new(filter_operation.value) end filter_operations << ( "?#{filter_var.to_s} #{sparql_op_string(filter_operation.operator)} " + " #{value.to_ntriples}") end - else - filter_operations << "#{sparql_op_string(filter_operation.operator)}" - query_filter_sparql(klass, filter_operation.value, filter_patterns, - filter_graphs, filter_operations, - internal_variables, inspected_patterns, collection) + end end end - def filter_query_strings(collection, graphs, internal_variables, klass, + def filter_query_strings(collection, graphs, klass, optional_patterns, patterns, query_filters) query_filter_str = [] - filter_graphs = [] + filter_variables = [] inspected_patterns = {} query_filters&.each do |query_filter| filter_operations = [] @@ -358,9 +415,9 @@ def filter_query_strings(collection, graphs, internal_variables, klass, patterns.concat(filter_patterns) end end + #filter_variables << inspected_patterns.values.last end - - [query_filter_str, patterns, optional_patterns, internal_variables] + [query_filter_str, patterns, optional_patterns, filter_variables] end def reject_aggregations_from_vars(variables, aggregate_projections) @@ -376,6 +433,19 @@ def add_some_type_to_id(patterns, query_options, variables) [variables, patterns] end + def internal_variables + @internal_variables_map.keys + end + + def expand_equivalent_predicates(query_properties, eq_p) + + return unless eq_p && !eq_p.empty? + + query_properties&.each do |_, property| + property_uri = property[:uri] + property[:equivalents] = eq_p[property_uri.to_s].to_a.map { |p| RDF::URI.new(p) } if eq_p.include?(property_uri.to_s) + end + end end end end diff --git a/lib/goo/sparql/solutions_mapper.rb b/lib/goo/sparql/solutions_mapper.rb index 954ceca9..483bf864 100644 --- a/lib/goo/sparql/solutions_mapper.rb +++ b/lib/goo/sparql/solutions_mapper.rb @@ -1,39 +1,35 @@ module Goo module SPARQL class SolutionMapper - BNODES_TUPLES = Struct.new(:id, :attribute) - def initialize(aggregate_projections, bnode_extraction, embed_struct, - incl_embed, klass_struct, models_by_id, - properties_to_include, unmapped, variables,ids, options) + def initialize(aggregate_projections, bnode_extraction, embed_struct,incl_embed, klass_struct, models_by_id, variables, options) @aggregate_projections = aggregate_projections @bnode_extraction = bnode_extraction @embed_struct = embed_struct @incl_embed = incl_embed + @incl = options[:include] @klass_struct = klass_struct @models_by_id = models_by_id - @properties_to_include = properties_to_include - @unmapped = unmapped + @properties_to_include = options[:properties_to_include] + @unmapped = options[:include] && options[:include].first.eql?(:unmapped) @variables = variables - @ids = ids - @klass = options[:klass] + @ids = models_by_id.keys @klass = options[:klass] @read_only = options[:read_only] - @incl = options[:include] @count = options[:count] @collection = options[:collection] + @options = options end def map_each_solutions(select) - - found = Set.new objects_new = {} - var_set_hash = {} list_attributes = Set.new(@klass.attributes(:list)) - all_attributes = Set.new(@klass.attributes(:all)) + + @lang_filter = Goo::SPARQL::Solution::LanguageFilter.new(requested_lang: @options[:requested_lang].to_s, unmapped: @unmapped, + list_attributes: list_attributes) if @options[:page] # for using prefixes before queries @@ -53,6 +49,7 @@ def map_each_solutions(select) # File.write(debug_file, select.to_s + "\n", mode: 'a') if select.to_s =~ /OFFSET \d+ LIMIT 2500$/ select.each_solution do |sol| + next if sol[:some_type] && @klass.type_uri(@collection) != sol[:some_type] return sol[:count_var].object if @count @@ -79,56 +76,77 @@ def map_each_solutions(select) next end - v = sol[:attributeProperty].to_s.to_sym + predicates = find_predicate(sol[:attributeProperty], inverse: !sol[:inverseAttributeObject].nil?) + next if predicates.empty? - next if v.nil? || !all_attributes.include?(v) + object = if sol[:attributeObject] + sol[:attributeObject] + elsif sol[:inverseAttributeObject] + sol[:inverseAttributeObject] + end - object = sol[:attributeObject] - #bnodes - if bnode_id?(object, v) - objects_new = bnode_id_tuple(id, object, objects_new, v) - next + predicates.each do |predicate| + # bnodes + if bnode_id?(object, predicate) + objects_new = bnode_id_tuple(id, object, objects_new, predicate) + next + end + + objects, objects_new = get_value_object(id, objects_new, object, list_attributes, predicate) + add_object_to_model(id, objects, object, predicate) end - object, objects_new = get_value_object(id, objects_new, object, list_attributes, v) - add_object_to_model(id, object, v, var_set_hash) end + # for this moment we are not going to enrich models , maybe we will use it if the results are empty + @lang_filter.fill_models_with_all_languages(@models_by_id) # for troubleshooting specific queries (write 3 of 3) # File.write(debug_file, "\n\n", mode: 'a') if select.to_s =~ /OFFSET \d+ LIMIT 2500$/ - init_unloaded_attributes(found, list_attributes) + init_unloaded_attributes(list_attributes) return @models_by_id if @bnode_extraction model_set_collection_attributes(@models_by_id, objects_new) - #remove from models_by_id elements that were not touched - @models_by_id.select! { |k, m| found.include?(k) } + # remove from models_by_id elements that were not touched + @models_by_id.select! { |k, _m| found.include?(k) } models_set_all_persistent(@models_by_id) unless @read_only - #next level of embed attributes + # next level of embed attributes include_embed_attributes(@incl_embed, objects_new) if @incl_embed && !@incl_embed.empty? - #bnodes - blank_nodes = objects_new.select { |id, obj| id.is_a?(RDF::Node) && id.anonymous? } + # bnodes + blank_nodes = objects_new.select { |id, _obj| id.is_a?(RDF::Node) && id.anonymous? } include_bnodes(blank_nodes, @models_by_id) unless blank_nodes.empty? models_unmapped_to_array(@models_by_id) if @unmapped + @models_by_id end private - def init_unloaded_attributes(found, list_attributes) - return if @incl.nil? + def find_predicate(predicate, unmapped: false, inverse: false) + if Goo.backend_4s? || Goo.backend_gb? + return [] if predicate.nil? || unmapped && @properties_to_include[predicate].nil? + predicate = predicate.to_s.to_sym + else + predicate = @properties_to_include.select { |x, v| v[:uri].to_s.eql?(predicate.to_s) || v[:equivalents]&.any? { |e| e.to_s.eql?(predicate.to_s) } } + return [] if predicate.empty? + + predicate = predicate.select{|x, y| y[:is_inverse]&.eql?(inverse)}.keys + end + Array(predicate) + end + def init_unloaded_attributes(list_attributes) + return if @incl.nil? || @incl.empty? # Here we are setting to nil all attributes that have been included but not found in the triplestore - found.uniq.each do |model_id| - m = @models_by_id[model_id] + @models_by_id.each do |id, m| @incl.each do |attr_to_incl| is_handler = m.respond_to?(:handler?) && m.class.handler?(attr_to_incl) next if attr_to_incl.to_s.eql?('unmapped') || is_handler @@ -152,8 +170,7 @@ def init_unloaded_attributes(found, list_attributes) def get_value_object(id, objects_new, object, list_attributes, predicate) object = object.object if object && !(object.is_a? RDF::URI) range_for_v = @klass.range(predicate) - #binding.pry if v.eql?(:enrolled) - #dependent model creation + if object.is_a?(RDF::URI) && (predicate != :id) && !range_for_v.nil? if objects_new.include?(object) @@ -163,7 +180,7 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) object, objects_new = if !@read_only preloaded_or_new_object(object, objects_new, pre_val, predicate) else - #depedent read only + # depedent read only preloaded_or_new_struct(object, objects_new, pre_val, predicate) end else @@ -172,40 +189,27 @@ def get_value_object(id, objects_new, object, list_attributes, predicate) end if list_attributes.include?(predicate) - # To handle attr that are lists - pre = if @klass_struct - @models_by_id[id][predicate] - else - @models_by_id[id].instance_variable_get("@#{predicate}") - end - if object.nil? && pre.nil? - object = [] - elsif object.nil? && !pre.nil? - object = pre - elsif object - object = !pre ? [object] : (pre.dup << object) + pre = @klass_struct ? @models_by_id[id][predicate] : @models_by_id[id].instance_variable_get("@#{predicate}") + + if object.nil? + object = pre.nil? ? [] : pre + else + object = pre.nil? ? [object] : (Array(pre).dup << object) object.uniq! end + end - [object,objects_new] + [object, objects_new] end - def add_object_to_model(id, object, predicate, var_set_hash) + def add_object_to_model(id, objects, current_obj, predicate) + if @models_by_id[id].respond_to?(:klass) - @models_by_id[id][predicate] = object unless object.nil? && !@models_by_id[id][predicate].nil? + @models_by_id[id][predicate] = objects unless objects.nil? && !@models_by_id[id][predicate].nil? elsif !@models_by_id[id].class.handler?(predicate) && - !(object.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && + !(objects.nil? && !@models_by_id[id].instance_variable_get("@#{predicate}").nil?) && predicate != :id - # if multiple language values are included for a given property, set the - # corresponding model attribute to the English language value - NCBO-1662 - if object.is_a?(RDF::Literal) - key = "#{predicate}#__#{id}" - @models_by_id[id].send("#{predicate}=", object, on_load: true) unless var_set_hash[key] - lang = object.language - var_set_hash[key] = true if %i[EN en].include?(lang) - else - @models_by_id[id].send("#{predicate}=", object, on_load: true) - end + @lang_filter.set_model_value(@models_by_id[id], predicate, objects, current_obj) end end @@ -213,7 +217,7 @@ def get_preload_value(id, object, predicate) pre_val = nil if predicate_preloaded?(id, predicate) pre_val = preloaded_value(id, predicate) - pre_val = pre_val.select { |x| x.id == object }.first if pre_val.is_a?(Array) + pre_val = pre_val.select { |x| x.respond_to?(:id) && (x.id == object) }.first if pre_val.is_a?(Array) end pre_val end @@ -235,6 +239,7 @@ def preloaded_or_new_struct(object, objects_new, pre_val, predicate) def preloaded_value(id, predicate) if !@read_only @models_by_id[id].instance_variable_get("@#{predicate}") + else @models_by_id[id][predicate] end @@ -251,9 +256,7 @@ def bnode_id?(object, predicate) def bnode_id_tuple(id, object, objects_new, predicate) range = @klass.range(predicate) - if range.respond_to?(:new) - objects_new[object] = BNODES_TUPLES.new(id, predicate) - end + objects_new[object] = BNODES_TUPLES.new(id, predicate) if range.respond_to?(:new) objects_new end @@ -267,21 +270,13 @@ def create_model(id) @models_by_id[id] = create_class_model(id, @klass, @klass_struct) unless @models_by_id.include?(id) end - def model_set_unmapped(id, predicate, value) - - if @models_by_id[id].respond_to? :klass #struct - @models_by_id[id][:unmapped] ||= {} - (@models_by_id[id][:unmapped][predicate] ||= []) << value - else - @models_by_id[id].unmapped_set(predicate, value) - end - end def create_struct(bnode_extraction, models_by_id, sol, variables) list_attributes = Set.new(@klass.attributes(:list)) struct = @klass.range(bnode_extraction).new variables.each do |v| next if v == :id + svalue = sol[v] struct[v] = svalue.is_a?(RDF::Node) ? svalue : svalue.object end @@ -302,70 +297,78 @@ def create_class_model(id, klass, klass_struct) end def models_unmapped_to_array(models_by_id) - models_by_id.each do |idm, m| - m.unmmaped_to_array + models_by_id.each do |_idm, m| + @lang_filter.models_unmapped_to_array(m) end end + + def is_multiple_langs? + return true if @requested_lang.is_a?(Array) || @requested_lang.eql?(:ALL) + false + end + def include_bnodes(bnodes, models_by_id) - #group by attribute - attrs = bnodes.map { |x, y| y.attribute }.uniq + # group by attribute + attrs = bnodes.map { |_x, y| y.attribute }.uniq attrs.each do |attr| struct = @klass.range(attr) - #bnodes that are in a range of goo ground models - #for example parents and children in LD class models - #we skip this cases for the moment + # bnodes that are in a range of goo ground models + # for example parents and children in LD class models + # we skip this cases for the moment next if struct.respond_to?(:model_name) bnode_attrs = struct.new.to_h.keys - ids = bnodes.select { |x, y| y.attribute == attr }.map { |x, y| y.id } - @klass.where.models(models_by_id.select { |x, y| ids.include?(x) }.values) - .in(@collection) - .include(bnode: { attr => bnode_attrs }).all + ids = bnodes.select { |_x, y| y.attribute == attr }.map { |_x, y| y.id } + @klass.where.models(models_by_id.select { |x, _y| ids.include?(x) }.values) + .in(@collection) + .include(bnode: { attr => bnode_attrs }).all end end def include_embed_attributes(incl_embed, objects_new) incl_embed.each do |attr, next_attrs| - #anything to join ? + # anything to join ? attr_range = @klass.range(attr) next if attr_range.nil? - range_objs = objects_new.select { |id, obj| + + range_objs = objects_new.select do |_id, obj| obj.instance_of?(attr_range) || (obj.respond_to?(:klass) && obj[:klass] == attr_range) - }.values - unless range_objs.empty? - range_objs.uniq! - query = attr_range.where().models(range_objs).in(@collection).include(*next_attrs) - query = query.read_only if @read_only - query.all - end + end.values + next if range_objs.empty? + + range_objs.uniq! + query = attr_range.where.models(range_objs).in(@collection).include(*next_attrs) + query = query.read_only if @read_only + query.all end end def models_set_all_persistent(models_by_id) return unless @ids - models_by_id.each do |k, m| + + models_by_id.each do |_k, m| m.persistent = true end end def model_set_collection_attributes(models_by_id, objects_new) collection_value = get_collection_value - if collection_value - collection_attribute = @klass.collection_opts - models_by_id.each do |id, m| - m.send("#{collection_attribute}=", collection_value) - end - objects_new.each do |id, obj_new| - if obj_new.respond_to?(:klass) - collection_attribute = obj_new[:klass].collection_opts - obj_new[collection_attribute] = collection_value - elsif obj_new.class.respond_to?(:collection_opts) && - obj_new.class.collection_opts.instance_of?(Symbol) - collection_attribute = obj_new.class.collection_opts - obj_new.send("#{collection_attribute}=", collection_value) - end + return unless collection_value + + collection_attribute = @klass.collection_opts + models_by_id.each do |_id, m| + m.send("#{collection_attribute}=", collection_value) + end + objects_new.each do |_id, obj_new| + if obj_new.respond_to?(:klass) + collection_attribute = obj_new[:klass].collection_opts + obj_new[collection_attribute] = collection_value + elsif obj_new.class.respond_to?(:collection_opts) && + obj_new.class.collection_opts.instance_of?(Symbol) + collection_attribute = obj_new.class.collection_opts + obj_new.send("#{collection_attribute}=", collection_value) end end end @@ -373,36 +376,12 @@ def model_set_collection_attributes(models_by_id, objects_new) def get_collection_value collection_value = nil if @klass.collection_opts.instance_of?(Symbol) - if @collection.is_a?(Array) && (@collection.length == 1) - collection_value = @collection.first - end - if @collection.respond_to? :id - collection_value = @collection - end + collection_value = @collection.first if @collection.is_a?(Array) && (@collection.length == 1) + collection_value = @collection if @collection.respond_to? :id end collection_value end - def model_map_attributes_values(id, var_set_hash, models_by_id, object, sol, v) - if models_by_id[id].respond_to?(:klass) - models_by_id[id][v] = object if models_by_id[id][v].nil? - else - model_attribute_val = models_by_id[id].instance_variable_get("@#{v.to_s}") - if (!models_by_id[id].class.handler?(v) || model_attribute_val.nil?) && v != :id - # if multiple language values are included for a given property, set the - # corresponding model attribute to the English language value - NCBO-1662 - if sol[v].is_a?(RDF::Literal) - key = "#{v}#__#{id.to_s}" - models_by_id[id].send("#{v}=", object, on_load: true) unless var_set_hash[key] - lang = sol[v].language - var_set_hash[key] = true if %i[EN en EN en EN en].include?(lang) - else - models_by_id[id].send("#{v}=", object, on_load: true) - end - end - end - end - def object_to_array(id, klass_struct, models_by_id, object, predicate) pre = if klass_struct models_by_id[id][predicate] @@ -421,7 +400,6 @@ def object_to_array(id, klass_struct, models_by_id, object, predicate) end def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new, v, options) - read_only = options[:read_only] if object.is_a?(RDF::URI) && v != :id range_for_v = @klass.range(v) @@ -429,8 +407,8 @@ def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new if objects_new.include?(object) object = objects_new[object] elsif !range_for_v.inmutable? - pre_val = get_pre_val(id, models_by_id, object, v, read_only) - object = get_object_from_range(pre_val, embed_struct, object, objects_new, v, options) + pre_val = get_pre_val(id, models_by_id, object, v) + object = get_object_from_range(pre_val, embed_struct, object, objects_new, v) else object = range_for_v.find(object).first end @@ -440,13 +418,12 @@ def dependent_model_creation(embed_struct, id, models_by_id, object, objects_new end def get_object_from_range(pre_val, embed_struct, object, objects_new, predicate) - range_for_v = @klass.range(predicate) if !@read_only object = pre_val || @klass.range_object(predicate, object) objects_new[object.id] = object else - #depedent read only + # depedent read only struct = pre_val || embed_struct[predicate].new struct.id = object struct.klass = range_for_v @@ -472,15 +449,18 @@ def get_pre_val(id, models_by_id, object, predicate) pre_val end - def add_unmapped_to_model(sol) - predicate = sol[:attributeProperty].to_s.to_sym - return unless @properties_to_include[predicate] - - id = sol[:id] - value = sol[:attributeObject] - - model_set_unmapped(id, @properties_to_include[predicate][:uri], value) + predicates = find_predicate(sol[:attributeProperty]) + predicates.each do |predicate| + if Goo.backend_4s? || Goo.backend_gb? + predicate = @properties_to_include[predicate][:uri] + else + predicate = sol[:attributeProperty] + end + id = sol[:id] + value = sol[:attributeObject] + @lang_filter.set_unmapped_value(@models_by_id[id], predicate, value) + end end def add_aggregations_to_model(sol) @@ -498,4 +478,3 @@ def add_aggregations_to_model(sol) end end end - diff --git a/lib/goo/sparql/sparql.rb b/lib/goo/sparql/sparql.rb index dfd3d0a6..d5315cde 100644 --- a/lib/goo/sparql/sparql.rb +++ b/lib/goo/sparql/sparql.rb @@ -1,9 +1,11 @@ require "sparql/client" require_relative "mixins/query_pattern" +require_relative "mixins/solution_lang_filter" require_relative "query_builder" require_relative "solutions_mapper" require_relative "client" require_relative "triples" require_relative "loader" require_relative "queries" +require_relative 'processor' diff --git a/lib/goo/sparql/triples.rb b/lib/goo/sparql/triples.rb index cb840df9..df3f9f1d 100644 --- a/lib/goo/sparql/triples.rb +++ b/lib/goo/sparql/triples.rb @@ -67,16 +67,6 @@ def self.model_update_triples(model) unless model.persistent? graph_insert << [subject, RDF.type, model.class.uri_type(model.collection)] end - #set default values before saving - if not model.persistent? - model.class.attributes_with_defaults.each do |attr| - value = model.send("#{attr}") - if value.nil? - value = model.class.default(attr).call(model) - model.send("#{attr}=",value) - end - end - end model.modified_attributes.each do |attr| next if model.class.collection?(attr) diff --git a/lib/goo/validators/enforce.rb b/lib/goo/validators/enforce.rb index 4d0c09f4..d6f3816d 100644 --- a/lib/goo/validators/enforce.rb +++ b/lib/goo/validators/enforce.rb @@ -66,6 +66,17 @@ def enforce(inst,attr,value) errors_by_opt.length > 0 ? errors_by_opt : nil end + def enforce_callback(inst, attr) + callbacks = Array(inst.class.update_callbacks(attr)) + callbacks.each do |proc| + if instance_proc?(inst, proc) + call_proc(inst.method(proc), inst, attr) + elsif proc.is_a?(Proc) + call_proc(proc, inst, attr) + end + end + end + private def object_type(opt) @@ -117,6 +128,10 @@ def add_error(opt, err) def self.enforce(inst,attr,value) EnforceInstance.new.enforce(inst,attr,value) end + + def self.enforce_callbacks(inst, attr) + EnforceInstance.new.enforce_callback(inst, attr) + end end end end diff --git a/rakelib/docker_based_test.rake b/rakelib/docker_based_test.rake index 912cb699..d9b334f4 100644 --- a/rakelib/docker_based_test.rake +++ b/rakelib/docker_based_test.rake @@ -3,43 +3,34 @@ desc 'Run unit tests with docker based backend' namespace :test do namespace :docker do - desc "clean docker images and volumes" - - task :clean do - system("docker compose down --volumes") - end task :up do system("docker compose up -d") || abort("Unable to start docker containers") end task :down do - system("docker compose --profile fs --profile ag stop") - system("docker compose --profile fs --profile ag kill") + #system("docker compose --profile fs --profile ag stop") + #system("docker compose --profile fs --profile ag kill") end desc "run tests with docker AG backend" task :ag do - ENV["GOO_BACKEND_NAME"]="AG" + ENV["GOO_BACKEND_NAME"]="allegrograph" ENV["GOO_PORT"]="10035" ENV["GOO_PATH_QUERY"]="/repositories/ontoportal_test" ENV["GOO_PATH_DATA"]="/repositories/ontoportal_test/statements" ENV["GOO_PATH_UPDATE"]="/repositories/ontoportal_test/statements" ENV["COMPOSE_PROFILES"]="ag" Rake::Task["test:docker:up"].invoke - # AG takes some time to start and create databases/accounts # TODO: replace system curl command with native ruby code - printf("waiting for AllegroGraph container to initialize") - sec = 0 - until system("curl -m 3 -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") - sleep(1) - printf(".") - sec += 1 - next unless sec > 60 - - puts - Rake::Task["test:docker:down"].invoke - abort("\nAborted; can't initialise AllegroGraph container") + unless system("curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") + printf("waiting for AllegroGraph container to initialize") + sec = 0 + until system("curl -sf http://127.0.0.1:10035/repositories/ontoportal_test/status | grep -iqE '(^running|^lingering)' || exit 1") do + sleep(1) + printf(".") + sec += 1 + end end - puts + puts system("docker compose ps") # TODO: remove after GH actions troubleshooting is complete Rake::Task["test"].invoke Rake::Task["test:docker:down"].invoke @@ -53,5 +44,64 @@ namespace :test do Rake::Task["test"].invoke Rake::Task["test:docker:down"].invoke end + + desc "run tests with docker Virtuoso backend" + task :vo do + ENV["GOO_BACKEND_NAME"]="virtuoso" + ENV["GOO_PORT"]="8890" + ENV["GOO_PATH_QUERY"]="/sparql" + ENV["GOO_PATH_DATA"]="/sparql" + ENV["GOO_PATH_UPDATE"]="/sparql" + ENV["COMPOSE_PROFILES"]="vo" + Rake::Task["test:docker:up"].invoke + # + unless system("curl -sf http://localhost:8890/sparql || exit 1") + printf("waiting for Virtuoso container to initialize") + sec = 0 + until system("curl -sf http://localhost:8890/sparql || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + system("docker compose logs virtuoso-ut") + abort(" Virtuoso container hasn't initialized properly") + end + end + end + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + + + desc "run tests with docker GraphDb backend" + task :gb do + ENV["GOO_BACKEND_NAME"]="graphdb" + ENV["GOO_PORT"]="7200" + ENV["GOO_PATH_QUERY"]="/repositories/ontoportal" + ENV["GOO_PATH_DATA"]="/repositories/ontoportal/statements" + ENV["GOO_PATH_UPDATE"]="/repositories/ontoportal/statements" + ENV["COMPOSE_PROFILES"]="gb" + Rake::Task["test:docker:up"].invoke + + #system("docker compose cp ./test/data/graphdb-repo-config.ttl graphdb:/opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl") + #system("docker compose cp ./test/data/graphdb-test-load.nt graphdb:/opt/graphdb/dist/configs/templates/graphdb-test-load.nt") + #system('docker compose exec graphdb sh -c "importrdf load -f -c /opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/graphdb-test-load.nt ;"') + unless system("curl -sf http://localhost:7200/repositories || exit 1") + printf("waiting for Graphdb container to initialize") + sec = 0 + until system("curl -sf http://localhost:7200/repositories || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + system("docker compose logs graphdb") + abort(" Graphdb container hasn't initialized properly") + end + end + end + Rake::Task["test"].invoke + Rake::Task["test:docker:down"].invoke + end + end end diff --git a/test/data/graphdb-repo-config.ttl b/test/data/graphdb-repo-config.ttl new file mode 100644 index 00000000..9200da9a --- /dev/null +++ b/test/data/graphdb-repo-config.ttl @@ -0,0 +1,33 @@ +@prefix rdfs: . +@prefix rep: . +@prefix sail: . +@prefix xsd: . + +<#ontoportal> a rep:Repository; + rep:repositoryID "ontoportal"; + rep:repositoryImpl [ + rep:repositoryType "graphdb:SailRepository"; + [ + "http://example.org/owlim#"; + "false"; + ""; + "true"; + "false"; + "true"; + "true"; + "32"; + "10000000"; + ""; + "true"; + ""; + "0"; + "0"; + "false"; + "file-repository"; + "rdfsplus-optimized"; + "storage"; + "false"; + sail:sailType "owlim:Sail" + ] + ]; + rdfs:label "" . \ No newline at end of file diff --git a/test/data/graphdb-test-load.nt b/test/data/graphdb-test-load.nt new file mode 100644 index 00000000..e69de29b diff --git a/test/data/virtuoso.init b/test/data/virtuoso.init new file mode 100644 index 00000000..e5f4bd85 --- /dev/null +++ b/test/data/virtuoso.init @@ -0,0 +1,240 @@ + ; + ; virtuoso.ini + ; + ; Configuration file for the OpenLink Virtuoso VDBMS Server + ; + ; To learn more about this product, or any other product in our + ; portfolio, please check out our web site at: + ; + ; http://virtuoso.openlinksw.com/ + ; + ; or contact us at: + ; + ; general.information@openlinksw.com + ; + ; If you have any technical questions, please contact our support + ; staff at: + ; + ; technical.support@openlinksw.com + ; + ; + ; Database setup + ; + [Database] + DatabaseFile = ../database/virtuoso.db + ErrorLogFile = ../database/virtuoso.log + LockFile = ../database/virtuoso.lck + TransactionFile = ../database/virtuoso.trx + xa_persistent_file = ../database/virtuoso.pxa + ErrorLogLevel = 7 + FileExtend = 200 + MaxCheckpointRemap = 2000 + Striping = 0 + TempStorage = TempDatabase + + [TempDatabase] + DatabaseFile = ../database/virtuoso-temp.db + TransactionFile = ../database/virtuoso-temp.trx + MaxCheckpointRemap = 2000 + Striping = 0 + + ; + ; Server parameters + ; + [Parameters] + ServerPort = 1111 + LiteMode = 0 + DisableUnixSocket = 1 + DisableTcpSocket = 0 + ;SSLServerPort = 2111 + ;SSLCertificate = cert.pem + ;SSLPrivateKey = pk.pem + ;X509ClientVerify = 0 + ;X509ClientVerifyDepth = 0 + ;X509ClientVerifyCAFile = ca.pem + MaxClientConnections = 10 + CheckpointInterval = 60 + O_DIRECT = 0 + CaseMode = 2 + MaxStaticCursorRows = 5000 + CheckpointAuditTrail = 0 + AllowOSCalls = 0 + SchedulerInterval = 10 + DirsAllowed = ., ../vad, /usr/share/proj + ThreadCleanupInterval = 0 + ThreadThreshold = 10 + ResourcesCleanupInterval = 0 + FreeTextBatchSize = 100000 + SingleCPU = 0 + VADInstallDir = ../vad/ + PrefixResultNames = 0 + RdfFreeTextRulesSize = 100 + IndexTreeMaps = 64 + MaxMemPoolSize = 200000000 + PrefixResultNames = 0 + MacSpotlight = 0 + MaxQueryMem = 2G ; memory allocated to query processor + VectorSize = 1000 ; initial parallel query vector (array of query operations) size + MaxVectorSize = 1000000 ; query vector size threshold. + AdjustVectorSize = 0 + ThreadsPerQuery = 4 + AsyncQueueMaxThreads = 10 + ;; + ;; When running with large data sets, one should configure the Virtuoso + ;; process to use between 2/3 to 3/5 of free system memory and to stripe + ;; storage on all available disks. + ;; + ;; Uncomment next two lines if there is 2 GB system memory free + ;NumberOfBuffers = 170000 + ;MaxDirtyBuffers = 130000 + ;; Uncomment next two lines if there is 4 GB system memory free + ;NumberOfBuffers = 340000 + ; MaxDirtyBuffers = 250000 + ;; Uncomment next two lines if there is 8 GB system memory free + ;NumberOfBuffers = 680000 + ;MaxDirtyBuffers = 500000 + ;; Uncomment next two lines if there is 16 GB system memory free + ;NumberOfBuffers = 1360000 + ;MaxDirtyBuffers = 1000000 + ;; Uncomment next two lines if there is 32 GB system memory free + ;NumberOfBuffers = 2720000 + ;MaxDirtyBuffers = 2000000 + ;; Uncomment next two lines if there is 48 GB system memory free + ;NumberOfBuffers = 4000000 + ;MaxDirtyBuffers = 3000000 + ;; Uncomment next two lines if there is 64 GB system memory free + ;NumberOfBuffers = 5450000 + ;MaxDirtyBuffers = 4000000 + ;; + ;; Note the default settings will take very little memory + ;; but will not result in very good performance + ;; + NumberOfBuffers = 10000 + MaxDirtyBuffers = 6000 + + [HTTPServer] + ServerPort = 8890 + ServerRoot = ../vsp + MaxClientConnections = 10 + DavRoot = DAV + EnabledDavVSP = 0 + HTTPProxyEnabled = 0 + TempASPXDir = 0 + DefaultMailServer = localhost:25 + MaxKeepAlives = 10 + KeepAliveTimeout = 10 + MaxCachedProxyConnections = 10 + ProxyConnectionCacheTimeout = 15 + HTTPThreadSize = 280000 + HttpPrintWarningsInOutput = 0 + Charset = UTF-8 + ;HTTPLogFile = logs/http.log + MaintenancePage = atomic.html + EnabledGzipContent = 1 + + [AutoRepair] + BadParentLinks = 0 + + [Client] + SQL_PREFETCH_ROWS = 100 + SQL_PREFETCH_BYTES = 16000 + SQL_QUERY_TIMEOUT = 0 + SQL_TXN_TIMEOUT = 0 + ;SQL_NO_CHAR_C_ESCAPE = 1 + ;SQL_UTF8_EXECS = 0 + ;SQL_NO_SYSTEM_TABLES = 0 + ;SQL_BINARY_TIMESTAMP = 1 + ;SQL_ENCRYPTION_ON_PASSWORD = -1 + + [VDB] + ArrayOptimization = 0 + NumArrayParameters = 10 + VDBDisconnectTimeout = 1000 + KeepConnectionOnFixedThread = 0 + + [Replication] + ServerName = db-BIONIC-PORT + ServerEnable = 1 + QueueMax = 50000 + + ; + ; Striping setup + ; + ; These parameters have only effect when Striping is set to 1 in the + ; [Database] section, in which case the DatabaseFile parameter is ignored. + ; + ; With striping, the database is spawned across multiple segments + ; where each segment can have multiple stripes. + ; + ; Format of the lines below: + ; Segment = , [, .. ] + ; + ; must be ordered from 1 up. + ; + ; The is the total size of the segment which is equally divided + ; across all stripes forming the segment. Its specification can be in + ; gigabytes (g), megabytes (m), kilobytes (k) or in database blocks + ; (b, the default) + ; + ; Note that the segment size must be a multiple of the database page size + ; which is currently 8k. Also, the segment size must be divisible by the + ; number of stripe files forming the segment. + ; + ; The example below creates a 200 meg database striped on two segments + ; with two stripes of 50 meg and one of 100 meg. + ; + ; You can always add more segments to the configuration, but once + ; added, do not change the setup. + ; + [Striping] + Segment1 = 100M, db-seg1-1.db, db-seg1-2.db + Segment2 = 100M, db-seg2-1.db + ;... + ;[TempStriping] + ;Segment1 = 100M, db-seg1-1.db, db-seg1-2.db + ;Segment2 = 100M, db-seg2-1.db + ;... + ;[Ucms] + ;UcmPath = + ;Ucm1 = + ;Ucm2 = + ;... + + [Zero Config] + ServerName = virtuoso (BIONIC-PORT) + ;ServerDSN = ZDSN + ;SSLServerName = + ;SSLServerDSN = + + [Mono] + ;MONO_TRACE = Off + ;MONO_PATH = + ;MONO_ROOT = + ;MONO_CFG_DIR = + ;virtclr.dll = + + [URIQA] + DynamicLocal = 0 + DefaultHost = localhost:8890 + + [SPARQL] + ;ExternalQuerySource = 1 + ;ExternalXsltSource = 1 + ;DefaultGraph = http://localhost:8890/dataspace + ;ImmutableGraphs = http://localhost:8890/dataspace + ResultSetMaxRows = 10000 + MaxConstructTriples = 10000 + MaxQueryCostEstimationTime = 400 ; in seconds + MaxQueryExecutionTime = 60 ; in seconds + DefaultQuery = select distinct ?Concept where {[] a ?Concept} LIMIT 100 + DeferInferenceRulesInit = 0 ; controls inference rules loading + MaxMemInUse = 0 ; limits the amount of memory for construct dict (0=unlimited) + ;LabelInferenceName = facets ; Only needed when using the Faceted Browser + ;PingService = http://rpc.pingthesemanticweb.com/ + + [Plugins] + LoadPath = ../hosting + Load1 = plain, geos + Load2 = plain, graphql + Load3 = plain, proj4 + Load4 = plain, shapefileio \ No newline at end of file diff --git a/test/data/yaml_scheme_model_test.yml b/test/data/yaml_scheme_model_test.yml new file mode 100644 index 00000000..fd8c4921 --- /dev/null +++ b/test/data/yaml_scheme_model_test.yml @@ -0,0 +1,11 @@ +name: + label: 'Name' + description: 'Person name' + equivalents: ['test:name' , 'test2:name', 'test3:person_name'] + help: 'Put the person name as string' + example: 'John' +nationality: + label: 'Person nationality' + enforcedValues: {'fr': 'france', 'us': 'USA'} + + diff --git a/test/test_cache.rb b/test/test_cache.rb index be01f518..f5659ca4 100644 --- a/test/test_cache.rb +++ b/test/test_cache.rb @@ -100,7 +100,7 @@ def test_cache_models_back_door data = " " + " " + " ." - + Goo.sparql_data_client.append_triples(Student.type_uri,data,"application/x-turtle") programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) .include(:students).all @@ -128,11 +128,16 @@ def x.response_backup *args def x.response *args raise Exception, "Should be a successful hit" end - programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) - .include(:students).all + begin + programs = Program.where(name: "BioInformatics", university: [ name: "Stanford" ]) + .include(:students).all + rescue Exception + assert false, "should be cached" + end + #from cache - assert programs.length == 1 - assert_raises Exception do + assert_equal 1, programs.length + assert_raises Exception do #different query programs = Program.where(name: "BioInformatics X", university: [ name: "Stanford" ]).all end diff --git a/test/test_case.rb b/test/test_case.rb index 7073acd5..af7f2a84 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -42,9 +42,8 @@ def _run_suites(suites, type) end def _run_suite(suite, type) - %[1,5,10,20] ret = [] - [1,5,10,20].each do |slice_size| + [Goo.slice_loading_size].each do |slice_size| puts "\nrunning test with slice_loading_size=#{slice_size}" Goo.slice_loading_size=slice_size begin diff --git a/test/test_chunks_write.rb b/test/test_chunks_write.rb index 0435e124..b121ae33 100644 --- a/test/test_chunks_write.rb +++ b/test/test_chunks_write.rb @@ -21,14 +21,14 @@ def self.after_suite _delete end + def setup + self.class._delete + end + + def self._delete graphs = [ONT_ID, ONT_ID_EXTRA] - url = Goo.sparql_data_client.url - graphs.each { |graph| - # This bypasses the chunks stuff - params = self.params_for_backend(:delete, graph.to_s) - RestClient::Request.execute(params) - } + graphs.each { |graph| Goo.sparql_data_client.delete_graph(graph) } end def test_put_data @@ -72,17 +72,16 @@ def test_put_delete_data end def test_reentrant_queries - skip "TODO: why does this test fail?" ntriples_file_path = "./test/data/nemo_ontology.ntriples" # Bypass in chunks - params = self.class.params_for_backend(:put, ONT_ID, ntriples_file_path) + params = self.class.params_for_backend(:post, ONT_ID, ntriples_file_path) RestClient::Request.execute(params) tput = Thread.new { Goo.sparql_data_client.put_triples(ONT_ID_EXTRA, ntriples_file_path, mime_type="application/x-turtle") } - sleep(1.5) + count_queries = 0 tq = Thread.new { 5.times do @@ -98,16 +97,16 @@ def test_reentrant_queries assert_equal 5, count_queries tput.join - triples_no_bnodes = 25256 + count = "SELECT (count(?s) as ?c) WHERE { GRAPH <#{ONT_ID_EXTRA}> { ?s ?p ?o }}" Goo.sparql_query_client.query(count).each do |sol| - assert_equal triples_no_bnodes, sol[:c].object + assert_includes [25256, 50512], sol[:c].object end tdelete = Thread.new { Goo.sparql_data_client.delete_graph(ONT_ID_EXTRA) } - sleep(1.5) + count_queries = 0 tq = Thread.new { 5.times do @@ -119,9 +118,8 @@ def test_reentrant_queries end } tq.join - assert tdelete.alive? - assert_equal 5, count_queries tdelete.join + assert_equal 5, count_queries count = "SELECT (count(?s) as ?c) WHERE { GRAPH <#{ONT_ID_EXTRA}> { ?s ?p ?o }}" Goo.sparql_query_client.query(count).each do |sol| @@ -131,7 +129,7 @@ def test_reentrant_queries def test_query_flood ntriples_file_path = "./test/data/nemo_ontology.ntriples" - params = self.class.params_for_backend(:put, ONT_ID, ntriples_file_path) + params = self.class.params_for_backend(:post, ONT_ID, ntriples_file_path) RestClient::Request.execute(params) tput = Thread.new { @@ -144,45 +142,34 @@ def test_query_flood 50.times do |j| oq = "SELECT (count(?s) as ?c) WHERE { ?s a ?o }" Goo.sparql_query_client.query(oq).each do |sol| - assert_operator 0, :<, sol[:c].object + refute_equal 0, sol[:c] end end } end - if Goo.sparql_backend_name.downcase === BACKEND_4STORE + threads.join + + if Goo.backend_4s? log_status = [] Thread.new { 10.times do |i| log_status << Goo.sparql_query_client.status - sleep(1.2) end } - end - threads.each do |t| - t.join - end - tput.join + threads.each do |t| + t.join + end + tput.join - if Goo.sparql_backend_name.downcase == BACKEND_4STORE - assert_operator 0, :<, log_status.map { |x| x[:outstanding] }.max + assert log_status.map { |x| x[:outstanding] }.max > 0 assert_equal 16, log_status.map { |x| x[:running] }.max end end def self.params_for_backend(method, graph_name, ntriples_file_path = nil) - url = Goo.sparql_data_client.url - params = {method: method, headers: {content_type: "application/x-turtle"}, timeout: nil} - - if Goo.sparql_backend_name.downcase === BACKEND_AG - params[:url] = "#{url.to_s}?context=%22#{CGI.escape(graph_name)}%22" - params[:payload] = File.read(ntriples_file_path) if ntriples_file_path - else - params[:url] = "#{url.to_s}#{graph_name}" - params[:payload] = File.read(ntriples_file_path) if ntriples_file_path - end - params + Goo.sparql_data_client.params_for_backend(graph_name, File.read(ntriples_file_path), "text/turtle", method) end end end diff --git a/test/test_collections.rb b/test/test_collections.rb index 02bc9c3a..65d1f46d 100644 --- a/test/test_collections.rb +++ b/test/test_collections.rb @@ -89,18 +89,18 @@ def test_unique_per_collection def test_inverse_on_collection skip "Not supported inverse on collection" - john = User.find("John").include(:name).first || - User.new(name: "John").save() + john = User.find("John").include(:name).first || User.new(name: "John").save + 5.times do |i| - Issue.new(description: "issue_#{i}", owner: john).save + Issue.find("issue_#{i}").in(john) || Issue.new(description: "issue_#{i}", owner: john).save end - - binding.pry - User.find("John",include: [:issues]).first.issues - User.find("John",include: [issues: [:desciption]]).first.issues - 5.times do |i| - Issue.find("issue_#{i}", collection: john).delete + issues = User.find("John").include(:issues).first.issues + assert_equal 5, issues.size + + issues.each do |issue| + assert_equal "issue_#{i}", issue.description + assert_equal john, issue.collection end end diff --git a/test/test_dsl_settings.rb b/test/test_dsl_settings.rb index 69ab9f6e..52b0ac78 100644 --- a/test/test_dsl_settings.rb +++ b/test/test_dsl_settings.rb @@ -1,5 +1,22 @@ require_relative 'test_case' + +class NewPersonModel < Goo::Base::Resource + model :person_model_new, name_with: :name + attribute :name, type: :string, enforce: [ :existence, :unique] + attribute :multiple_values, type: [:list, :integer], enforce: [ :existence, :min_3, :max_5 ] + attribute :one_number, type: :integer,enforce: [ :existence ] #by default not a list + attribute :birth_date, type: :date_time, enforce: [ :existence ] + + attribute :created, type: DateTime , + default: lambda { |record| DateTime.now }, + namespace: :omv + + attribute :friends, type: NewPersonModel , enforce: [ :existence] + attribute :status, type: :status, enforce: [ :existence], + default: lambda { |record| StatusModel.find("single") } +end + class StatusModel < Goo::Base::Resource model :status_model, name_with: :name attribute :description, enforce: [ :existence, :unique] @@ -30,13 +47,46 @@ def initialize(attributes = {}) end end + +class YamlSchemeModelTest < Goo::Base::Resource + model :yaml_scheme_model_test, name_with: :name, scheme: 'test/data/yaml_scheme_model_test.yml' + attribute :name, enforce: [ :existence, :string, :unique] + attribute :last_name, enforce: [ :existence, :string, :unique] + attribute :birth_date, enforce: [ :existence, :date_time ] + attribute :nationality, enforce: [ :existence, :string ] + attribute :created, enforce: [ DateTime ], + default: lambda { |record| DateTime.now }, + namespace: :omv + attribute :friends, enforce: [ :existence , PersonModel] + attribute :status, enforce: [ :existence, :status ], + default: lambda { |record| StatusModel.find("single") } +end + + class TestDSLSeeting < MiniTest::Unit::TestCase def initialize(*args) super(*args) end + def test_data_type_dsl + _test_attributes_enforce NewPersonModel + end + def test_attributes_set_get + _test_attributes_enforce PersonModel + end + + def test_default_value + #default is on save ... returns` person = PersonModel.new + assert_equal nil, person.created + end + + + private + def _test_attributes_enforce(model) + person = model.new + model_key_name = model.model_name assert(person.respond_to? :id) assert(person.kind_of? Goo::Base::Resource) assert !person.valid? @@ -65,7 +115,7 @@ def test_attributes_set_get assert !person.valid? assert !person.errors[:birth_date] - person.birth_date = "X" + person.birth_date = "X" assert !person.valid? assert person.errors[:birth_date][:date_time] @@ -101,17 +151,17 @@ def test_attributes_set_get person.multiple_values << 99 end - friends = [PersonModel.new , PersonModel.new] + friends = [model.new , model.new] person.friends = friends assert !person.valid? assert person.errors[:friends][:no_list] - person.friends = PersonModel.new + person.friends = model.new assert !person.valid? - assert person.errors[:friends][:person_model] + assert person.errors[:friends][model_key_name] person.friends = "some one" assert !person.valid? - assert person.errors[:friends][:person_model] - person.friends = PersonModel.new + assert person.errors[:friends][model_key_name] + person.friends = model.new person.one_number = 99 assert !person.valid? @@ -125,7 +175,7 @@ def test_attributes_set_get assert !person.valid? assert person.errors[:one_number][:no_list] - person.one_number = 99 + person.one_number = 99 assert_equal(99, person.one_number) assert !person.valid? assert !person.errors[:one_number] @@ -137,10 +187,25 @@ def test_attributes_set_get assert !person.valid? end - def test_default_value - #default is on save ... returns` - person = PersonModel.new - assert_equal nil, person.created + def test_model_with_yaml_scheme + + settings = YamlSchemeModelTest.model_settings + attributes_settings = settings[:attributes] + + + assert_equal "test/data/yaml_scheme_model_test.yml", settings[:scheme] + + assert_equal 'Name', attributes_settings[:name][:label] + assert_equal 'Person name', attributes_settings[:name][:description] + assert_equal %w[test:name test2:name test3:person_name], attributes_settings[:name][:equivalents] + assert_equal 'Put the person name as string', attributes_settings[:name][:help] + assert_equal 'John', attributes_settings[:name][:example] + + + assert_equal 'Person nationality', attributes_settings[:nationality][:label] + hash = {fr: 'france', us: 'USA'} + assert_equal hash, attributes_settings[:nationality][:enforcedValues] + end end diff --git a/test/test_model_complex.rb b/test/test_model_complex.rb index 77d59a9b..3074683c 100644 --- a/test/test_model_complex.rb +++ b/test/test_model_complex.rb @@ -8,7 +8,7 @@ class Submission < Goo::Base::Resource end class Term < Goo::Base::Resource - model :class, + model :term, namespace: :owl, collection: :submission, name_with: :id, @@ -23,22 +23,22 @@ class Term < Goo::Base::Resource attribute :parents, namespace: :rdfs, property: lambda { |x| tree_property(x) }, - enforce: [:list, :class] + enforce: [:list, :term] attribute :ancestors, namespace: :rdfs, property: lambda { |x| tree_property(x) }, - enforce: [:list, :class], transitive: true + enforce: [:list, :term], transitive: true attribute :children, namespace: :rdfs, property: lambda { |x| tree_property(x) }, - inverse: { on: :class , attribute: :parents } + inverse: { on: :term , attribute: :parents } attribute :descendants, namespace: :rdfs, property: lambda { |x| tree_property(x) }, - inverse: { on: :class , attribute: :parents }, + inverse: { on: :term , attribute: :parents }, transitive: true def self.tree_property(*args) @@ -76,12 +76,13 @@ def self.before_suite if GooTest.count_pattern("?s ?p ?o") > 100000 raise Exception, "Too many triples in KB, does not seem right to run tests" end - Goo.sparql_update_client.update("DELETE {?s ?p ?o } WHERE { ?s ?p ?o }") + + Goo.sparql_data_client.delete_graph(Submission.uri_type.to_s) end def self.after_suite Goo.use_cache = false - Goo.sparql_update_client.update("DELETE {?s ?p ?o } WHERE { ?s ?p ?o }") + Goo.sparql_data_client.delete_graph(Submission.uri_type.to_s) end def test_method_handler @@ -320,7 +321,7 @@ def test_parents_inverse_children # the model declaration above. See the explanation in # https://github.com/ncbo/goo/commit/0e09816b121750b3bb875a5c24cb79865287fcf4#commitcomment-90304626 Goo.add_model(:class, Term) - + submission = Submission.new(name: "submission1") unless submission.exist? submission.save diff --git a/test/test_read_only.rb b/test/test_read_only.rb index 0b139e18..9855decf 100644 --- a/test/test_read_only.rb +++ b/test/test_read_only.rb @@ -37,11 +37,17 @@ def test_struct_find end def test_embed_struct - skip "not yet" + students = Student.where(enrolled: [university: [name: "Stanford"]]) .include(:name) - .include(enrolled: [:name, university: [ :address ]]) + .include(enrolled: [:name, university: [ :address, :name ]]) .read_only.all + + assert_equal 3, students.size + students.each do |st| + assert st.enrolled.any? {|e| e.is_a?(Struct) && e.university.name.eql?('Stanford')} + end + end end end diff --git a/test/test_schemaless.rb b/test/test_schemaless.rb index 5d0b4bdd..42084eb8 100644 --- a/test/test_schemaless.rb +++ b/test/test_schemaless.rb @@ -1,6 +1,6 @@ require_relative 'test_case' -module TestSChemaless +module TestSchemaless ONT_ID = "http:://example.org/data/nemo" @@ -116,6 +116,9 @@ def test_find_include_schemaless where = Klass.find(cognition_term).in(ontology).include(:unmapped) k = where.first enter = 0 + + assert k.unmapped.keys.include?(Goo.vocabulary(:nemo)[:definition]) + k.unmapped.each do |p,vals| if p.to_s == Goo.vocabulary(:nemo)[:synonym].to_s enter += 1 @@ -183,7 +186,19 @@ def test_index_order_by end end + + def test_all_pages_loop + ontology = Ontology.find(RDF::URI.new(ONT_ID)).first + page = 1 + count = 0 + begin + paging = Klass.in(ontology).page(page,50).all + count += paging.size + page = paging.next_page if paging.next? + end while(paging.next?) + assert_equal count, Klass.in(ontology).count + end def test_page_reuse_predicates ontology = Ontology.find(RDF::URI.new(ONT_ID)).first paging = Klass.in(ontology).include(:unmapped).page(1,100) @@ -206,7 +221,7 @@ def test_page_reuse_predicates all_ids << k.id end total += page.length - paging.page(page.next_page) if page.next? + paging.page(page.next_page, 100) if page.next? assert page.aggregate == 1713 end while(page.next?) assert all_ids.length == all_ids.uniq.length diff --git a/test/test_update_callbacks.rb b/test/test_update_callbacks.rb new file mode 100644 index 00000000..bef38a68 --- /dev/null +++ b/test/test_update_callbacks.rb @@ -0,0 +1,53 @@ +require_relative 'test_case' + + +require_relative 'models' + +class TestUpdateCallBack < Goo::Base::Resource + model :update_callback_model, name_with: :code + attribute :code, enforce: [:string, :existence] + attribute :name, enforce: [:string, :existence] + attribute :first_name, onUpdate: :update_name + attribute :last_name, onUpdate: :update_name + + + def update_name(inst, attr) + self.name = self.first_name + self.last_name + end +end + +class TestUpdateCallBacks < MiniTest::Unit::TestCase + + def self.before_suite + GooTestData.delete_all [TestUpdateCallBack] + end + + def self.after_suite + GooTestData.delete_all [TestUpdateCallBack] + end + + + def test_update_callback + p = TestUpdateCallBack.new + p.code = "1" + p.name = "name" + p.first_name = "first_name" + p.last_name = "last_name" + + assert p.valid? + p.save + + p.bring_remaining + + assert_equal p.first_name + p.last_name, p.name + + p.last_name = "last_name2" + p.save + + p.bring_remaining + assert_equal "last_name2", p.last_name + assert_equal p.first_name + p.last_name, p.name + end + +end + diff --git a/test/test_where.rb b/test/test_where.rb index f3852e13..b1abd081 100644 --- a/test/test_where.rb +++ b/test/test_where.rb @@ -1,5 +1,5 @@ -require_relative "test_case" -require_relative "models" +require_relative 'test_case' +require_relative 'models' class TestWhere < MiniTest::Unit::TestCase def initialize(*args) @@ -101,10 +101,10 @@ def test_where_2levels assert_equal 1, programs.length assert programs.first.id.to_s["Southampton/BioInformatics"] - # any program from universities in the US - programs = Program.where(university: [address: [country: "US"]]).include([:name]).all - assert_equal 3, programs.length - assert_equal ["BioInformatics", "CompSci", "Medicine"], programs.map { |p| p.name }.sort + #any program from universities in the US + programs = Program.where(university: [ address: [ country: "US" ]]).include([:name]).all + assert programs.length == 3 + assert programs.map { |p| p.name }.sort == ["BioInformatics", "CompSci", "Medicine"] end def test_where_2levels_inverse @@ -117,15 +117,15 @@ def test_where_2levels_inverse # equivalent unis = University.where(address: [country: "US"]) - .and(programs: [category: [code: "Biology"]]).all - assert_equal 1, unis.length - assert_equal "http://goo.org/default/university/Stanford", unis.first.id.to_s + .and(programs: [category: [code: "Biology"]]).all + assert unis.length == 1 + assert unis.first.id.to_s == "http://goo.org/default/university/Stanford" end def test_embed_include programs = Program.where.include(:name) - .include(university: [:name]) - .include(category: [:code]).all + .include(university: [:name]) + .include(category: [:code]).all assert_equal 9, programs.length programs.each do |p| @@ -176,8 +176,8 @@ def test_iterative_include_in_place # two levels unis = University.where.all unis_return = University.where.models(unis) - .include(programs: [:name, students: [:name]]).to_a - assert_equal unis.object_id, unis_return.object_id + .include(programs: [:name, students: [:name]]).to_a + assert unis_return.object_id == unis.object_id return_object_id = unis.map { |x| x.object_id }.uniq.sort unis_object_id = unis.map { |x| x.object_id }.uniq.sort assert_equal unis_object_id, return_object_id @@ -256,6 +256,39 @@ def test_embed_two_levels end end + def test_fetch_remaining + students = Student.where(enrolled:RDF::URI.new("http://example.org/program/Stanford/BioInformatics")) + .include(:name, :birth_date, enrolled: [:name]).all + + + s = students.select { |x| x.name['Daniel'] }.first + refute_nil s + assert_equal 2, s.enrolled.size + end + + def test_paging_with_filter_order + skip('pagination with filter and order does not work in 4s') if Goo.backend_4s? + + f = Goo::Filter.new(:birth_date) > DateTime.parse('1978-01-03') + total_count = Student.where.filter(f).count + page_1 = Student.where.include(:name, :birth_date).page(1, total_count - 1).filter(f).order_by(name: :asc).to_a + refute_empty page_1 + assert page_1.next? + page_2 = Student.where.include(:name, :birth_date).page(page_1.next_page, total_count - 1).filter(f).order_by(name: :asc).to_a + + + refute_empty page_2 + assert_equal total_count, page_1.size + page_2.size + end + + def test_two_level_include + programs = Program.where.include(:name).all + r = Program.where.models(programs).include(students: [:name]).all + r.each do |p| + refute_nil p.students + end + end + def test_unique_object_references # NOTE: unique references does not apply across different slice loading return if Goo.slice_loading_size < 100 @@ -295,9 +328,9 @@ def test_unique_object_references ["Computer Science", "Electronics", "Engineering", "Mathematics"]], daniel.enrolled.map { |p| p.category.map { |c| c.code }.sort }.sort john = students[1] - assert_equal ["Computer Science", "Electronics", "Engineering", "Mathematics"], john.enrolled.map { |p| p.category.map { |c| c.code }.sort }.sort + assert_equal ["Computer Science", "Electronics", "Engineering", "Mathematics"], john.enrolled.map { |p| p.category.map { |c| c.code }.sort }.flatten.sort susan = students.last - assert_equal ["Biology", "Computer Science", "Medicine"], susan.enrolled.map { |p| p.category.map { |c| c.code }.sort }.sort + assert_equal ["Biology", "Computer Science", "Medicine"], susan.enrolled.map { |p| p.category.map { |c| c.code }.sort }.flatten.sort categories = [] students.each do |st| @@ -311,8 +344,8 @@ def test_unique_object_references def test_complex_include # Students in a university by name students = Student.where(enrolled: [university: [name: "Stanford"]]) - .include(:name) - .include(enrolled: [:name, university: [:address]]).all + .include(:name) + .include(enrolled: [:name, university: [ :address ]]).all assert_equal ["Daniel", "John", "Susan"], students.map { |x| x.name }.sort students.each do |s| @@ -379,24 +412,24 @@ def test_where_union_pattern def test_where_direct_attributes st = Student.where(name: "Daniel") - .or(name: "Louis") - .or(name: "Lee") - .or(name: "John").all - assert_equal 4, st.length + .or(name: "Louis") + .or(name: "Lee") + .or(name: "John").all + assert st.length == 4 st = Student.where(name: "Daniel") - .and(name: "John").all - assert_equal 0, st.length + .and(name: "John").all + assert st.length == 0 st = Student.where(name: "Daniel") - .and(birth_date: DateTime.parse("1978-01-04")).all - assert_equal 1, st.length + .and(birth_date: DateTime.parse('1978-01-04')).all + assert st.length == 1 assert st.first.id.to_s["Daniel"] st = Student.where(name: "Daniel") - .or(name: "Louis") - .and(birth_date: DateTime.parse("1978-01-04")) - assert_equal 1, st.length + .or(name: "Louis") + .and(birth_date: DateTime.parse('1978-01-04')) + assert st.length == 1 assert st.first.id.to_s["Daniel"] end @@ -428,11 +461,11 @@ def test_combine_where_patterns_with_include assert_equal 2, st.length refute_equal st[1].name, st.first.name st.each do |p| - assert(p.name == "Susan" || p.name == "Daniel") - assert_kind_of Array, p.enrolled + assert (p.name == "Susan" || p.name == "Daniel") + assert Array, p.enrolled assert (p.name == "Susan" && p.enrolled.length == 1) || - (p.name == "Daniel" && p.enrolled.length == 2) - assert_kind_of String, p.enrolled.first.university.address.first.country + (p.name == "Daniel" && p.enrolled.length == 2) + assert String, p.enrolled.first.university.address.first.country end end @@ -442,29 +475,32 @@ def test_filter f = Goo::Filter.new(:birth_date) > DateTime.parse("1978-01-03") st = Student.where.filter(f).all - assert_equal ["http://goo.org/default/student/Daniel", - "http://goo.org/default/student/Lee", - "http://goo.org/default/student/Louis", - "http://goo.org/default/student/Robert"], st.map { |x| x.id.to_s }.sort + assert st.map { |x| x.id.to_s }.sort == ["http://goo.org/default/student/Daniel", + "http://goo.org/default/student/Lee", + "http://goo.org/default/student/Louis", + "http://goo.org/default/student/Robert"] f = (Goo::Filter.new(:birth_date) <= DateTime.parse("1978-01-01")) .or(Goo::Filter.new(:birth_date) >= DateTime.parse("1978-01-07")) st = Student.where.filter(f).all - assert_equal ["http://goo.org/default/student/Robert", - "http://goo.org/default/student/Susan"], st.map { |x| x.id.to_s }.sort + assert st.map { |x| x.id.to_s }.sort == [ + "http://goo.org/default/student/Robert", + "http://goo.org/default/student/Susan"] f = (Goo::Filter.new(:birth_date) <= DateTime.parse("1978-01-01")) .or(Goo::Filter.new(:name) == "Daniel") st = Student.where.filter(f).all - assert_equal ["http://goo.org/default/student/Daniel", - "http://goo.org/default/student/Susan"], st.map { |x| x.id.to_s }.sort + assert st.map { |x| x.id.to_s }.sort == [ + "http://goo.org/default/student/Daniel", + "http://goo.org/default/student/Susan"] f = (Goo::Filter.new(:birth_date) > DateTime.parse("1978-01-02")) .and(Goo::Filter.new(:birth_date) < DateTime.parse("1978-01-06")) st = Student.where.filter(f).all - assert_equal ["http://goo.org/default/student/Daniel", + assert st.map { |x| x.id.to_s }.sort == [ + "http://goo.org/default/student/Daniel", "http://goo.org/default/student/Louis", - "http://goo.org/default/student/Tim"], st.map { |x| x.id.to_s }.sort + "http://goo.org/default/student/Tim"] f = Goo::Filter.new(enrolled: [:credits]) > 8 st = Student.where.filter(f).all @@ -473,20 +509,20 @@ def test_filter # students without awards f = Goo::Filter.new(:awards).unbound st = Student.where.filter(f) - .include(:name) - .all - assert_equal ["John", "Tim", "Louis", "Lee", "Robert"].sort, st.map { |x| x.name }.sort + .include(:name) + .all + assert st.map { |x| x.name }.sort == ["John","Tim","Louis","Lee","Robert"].sort # unbound on some non existing property f = Goo::Filter.new(enrolled: [:xxx]).unbound st = Student.where.filter(f).all - assert_equal 7, st.length + assert st.length == 7 f = Goo::Filter.new(:name).regex("n") # will find all students that contains "n" in there name st = Student.where.filter(f).include(:name).all # return "John" , "Daniel" and "Susan" assert_equal 3, st.length - assert_equal ["John", "Daniel", "Susan"].sort, st.map { |x| x.name }.sort + assert_equal ["John","Daniel","Susan"].sort, st.map { |x| x.name }.sort end def test_aggregated @@ -509,7 +545,7 @@ def test_aggregated sts = Student.where.include(:name).aggregate(:count, :enrolled).all sts.each do |st| assert (st.name == "Daniel" && st.aggregates.first.value == 2) || - st.aggregates.first.value == 1 + st.aggregates.first.value == 1 end # students enrolled in more than 1 program and get the programs name @@ -578,4 +614,16 @@ def test_include_inverse_with_find end end end + + def test_complex_order_by + u = University.where.include(address: [:country]).order_by(address: {country: :asc}).all + countries = u.map {|x| x.address.map{|a| a.country}}.flatten + assert_equal countries.sort, countries + + + u = University.where.include(address: [:country]).order_by(address: {country: :desc}).all + countries = u.map {|x| x.address.map{|a| a.country}}.flatten + assert_equal countries.sort{|a,b| b<=>a }, countries + end + end