From c4e43e292d5bcf189767cab6bafaf8f2b6ab4df0 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Thu, 4 Apr 2024 14:03:50 -0700 Subject: [PATCH] Avoid allocating datasets in cases where the returned dataset would be the same as the receiver This avoids unnecessary dataset clones/allocations. The most important changes are probably ungraphed/naked, since single_value_ds uses those, and various methods call single_value_ds where the dataset is already naked and ungraphed. Issue pointed out by using the recently introduced provenance extension in a real application. --- CHANGELOG | 2 + lib/sequel/dataset/graph.rb | 1 + lib/sequel/dataset/query.rb | 13 ++++++ spec/core/dataset_spec.rb | 81 +++++++++++++++++++++++++++++++++- spec/core/object_graph_spec.rb | 4 ++ 5 files changed, 100 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 9db6d2e9ac..98999d7550 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ === master +* Avoid allocating datasets in cases where the returned dataset would be the same as the receiver (jeremyevans) + * Add provenance dataset extension, which includes comments in queries showing how and where the dataset was built (jeremyevans) === 5.79.0 (2024-04-01) diff --git a/lib/sequel/dataset/graph.rb b/lib/sequel/dataset/graph.rb index 037b5f2fa7..d8dfcdf3db 100644 --- a/lib/sequel/dataset/graph.rb +++ b/lib/sequel/dataset/graph.rb @@ -253,6 +253,7 @@ def set_graph_aliases(graph_aliases) # Remove the splitting of results into subhashes, and all metadata # related to the current graph (if any). def ungraphed + return self unless opts[:graph] clone(:graph=>nil) end diff --git a/lib/sequel/dataset/query.rb b/lib/sequel/dataset/query.rb index 84f6a36a27..b3c22222e7 100644 --- a/lib/sequel/dataset/query.rb +++ b/lib/sequel/dataset/query.rb @@ -129,6 +129,7 @@ def clone(opts = OPTS) # :nodoc: def distinct(*args, &block) virtual_row_columns(args, block) if args.empty? + return self if opts[:distinct] == EMPTY_ARRAY cached_dataset(:_distinct_ds){clone(:distinct => EMPTY_ARRAY)} else raise(InvalidOperation, "DISTINCT ON not supported") unless supports_distinct_on? @@ -230,6 +231,7 @@ def filter(*cond, &block) # # DB[:table].for_update # SELECT * FROM table FOR UPDATE def for_update + return self if opts[:lock] == :update cached_dataset(:_for_update_ds){lock_style(:update)} end @@ -641,6 +643,7 @@ def #{jtype}_join(table, opts=Sequel::OPTS) # DB.from(:a, DB[:b].where(Sequel[:a][:c]=>Sequel[:b][:d]).lateral) # # SELECT * FROM a, LATERAL (SELECT * FROM b WHERE (a.c = b.d)) def lateral + return self if opts[:lateral] cached_dataset(:_lateral_ds){clone(:lateral=>true)} end @@ -744,6 +747,7 @@ def merge_using(source, join_condition) # ds.all # => [{2=>:id}] # ds.naked.all # => [{:id=>2}] def naked + return self unless opts[:row_proc] cached_dataset(:_naked_ds){with_row_proc(nil)} end @@ -753,6 +757,7 @@ def naked # DB[:items].for_update.nowait # # SELECT * FROM items FOR UPDATE NOWAIT def nowait + return self if opts[:nowait] cached_dataset(:_nowait_ds) do raise(Error, 'This dataset does not support raises errors instead of waiting for locked rows') unless supports_nowait? clone(:nowait=>true) @@ -878,6 +883,7 @@ def qualify(table=(cache=true; first_source)) # end def returning(*values) if values.empty? + return self if opts[:returning] == EMPTY_ARRAY cached_dataset(:_returning_ds) do raise Error, "RETURNING is not supported on #{db.database_type}" unless supports_returning?(:insert) clone(:returning=>EMPTY_ARRAY) @@ -930,6 +936,7 @@ def select(*columns, &block) # DB[:items].select_all(:items, :foo) # SELECT items.*, foo.* FROM items def select_all(*tables) if tables.empty? + return self unless opts[:select] cached_dataset(:_select_all_ds){clone(:select => nil)} else select(*tables.map{|t| i, a = split_alias(t); a || i}.map!{|t| SQL::ColumnAll.new(t)}.freeze) @@ -1005,6 +1012,7 @@ def server?(server) # Specify that the check for limits/offsets when updating/deleting be skipped for the dataset. def skip_limit_check + return self if opts[:skip_limit_check] cached_dataset(:_skip_limit_check_ds) do clone(:skip_limit_check=>true) end @@ -1012,6 +1020,7 @@ def skip_limit_check # Skip locked rows when returning results from this dataset. def skip_locked + return self if opts[:skip_locked] cached_dataset(:_skip_locked_ds) do raise(Error, 'This dataset does not support skipping locked rows') unless supports_skip_locked? clone(:skip_locked=>true) @@ -1023,6 +1032,7 @@ def skip_locked # DB[:items].group(:a).having(a: 1).where(:b).unfiltered # # SELECT * FROM items GROUP BY a def unfiltered + return self unless opts[:where] || opts[:having] cached_dataset(:_unfiltered_ds){clone(:where => nil, :having => nil)} end @@ -1031,6 +1041,7 @@ def unfiltered # DB[:items].group(:a).having(a: 1).where(:b).ungrouped # # SELECT * FROM items WHERE b def ungrouped + return self unless opts[:group] || opts[:having] cached_dataset(:_ungrouped_ds){clone(:group => nil, :having => nil)} end @@ -1058,6 +1069,7 @@ def union(dataset, opts=OPTS) # # DB[:items].limit(10, 20).unlimited # SELECT * FROM items def unlimited + return self unless opts[:limit] || opts[:offset] cached_dataset(:_unlimited_ds){clone(:limit=>nil, :offset=>nil)} end @@ -1065,6 +1077,7 @@ def unlimited # # DB[:items].order(:a).unordered # SELECT * FROM items def unordered + return self unless opts[:order] cached_dataset(:_unordered_ds){clone(:order=>nil)} end diff --git a/spec/core/dataset_spec.rb b/spec/core/dataset_spec.rb index 3c332ae691..6804e9c77e 100644 --- a/spec/core/dataset_spec.rb +++ b/spec/core/dataset_spec.rb @@ -260,6 +260,11 @@ def columns; [:a, :b] end @dataset.db.sqls.must_equal ['DELETE FROM a'] end + it "should return receiver if already skipping limit check" do + ds = @dataset.skip_limit_check + ds.skip_limit_check.must_be_same_as ds + end + it "should raise on #update" do proc{@dataset.update(:a=>1)}.must_raise(Sequel::InvalidOperation) end @@ -1395,6 +1400,11 @@ def supports_cte_in_subselect?; false end @d.select_all.sql.must_equal 'SELECT * FROM test' end + it "should return receiver if called without arguments and it already uses SELECT *" do + d = @d.select_all + d.select_all.must_be_same_as d + end + it "should override the previous select option" do @d.select(:a, :b, :c).select_all.sql.must_equal 'SELECT * FROM test' end @@ -1584,6 +1594,12 @@ def supports_cte_in_subselect?; false end ds.unfiltered.sql.must_equal 'SELECT * FROM test' end end + + it "should return receiver if it does not have a WHERE or HAVING clause" do + ds = Sequel.mock.dataset.from(:test).filter(:score=>1) + ds = ds.unfiltered + ds.unfiltered.must_be_same_as ds + end end describe "Dataset#unlimited" do @@ -1593,6 +1609,12 @@ def supports_cte_in_subselect?; false end ds.unlimited.sql.must_equal 'SELECT * FROM test' end end + + it "should return receiver if it does not have a LIMIT or OFFSET" do + ds = Sequel.mock.dataset.from(:test).limit(1, 2) + ds = ds.unlimited + ds.unlimited.must_be_same_as ds + end end describe "Dataset#ungrouped" do @@ -1602,6 +1624,12 @@ def supports_cte_in_subselect?; false end ds.ungrouped.sql.must_equal 'SELECT * FROM test' end end + + it "should return receiver if it does not have a WHERE or HAVING clause" do + ds = Sequel.mock.dataset.from(:test).group(:a).having(:b) + ds = ds.ungrouped + ds.ungrouped.must_be_same_as ds + end end describe "Dataset#unordered" do @@ -1611,6 +1639,12 @@ def supports_cte_in_subselect?; false end ds.unordered.sql.must_equal 'SELECT * FROM test' end end + + it "should return receiver if it does not have an ORDER clause" do + ds = Sequel.mock.dataset.from(:test).order(:name) + ds = ds.unordered + ds.unordered.must_be_same_as ds + end end describe "Dataset#with_sql" do @@ -2014,11 +2048,16 @@ def supports_cte_in_subselect?; false end end describe "Dataset#naked" do - it "should returned clone dataset without row_proc" do + it "should return clone dataset without row_proc" do d = Sequel.mock.dataset.with_row_proc(proc{|r| r}) d.naked.row_proc.must_be_nil d.row_proc.wont_be_nil end + + it "should return receiver if it doesn't have a row_proc" do + d = Sequel.mock.dataset + d.naked.must_be_same_as d + end end describe "Dataset#qualified_column_name" do @@ -2220,6 +2259,11 @@ def ==(o) @h == o.h; end proc{@dataset.distinct(:a)}.must_raise(Sequel::InvalidOperation) end + it "should return receiver if already distinct" do + ds = @dataset.distinct + ds.distinct.must_be_same_as ds + end + it "should use DISTINCT ON if columns are given and DISTINCT ON is supported" do @dataset = @dataset.with_extend{def supports_distinct_on?; true end} @dataset.distinct(:a, :b).sql.must_equal 'SELECT DISTINCT ON (a, b) name FROM test' @@ -2523,6 +2567,13 @@ def supports_cte_in_subselect?; false end end end +describe "Dataset#lateral" do + it "should return receiver if already using LATERAL" do + ds = Sequel.mock.dataset.lateral + ds.lateral.must_be_same_as ds + end +end + describe "Dataset#join_table" do before do @d = Sequel.mock.dataset.from(:items).with_quote_identifiers(true) @@ -5206,6 +5257,11 @@ def default_timestamp_format end end + it "#for_update should return receiver if already using FOR UPDATE" do + ds = @ds.for_update + ds.for_update.must_be_same_as ds + end + it "#lock_style should accept symbols" do @ds.lock_style(:update).sql.must_equal "SELECT * FROM t FOR UPDATE" end @@ -5234,6 +5290,15 @@ def select_lock_sql(sql) super; sql << " NOWAIT" if @opts[:nowait] end @ds.nowait.sql.must_equal "SELECT * FROM t FOR UPDATE NOWAIT" end end + + it "should return receiver if it already uses NOWAIT" do + @ds = @ds.with_extend do + def supports_nowait?; true end + def select_lock_sql(sql) super; sql << " NOWAIT" if @opts[:nowait] end + end + ds = @ds.nowait + ds.nowait.must_be_same_as ds + end end describe "Dataset#skip_locked" do @@ -5255,6 +5320,15 @@ def select_lock_sql(sql) super; sql << " SKIP LOCKED" if @opts[:skip_locked] end @ds.skip_locked.sql.must_equal "SELECT * FROM t FOR UPDATE SKIP LOCKED" end end + + it "should return receiver if it already uses SKIP LOCKED" do + @ds = @ds.with_extend do + def supports_skip_locked?; true end + def select_lock_sql(sql) super; sql << " SKIP LOCKED" if @opts[:skip_locked] end + end + ds = @ds.skip_locked + ds.skip_locked.must_be_same_as ds + end end describe "Custom ASTTransformer" do @@ -5316,6 +5390,11 @@ def obj.to_s_append(ds, sql) sql << 'a' end @ds.update_sql(:foo=>1).must_equal "UPDATE t SET foo = 1" end + it "should return receiver if called without arguments and it already uses RETURNING *" do + ds = @ds.returning + ds.returning.must_be_same_as ds + end + it "should have insert, update, and delete yield to blocks if RETURNING is used" do @pr.call h = {} diff --git a/spec/core/object_graph_spec.rb b/spec/core/object_graph_spec.rb index ad01deff0f..b0b08d53e6 100644 --- a/spec/core/object_graph_spec.rb +++ b/spec/core/object_graph_spec.rb @@ -338,4 +338,8 @@ @db.fetch = {:id=>1,:x=>2,:y=>3,:lines_id=>4,:lines_x=>5,:lines_y=>6,:graph_id=>7} @ds1.graph(@ds2, :x=>:id).ungraphed.all.must_equal [{:id=>1,:x=>2,:y=>3,:lines_id=>4,:lines_x=>5,:lines_y=>6,:graph_id=>7}] end + + it "#ungraphed should return the receiver if not graphed" do + @ds1.ungraphed.must_be_same_as @ds1 + end end