From d7686d51e697692bf47c12e340b0f68e877fe3a7 Mon Sep 17 00:00:00 2001 From: Pawan Dubey Date: Mon, 29 Jul 2024 15:56:26 -0400 Subject: [PATCH] WIP assertion on json type with a floating point value with zero fractional part Go test (passes): `DEBUG=1 go test ./... -v -run='TestDMLEventsTestSuite'` Ruby integration test (fails): ``` DEBUG=1 bundle exec ruby test/main.rb -v -n "TypesTest#test_json_data_update" && DEBUG=1 bundle exec ruby test/main.rb -v -n "TypesTest#test_json_data_insert" ``` This means that if the right value is supplied to the ghostferry types, it generates the correct statement (hence the go unit test passes) but if it gets bad values from the go-mysql-org library, it fails as expected. --- Gemfile | 1 + Gemfile.lock | 15 ++++++++++++ binlog_writer.go | 2 ++ dml_events.go | 1 + test/go/dml_events_test.go | 17 ++++++++++--- test/integration/types_test.rb | 44 +++++++++++++++++++--------------- test/test_helper.rb | 2 +- 7 files changed, 59 insertions(+), 23 deletions(-) diff --git a/Gemfile b/Gemfile index b3cdbf46..5d62837e 100644 --- a/Gemfile +++ b/Gemfile @@ -14,4 +14,5 @@ end group :development do gem "tqdm" gem "pry-byebug" + gem "debug" end diff --git a/Gemfile.lock b/Gemfile.lock index c83f95e1..37072f5b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -5,6 +5,13 @@ GEM builder (3.2.4) byebug (11.1.3) coderay (1.1.3) + debug (1.9.2) + irb (~> 1.10) + reline (>= 0.3.8) + io-console (0.7.2) + irb (1.14.0) + rdoc (>= 4.0.0) + reline (>= 0.4.2) method_source (1.0.0) minitest (5.20.0) minitest-fail-fast (0.1.0) @@ -25,7 +32,14 @@ GEM pry-byebug (3.10.1) byebug (~> 11.0) pry (>= 0.13, < 0.15) + psych (5.1.2) + stringio + rdoc (6.7.0) + psych (>= 4.0.0) + reline (0.5.9) + io-console (~> 0.5) ruby-progressbar (1.13.0) + stringio (3.1.1) tqdm (0.4.1) webrick (1.8.1) @@ -33,6 +47,7 @@ PLATFORMS ruby DEPENDENCIES + debug minitest minitest-fail-fast (~> 0.1.0) minitest-hooks diff --git a/binlog_writer.go b/binlog_writer.go index 794497ec..a3865a63 100644 --- a/binlog_writer.go +++ b/binlog_writer.go @@ -99,6 +99,8 @@ func (b *BinlogWriter) writeEvents(events []DMLEvent) error { return fmt.Errorf("generating sql query at pos %v: %v", ev.BinlogPosition(), err) } + b.logger.Debugln(sqlStmt) + queryBuffer = append(queryBuffer, sql.AnnotateStmt(sqlStmt, b.DB.Marginalia)...) queryBuffer = append(queryBuffer, ";\n"...) } diff --git a/dml_events.go b/dml_events.go index a87b1c13..fdbeba6e 100644 --- a/dml_events.go +++ b/dml_events.go @@ -425,6 +425,7 @@ func appendEscapedValue(buffer []byte, value interface{}, column schema.TableCol case string: // since https://github.com/go-mysql-org/go-mysql/pull/658/files merged, go-mysql returns JSON events as a string, but we would prefer them as []byte for consistency with other types if column.Type == schema.TYPE_JSON { + fmt.Printf("JSON value: %s\n", v) return appendEscapedBuffer(buffer, []byte(v), true) } var rightPadLengthForBinaryColumn int = 0 diff --git a/test/go/dml_events_test.go b/test/go/dml_events_test.go index 110470b2..ef8928e0 100644 --- a/test/go/dml_events_test.go +++ b/test/go/dml_events_test.go @@ -29,7 +29,7 @@ func (this *DMLEventsTestSuite) SetupTest() { columns := []schema.TableColumn{ {Name: "col1"}, - {Name: "col2"}, + {Name: "col2", Type: schema.TYPE_JSON}, {Name: "col3"}, } @@ -62,12 +62,13 @@ func (this *DMLEventsTestSuite) TestBinlogInsertEventGeneratesInsertQuery() { Rows: [][]interface{}{ {1000, []byte("val1"), true}, {1001, []byte("val2"), false}, + {1002, "{\"val\": 42.0}", false}, }, } dmlEvents, err := ghostferry.NewBinlogInsertEvents(this.eventBase, rowsEvent) this.Require().Nil(err) - this.Require().Equal(2, len(dmlEvents)) + this.Require().Equal(3, len(dmlEvents)) q1, err := dmlEvents[0].AsSQLString(this.targetTable.Schema, this.targetTable.Name) this.Require().Nil(err) @@ -76,6 +77,10 @@ func (this *DMLEventsTestSuite) TestBinlogInsertEventGeneratesInsertQuery() { q2, err := dmlEvents[1].AsSQLString(this.targetTable.Schema, this.targetTable.Name) this.Require().Nil(err) this.Require().Equal("INSERT IGNORE INTO `target_schema`.`target_table` (`col1`,`col2`,`col3`) VALUES (1001,_binary'val2',0)", q2) + + q3, err := dmlEvents[2].AsSQLString(this.targetTable.Schema, this.targetTable.Name) + this.Require().Nil(err) + this.Require().Equal("INSERT IGNORE INTO `target_schema`.`target_table` (`col1`,`col2`,`col3`) VALUES (1002,CAST('{\"val\": 42.0}' AS JSON),0)", q3) } func (this *DMLEventsTestSuite) TestBinlogInsertEventWithWrongColumnsReturnsError() { @@ -117,12 +122,14 @@ func (this *DMLEventsTestSuite) TestBinlogUpdateEventGeneratesUpdateQuery() { {1000, []byte("val2"), false}, {1001, []byte("val3"), false}, {1001, []byte("val4"), true}, + {1002, "{\"val\": 42.0}", false}, + {1002, "{\"val\": 43.0}", false}, }, } dmlEvents, err := ghostferry.NewBinlogUpdateEvents(this.eventBase, rowsEvent) this.Require().Nil(err) - this.Require().Equal(2, len(dmlEvents)) + this.Require().Equal(3, len(dmlEvents)) q1, err := dmlEvents[0].AsSQLString(this.targetTable.Schema, this.targetTable.Name) this.Require().Nil(err) @@ -131,6 +138,10 @@ func (this *DMLEventsTestSuite) TestBinlogUpdateEventGeneratesUpdateQuery() { q2, err := dmlEvents[1].AsSQLString(this.targetTable.Schema, this.targetTable.Name) this.Require().Nil(err) this.Require().Equal("UPDATE `target_schema`.`target_table` SET `col1`=1001,`col2`=_binary'val4',`col3`=1 WHERE `col1`=1001 AND `col2`=_binary'val3' AND `col3`=0", q2) + + q3, err := dmlEvents[2].AsSQLString(this.targetTable.Schema, this.targetTable.Name) + this.Require().Nil(err) + this.Require().Equal("UPDATE `target_schema`.`target_table` SET `col1`=1002,`col2`=CAST('{\"val\": 43.0}' AS JSON),`col3`=0 WHERE `col1`=1002 AND `col2`=CAST('{\"val\": 42.0}' AS JSON) AND `col3`=0", q3) } func (this *DMLEventsTestSuite) TestBinlogUpdateEventWithWrongColumnsReturnsError() { diff --git a/test/integration/types_test.rb b/test/integration/types_test.rb index 0b908e8d..40b4589b 100644 --- a/test/integration/types_test.rb +++ b/test/integration/types_test.rb @@ -1,13 +1,14 @@ require "test_helper" class TypesTest < GhostferryTestCase - JSON_OBJ = '{"data": {"quote": "\\\'", "value": [1]}}' + JSON_OBJ = '{"data": {"float": 32.0, "quote": "\\\'", "value": [1]}}' EMPTY_JSON = '{}' JSON_ARRAY = '[\"test_data\", \"test_data_2\"]' JSON_NULL = 'null' JSON_TRUE = 'true' JSON_FALSE = 'false' JSON_NUMBER = '42' + JSON_FLOATING_POINT_WITH_ZERO_FRACTIONAL_PART = '32.0' def test_json_colum_not_null_with_no_default_is_invalid_this_is_fine # See: https://bugs.mysql.com/bug.php?id=98496 @@ -103,10 +104,10 @@ def test_json_data_insert # with a JSON column is broken on 5.7. # See: https://bugs.mysql.com/bug.php?id=87847 res = target_db.query("SELECT COUNT(*) AS cnt FROM #{DEFAULT_FULL_TABLE_NAME}") - assert_equal 16, res.first["cnt"] + assert_equal 18, res.first["cnt"] expected = [ - {"id"=>1, "data"=>"{\"data\": {\"quote\": \"'\", \"value\": [1]}}"}, + {"id"=>1, "data"=>"{\"data\": {\"float\": 32.0, \"quote\": \"'\", \"value\": [1]}}"}, {"id"=>2, "data"=>"[\"test_data\", \"test_data_2\"]"}, {"id"=>3, "data"=>"{}"}, {"id"=>4, "data"=>nil}, @@ -114,15 +115,17 @@ def test_json_data_insert {"id"=>6, "data"=>"true"}, {"id"=>7, "data"=>"false"}, {"id"=>8, "data"=>"42"}, - - {"id"=>9, "data"=>"{\"data\": {\"quote\": \"'\", \"value\": [1]}}"}, - {"id"=>10, "data"=>"[\"test_data\", \"test_data_2\"]"}, - {"id"=>11, "data"=>"{}"}, - {"id"=>12, "data"=>nil}, - {"id"=>13, "data"=>"null"}, - {"id"=>14, "data"=>"true"}, - {"id"=>15, "data"=>"false"}, - {"id"=>16, "data"=>"42"}, + {"id"=>9, "data"=>"32.0"}, + + {"id"=>10, "data"=>"{\"data\": {\"float\": 32.0, \"quote\": \"'\", \"value\": [1]}}"}, + {"id"=>11, "data"=>"[\"test_data\", \"test_data_2\"]"}, + {"id"=>12, "data"=>"{}"}, + {"id"=>13, "data"=>nil}, + {"id"=>14, "data"=>"null"}, + {"id"=>15, "data"=>"true"}, + {"id"=>16, "data"=>"false"}, + {"id"=>17, "data"=>"42"}, + {"id"=>18, "data"=>"32.0"}, ] res = target_db.query("SELECT * FROM #{DEFAULT_FULL_TABLE_NAME} ORDER BY id ASC") @@ -194,15 +197,16 @@ def test_json_data_update loop do sleep 0.1 res = target_db.query("SELECT COUNT(*) AS cnt FROM #{DEFAULT_FULL_TABLE_NAME}") - if res.first["cnt"] == 8 + if res.first["cnt"] == 9 source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{EMPTY_JSON}' WHERE id = 1") source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{JSON_ARRAY}' WHERE id = 2") source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = NULL WHERE id = 3") source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{JSON_OBJ}' WHERE id = 4") source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{JSON_TRUE}' WHERE id = 5") source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{JSON_FALSE}' WHERE id = 6") - source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{JSON_NUMBER}' WHERE id = 7") + source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{JSON_FLOATING_POINT_WITH_ZERO_FRACTIONAL_PART}' WHERE id = 7") source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{JSON_NULL}' WHERE id = 8") + source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAME} SET data = '#{JSON_NUMBER}' WHERE id = 9") break end @@ -217,17 +221,18 @@ def test_json_data_update refute timedout, "failed due to time out while waiting for the 4 insert binlogs to be written to the target" res = target_db.query("SELECT COUNT(*) AS cnt FROM #{DEFAULT_FULL_TABLE_NAME}") - assert_equal 8, res.first["cnt"] + assert_equal 9, res.first["cnt"] expected = [ {"id"=>1, "data"=>"{}"}, {"id"=>2, "data"=>"[\"test_data\", \"test_data_2\"]"}, {"id"=>3, "data"=>nil}, - {"id"=>4, "data"=>"{\"data\": {\"quote\": \"'\", \"value\": [1]}}"}, + {"id"=>4, "data"=>"{\"data\": {\"float\": 32.0, \"quote\": \"'\", \"value\": [1]}}"}, {"id"=>5, "data"=>"true"}, {"id"=>6, "data"=>"false"}, - {"id"=>7, "data"=>"42"}, + {"id"=>7, "data"=>"32.0"}, {"id"=>8, "data"=>"null"}, + {"id"=>9, "data"=>"42"}, ] res = target_db.query("SELECT * FROM #{DEFAULT_FULL_TABLE_NAME} ORDER BY id ASC") @@ -319,7 +324,7 @@ def test_copy_data_in_fixed_size_binary_column def test_copy_data_in_fixed_size_binary_column__value_completely_filled # Also see: https://github.com/Shopify/ghostferry/pull/159#issuecomment-597769258 - # + # # NOTE: This test is interesting (beyond what is covered above already), # because it seems the server strips the trailing 0-bytes before sending # them to the binlog even when the trailing 0-bytes are inserted by the user. @@ -334,7 +339,7 @@ def test_copy_data_in_fixed_size_binary_column__value_completely_filled def test_copy_data_in_fixed_size_binary_column__value_is_empty_and_length_is_1 # Also see: https://github.com/Shopify/ghostferry/pull/159#issuecomment-597769258 - # + # # slight variation to cover the corner-case where there is no data in the # column at all and the entire value is 0-padded (here, only 1 byte) execute_copy_data_in_fixed_size_binary_column( @@ -406,6 +411,7 @@ def insert_json_on_source source_db.query("INSERT INTO #{DEFAULT_FULL_TABLE_NAME} (data) VALUES ('#{JSON_TRUE}')") source_db.query("INSERT INTO #{DEFAULT_FULL_TABLE_NAME} (data) VALUES ('#{JSON_FALSE}')") source_db.query("INSERT INTO #{DEFAULT_FULL_TABLE_NAME} (data) VALUES ('#{JSON_NUMBER}')") + source_db.query("INSERT INTO #{DEFAULT_FULL_TABLE_NAME} (data) VALUES ('#{JSON_FLOATING_POINT_WITH_ZERO_FRACTIONAL_PART}')") end def execute_copy_data_in_fixed_size_binary_column(column_size:, inserted_data:, expected_inserted_data:, updated_data:) diff --git a/test/test_helper.rb b/test/test_helper.rb index 9b9f8b4f..1fd24825 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,7 +1,7 @@ require "stringio" require "logger" -require "pry-byebug" unless ENV["CI"] +# require "pry-byebug" unless ENV["CI"] GO_CODE_PATH = File.join(File.absolute_path(File.dirname(__FILE__)), "lib", "go") FIXTURE_PATH = File.join(File.absolute_path(File.dirname(__FILE__)), "fixtures")