From 4153e1a27eefc66c43201234b1f678bebb8aec73 Mon Sep 17 00:00:00 2001 From: LakshanWeerasinghe Date: Sat, 25 May 2024 00:27:46 +0530 Subject: [PATCH] Fix issue handling new line characters --- .../io/ballerina/lib/data/yaml/compiler/Constants.java | 1 - .../ballerina/stdlib/data/yaml/lexer/CharacterReader.java | 2 +- .../java/io/ballerina/stdlib/data/yaml/lexer/Scanner.java | 5 ++++- .../io/ballerina/stdlib/data/yaml/lexer/YamlLexer.java | 8 ++++++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/compiler-plugin/src/main/java/io/ballerina/lib/data/yaml/compiler/Constants.java b/compiler-plugin/src/main/java/io/ballerina/lib/data/yaml/compiler/Constants.java index 776575e..4e23261 100644 --- a/compiler-plugin/src/main/java/io/ballerina/lib/data/yaml/compiler/Constants.java +++ b/compiler-plugin/src/main/java/io/ballerina/lib/data/yaml/compiler/Constants.java @@ -33,5 +33,4 @@ public class Constants { static final String NAME = "Name"; static final String YAML = "yaml"; static final String DATA_YAML = "data.yaml"; - } diff --git a/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/CharacterReader.java b/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/CharacterReader.java index 2b3c684..16bff34 100644 --- a/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/CharacterReader.java +++ b/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/CharacterReader.java @@ -91,7 +91,7 @@ public boolean forward(int k) { } private boolean hasNewLine(int codePoint) { - return codePoint == '\n' || codePoint == '\r' && peek() == '\n'; + return codePoint == '\n'; } private boolean checkAndReadData(int k) { diff --git a/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/Scanner.java b/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/Scanner.java index 6e3f8c3..9087f75 100644 --- a/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/Scanner.java +++ b/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/Scanner.java @@ -108,6 +108,9 @@ public static void iterate(LexerState sm, Scan scan, Token.TokenType token, bool if (include && sm.peek() != '\n') { sm.forward(); } + if (include && sm.peek() != '\r' && sm.peek(1) != '\n') { + sm.forward(2); + } sm.tokenize(token); return; } @@ -422,7 +425,7 @@ public boolean scan(LexerState sm) throws Error.YamlParserException { peekAtIndex = sm.peek(++numWhitespace); } - if (peekAtIndex == -1 || peekAtIndex == '\n') { + if (peekAtIndex == -1 || LINE_BREAK_PATTERN.pattern(peekAtIndex)) { return true; } diff --git a/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/YamlLexer.java b/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/YamlLexer.java index ab789cd..49cdbed 100644 --- a/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/YamlLexer.java +++ b/native/src/main/java/io/ballerina/stdlib/data/yaml/lexer/YamlLexer.java @@ -79,6 +79,14 @@ public static LexerState.State scanTokens(LexerState state) throws Error.YamlPar return state.getState(); } + // Check for line breaks when reading from string + if (state.peek() == '\r' && state.peek(1) == '\n' && state.getState() != LEXER_DOUBLE_QUOTE) { + state.setNewLine(true); + state.forward(); + state.tokenize(EOL); + return state.getState(); + } + return state.getState().transition(state); } }