Skip to content

Commit

Permalink
Add support for unicode 16.0.0. (#157)
Browse files Browse the repository at this point in the history
Co-authored-by: hhugo <hugo.heuzard@gmail.com>
  • Loading branch information
toots and hhugo authored Oct 23, 2024
1 parent df4a52c commit da551b9
Show file tree
Hide file tree
Showing 9 changed files with 3,060 additions and 12,829 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# dev
- Add support for Unicode `16.0.0`
- Add API for retrieving start and stop positions separately (#155)

# 3.2 (2023-06-28):
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,6 @@ The `examples/` subdirectory contains several samples of sedlex in use.
- improvements to the build system
- switched parts of ppx_sedlex to using concrete syntax (with ppx_metaquot)
- Steffen Smolka: port to dune
- Romain Beauxis:
  - Implementation of the Unicode table extractors
  - General maintenance
3 changes: 2 additions & 1 deletion examples/regressions.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
module CSet = Sedlex_ppx.Sedlex_cset
module Unicode = Sedlex_ppx.Unicode

let test_versions = ("14.0.0", "15.0.0")
let test_versions = ("15.0.0", "16.0.0")

let regressions =
[ (* Example *)
Expand Down Expand Up @@ -38,6 +38,7 @@ let compare name (old_ : CSet.t) (new_ : CSet.t) =
let test new_l (name, old_l) =
(* Cn is for unassigned code points, which are allowed to be
* used in future versions. *)
let old_l = Sedlex_utils.Cset.to_list old_l in
if name <> "cn" then (
let old_l =
List.fold_left
Expand Down
13,962 changes: 2,067 additions & 11,895 deletions examples/unicode_old.ml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/common/cset.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ let of_list l =
check_invariant l;
l

(* [to_list l] returns [l] itself, unchanged; no copy or conversion is
   performed here. *)
let to_list l = l
(* Upper bound on codes: 0x10ffff, the highest Unicode code point. *)
let max_code = 0x10ffff (* must be < max_int *)
(* Lower bound on codes. NOTE(review): -1 is below any real code point;
   presumably a sentinel value (e.g. end of input) -- confirm with callers. *)
let min_code = -1
(* The empty value: a list with no elements. *)
let empty = []
Expand Down
1 change: 1 addition & 0 deletions src/common/cset.mli
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
(** Representation: a list of [(int * int)] pairs. The type is [private], so
    code outside this module can read a [t] as a list but cannot construct
    one directly; values must come from the functions below. *)
type t = private (int * int) list

(** [of_list l] turns a list of pairs into a [t].
    NOTE(review): the implementation appears to run an invariant check on
    [l] before returning it -- confirm the exact precondition in [cset.ml]. *)
val of_list : (int * int) list -> t

(** [to_list t] returns the pairs of [t] as a plain list. *)
val to_list : t -> (int * int) list

(** Smallest code handled by this module. *)
val min_code : int

(** Largest code handled by this module. *)
val max_code : int

(** The value containing no pairs. *)
val empty : t
Expand Down
2 changes: 1 addition & 1 deletion src/generator/data/base_url
Original file line number Diff line number Diff line change
@@ -1 +1 @@
https://www.unicode.org/Public/15.0.0
https://www.unicode.org/Public/16.0.0
7 changes: 6 additions & 1 deletion src/generator/gen_unicode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ let parse_line l =
match String.index_opt l '#' with None -> l | Some i -> String.sub l 0 i
in
String.split_on_char ';' l
|> List.map String.trim

let parse_code s =
try int_of_string (Printf.sprintf "0x%s" s)
Expand Down Expand Up @@ -91,7 +92,11 @@ let files =
let interval = parse_interval interval in
let prop = parse_prop prop in
Hashtbl.add labels prop interval
| _ -> assert false );
| [_interval; ("InCB"); ("Extend"|"Consonant"|"Linker")] ->
(* TODO: support non-binary properties? *)
()
| _ ->
assert false);
( "DerivedGeneralCategory.txt",
fun s ->
match parse_line s with
Expand Down
1,909 changes: 978 additions & 931 deletions src/syntax/unicode.ml

Large diffs are not rendered by default.

0 comments on commit da551b9

Please sign in to comment.