Skip to content

Commit

Permalink
Switch from Regex-based parsing to NimbleParsec grammar-based parsing
Browse files Browse the repository at this point in the history
Allow qualifiers on decades and centuries
  • Loading branch information
mbklein committed Oct 11, 2024
1 parent 921c60e commit 3916b5a
Show file tree
Hide file tree
Showing 19 changed files with 482 additions and 425 deletions.
62 changes: 3 additions & 59 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,34 +1,17 @@
---
name: edtf
name: test
on: [push]
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
elixir:
- 1.15-25
- 1.15-26
- 1.16-25
- 1.16-26
- 1.17-25
- 1.17-26
- 1.17-27
env:
MIX_ENV: test
steps:
- name: Set Elixir and OTP versions
id: elixir-otp
run: |
echo "elixir=$(sed 's/-.*$//' <<< $version)" >> $GITHUB_OUTPUT
echo "otp=$(sed 's/^.*-//' <<< $version)" >> $GITHUB_OUTPUT
env:
version: ${{ matrix.elixir }}
- uses: actions/checkout@v2
- uses: erlef/setup-beam@v1
with:
otp-version: ${{ steps.elixir-otp.outputs.otp }}
elixir-version: ${{ steps.elixir-otp.outputs.elixir }}
otp-version: 27
elixir-version: 1.17
- name: Cache Elixir dependencies
uses: actions/cache@v2
with:
Expand All @@ -46,45 +29,6 @@ jobs:
env:
MIX_ENV: test
- name: Run Tests
run: mix test --trace
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build:
runs-on: ubuntu-latest
needs: test
steps:
- name: Pass all checks
run: echo "Tests passed"
coverage:
runs-on: ubuntu-latest
env:
MIX_ENV: test
steps:
- name: Set Elixir and OTP versions
id: elixir-otp
run: |
echo "elixir=$(sed 's/-.*$//' <<< $version)" >> $GITHUB_OUTPUT
echo "otp=$(sed 's/^.*-//' <<< $version)" >> $GITHUB_OUTPUT
env:
version: "1.17-27"
- uses: actions/checkout@v2
- uses: erlef/setup-beam@v1
with:
otp-version: ${{ steps.elixir-otp.outputs.otp }}
elixir-version: ${{ steps.elixir-otp.outputs.elixir }}
- name: Cache Elixir dependencies
uses: actions/cache@v2
with:
path: |
deps
_build
key: ${{ runner.os }}-deps-${{ steps.elixir-otp.outputs.elixir }}-${{ steps.elixir-otp.outputs.otp }}-${{ hashFiles('mix.lock') }}
restore-keys: |
${{ runner.os }}-deps-${{ steps.elixir-otp.outputs.elixir }}-${{ steps.elixir-otp.outputs.otp }}-${{ hashFiles('mix.lock') }}
${{ runner.os }}-deps-${{ steps.elixir-otp.outputs.elixir }}-${{ steps.elixir-otp.outputs.otp }}-
- name: Install Dependencies
run: mix do deps.get, deps.compile
- name: Run Tests & Coverage Analysis
run: mix coveralls.github --trace
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,3 @@ See `EDTF.parse/1`, `EDTF.validate/1`, and `EDTF.humanize/1`.

- Some human-readable dates containing Level 2 qualifications and years with significant digits
may produce less specific results than desired.
- Level 2 years without the leading `Y` character (e.g., `2024S03`) are not supported at this time.
41 changes: 12 additions & 29 deletions lib/edtf.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,21 @@ defmodule EDTF do
{:error, :invalid_format}
```
"""
def parse(edtf, include \\ [Interval, Aggregate, Date]) do
case Enum.find(include, & &1.match?(edtf)) do
nil -> error()
mod -> mod.parse(edtf)
def parse(edtf) do
case EDTF.Parser.parse(edtf) do
{:ok, [result], _, _, _, _} -> {:ok, assemble(result) |> Level.add_level()}
{:error, _, _, _, _, _} -> {:error, :invalid_format}
end
|> Level.add_level()
end

# Dispatch a tagged parser result to the module that assembles it. The tag is
# the first element of the tuple; the whole tuple is handed on unchanged.
# Any other tag (or shape) matches no clause and raises FunctionClauseError.
defp assemble({tag, _} = result) when tag in [:date, :year, :decade, :century],
  do: Date.assemble(result)

defp assemble({:interval, _} = result), do: Interval.assemble(result)

defp assemble({tag, _} = result) when tag in [:set, :list],
  do: Aggregate.assemble(result)

@doc """
Validate an EDTF date string
Expand Down Expand Up @@ -62,28 +69,4 @@ defmodule EDTF do
other -> other
end
end

@doc """
Wrap a reason in an `{:error, reason}` tuple.

The reason defaults to `:invalid_format` when no argument is given.
"""
def error(error \\ :invalid_format) do
  {:error, error}
end

@doc """
Strip the open-ended continuation markers (`..`) from an EDTF date string.

Returns `{rest, [earlier: boolean, later: boolean]}`, where `rest` is the
string without a leading and/or trailing `..`, `:earlier` is `true` when a
leading `..` was present, and `:later` is `true` when a trailing `..` was
present.
"""
def open_ended(edtf) do
  pattern = ~r/^(?<earlier>\.\.)?(?<edtf>.+?)(?<later>\.\.)?$/

  # Groups that did not take part in the match come back as "", so comparing
  # each marker against ".." yields the boolean flag directly.
  case Regex.named_captures(pattern, edtf) do
    %{"edtf" => rest} = captures ->
      leading? = captures["earlier"] == ".."
      trailing? = captures["later"] == ".."
      {rest, [{:earlier, leading?}, {:later, trailing?}]}
  end
end
end
53 changes: 16 additions & 37 deletions lib/edtf/aggregate.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@ defmodule EDTF.Aggregate do
Parser for EDTF Lists and Sets
"""

@matchers list: ~r/^\{(.+)\}$/, set: ~r/^\[(.+)\]$/

@valid [EDTF.Date, EDTF.Range]

defstruct type: nil, values: [], level: 2, earlier: false, later: false

@type t :: %__MODULE__{
Expand All @@ -17,38 +13,21 @@ defmodule EDTF.Aggregate do
later: boolean()
}

# True when `edtf` matches any of the regexes stored in @matchers.
def match?(edtf) do
  @matchers
  |> Keyword.values()
  |> Enum.any?(&Regex.match?(&1, edtf))
end

# Parse a list/set string: pick the first @matchers entry whose regex matches,
# strip the open-ended markers from the captured contents, split on commas,
# and fold the pieces through reducer/2 before handing them to finalize/3.
# Returns EDTF.error() when no matcher applies.
def parse(edtf) do
  matcher = Enum.find(@matchers, fn {_type, pattern} -> Regex.match?(pattern, edtf) end)

  case matcher do
    {type, pattern} ->
      [_whole, inner] = Regex.run(pattern, edtf)
      {body, flags} = EDTF.open_ended(inner)

      ~r/\s*,\s*/
      |> Regex.split(body)
      |> Enum.reduce_while([], &reducer/2)
      |> finalize(type, flags)

    nil ->
      EDTF.error()
  end
end

# Step function for Enum.reduce_while/3: prepend each successfully parsed
# member to the accumulator, or halt with :error on the first failure.
defp reducer(date, acc) do
  date
  |> EDTF.parse(@valid)
  |> case do
    {:error, _reason} -> {:halt, :error}
    {:ok, parsed} -> {:cont, [parsed | acc]}
  end
end

defp finalize(:error, _, _), do: EDTF.error()

defp finalize(values, type, attributes),
do: %__MODULE__{
type: type,
values: Enum.reverse(values),
earlier: attributes[:earlier],
later: attributes[:later],
level: 2
@doc """
Build an `EDTF.Aggregate` struct from parser output.

Accepts a tagged tuple (`{:list, value}` or `{:set, value}`), which sets
`type` to the tag, or the bare `value`, which leaves `type` at its struct
default. `value` is read with `Keyword.get/3`: `:dates` (default `[]`)
supplies the members, and `:earlier` / `:later` (default `false`) are copied
onto the struct.
"""
def assemble({:list, value}) do
  aggregate = assemble(value)
  %__MODULE__{aggregate | type: :list}
end

def assemble({:set, value}) do
  aggregate = assemble(value)
  %__MODULE__{aggregate | type: :set}
end

def assemble(value) do
  members =
    value
    |> Keyword.get(:dates, [])
    |> Enum.map(fn entry ->
      case entry do
        # A member that is a single-element list holding an :interval tuple
        # goes to the interval assembler as-is.
        [{:interval, _}] -> EDTF.Interval.assemble(entry)
        # Everything else is wrapped as a :date result.
        _other -> EDTF.Date.assemble({:date, entry})
      end
    end)

  earlier = Keyword.get(value, :earlier, false)
  later = Keyword.get(value, :later, false)

  %__MODULE__{values: members, earlier: earlier, later: later}
end
end
Loading

0 comments on commit 3916b5a

Please sign in to comment.