Skip to content

Commit

Permalink
Add support for Level 2 qualifications and years with significant digits
Browse files Browse the repository at this point in the history
  • Loading branch information
mbklein committed Oct 8, 2024
1 parent 5124aaf commit 7e65705
Show file tree
Hide file tree
Showing 11 changed files with 304 additions and 83 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,9 @@ end
## Usage

See `EDTF.parse/1`, `EDTF.validate/1`, and `EDTF.humanize/1`.

## Notes

- Humanized output for dates that use Level 2 qualifications or years with significant digits
  may be less specific than desired.
- Level 2 years without the leading `Y` character (e.g., `2024S03`) are not supported at this time.
3 changes: 2 additions & 1 deletion lib/edtf.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ defmodule EDTF do
Parse, validate, and humanize EDTF date strings
"""

alias EDTF.{Aggregate, Date, Interval}
alias EDTF.{Aggregate, Date, Interval, Level}

@doc """
Parse an EDTF date string
Expand All @@ -22,6 +22,7 @@ defmodule EDTF do
nil -> error()
mod -> mod.parse(edtf)
end
|> Level.add_level()
end

@doc """
Expand Down
148 changes: 80 additions & 68 deletions lib/edtf/date.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ defmodule EDTF.Date do

alias EDTF.{Season, Year}

@matcher ~r/^Y?-?[\dX]+(?:E\d+)?(?:-[\dX]{2})?(?:-[\dX]{2})?[~%?]?$/
@matcher ~r/^Y?[~%?]?-?[\dX]+(?:E\d+)?(?:S\d+)?(?:-[~%?]?[\dX]{2})?(?:-[~%?]?[\dX]{2})?[~%?]?$/
@subtypes [Year, Season]

defstruct type: :date,
Expand All @@ -16,8 +16,9 @@ defmodule EDTF.Date do
@type edtf_type :: :date | :century | :decade | :year
@type edtf_attribute ::
{:unspecified, integer()}
| {:uncertain, boolean()}
| {:approximate, boolean()}
| {:uncertain, integer() | boolean()}
| {:approximate, integer() | boolean()}
| {:significant, integer()}
| {:earlier, boolean()}
| {:later, boolean()}

Expand All @@ -43,58 +44,56 @@ defmodule EDTF.Date do
{edtf, attributes} = get_attributes(edtf)

case edtf do
<<"-", val::binary-size(2)>> -> {:century, [0 - String.to_integer(val)], 0}
<<val::binary-size(2)>> -> {:century, [String.to_integer(val)], 0}
<<"-", val::binary-size(3)>> -> {:decade, [0 - String.to_integer(val)], 2}
<<val::binary-size(3)>> -> {:decade, [String.to_integer(val)], 2}
<<"-", val::binary-size(2)>> -> {:century, [0 - String.to_integer(val)]}
<<val::binary-size(2)>> -> {:century, [String.to_integer(val)]}
<<"-", val::binary-size(3)>> -> {:decade, [0 - String.to_integer(val)]}
<<val::binary-size(3)>> -> {:decade, [String.to_integer(val)]}
other -> other
end
|> case do
{type, values, level} ->
{:ok, %__MODULE__{type: type, values: values, level: level, attributes: attributes}}
{type, values} ->
{:ok, %__MODULE__{type: type, values: values, attributes: attributes}}

other ->
parse_iso8601(other, attributes)
end
|> finalize(edtf)
|> case do
:error -> EDTF.error()
result -> result
end
end

defp finalize(:error, _), do: EDTF.error()
defp finalize({:ok, result}, edtf), do: {:ok, %__MODULE__{result | level: level(edtf)}}

defp parse_iso8601(<<"-", year::binary-size(4)>>, attributes),
do: parse_iso8601("-" <> year <> "-01-01", attributes, :year)
defp parse_iso8601(edtf, attributes) do
{edtf, masks} =
bitmask(edtf)

defp parse_iso8601(<<year::binary-size(4)>>, attributes),
do: parse_iso8601(year <> "-01-01", attributes, :year)
[_, sign, edtf] = Regex.run(~r/^(-?)(.+)$/, edtf)

defp parse_iso8601(<<"-", year::binary-size(4), "-", month::binary-size(2)>>, attributes),
do: parse_iso8601("-" <> year <> "-" <> month <> "-01", attributes, :month)

defp parse_iso8601(<<year::binary-size(4), "-", month::binary-size(2)>>, attributes),
do: parse_iso8601(year <> "-" <> month <> "-01", attributes, :month)

defp parse_iso8601(edtf, attributes, specificity \\ :day) do
{edtf, mask} = unspecified(edtf)
{edtf, specificity} =
case String.length(edtf) do
4 -> {"#{edtf}-01-01", :year}
7 -> {"#{edtf}-01", :month}
_ -> {edtf, :day}
end

case Elixir.Date.from_iso8601(edtf) do
case Elixir.Date.from_iso8601(sign <> edtf) do
{:ok, %Date{year: year, month: month, day: day}} ->
[year, month - 1, day] |> process_result(specificity, mask, attributes)
[year, month - 1, day] |> process_result(specificity, masks, attributes)

{:error, _} ->
:error
end
end

defp process_result(values, specificity, mask, attributes) do
defp process_result(values, specificity, masks, attributes) do
values =
case specificity do
:day -> values
:month -> Enum.take(values, 2)
:year -> Enum.take(values, 1)
end

attributes = if mask > 0, do: [{:unspecified, mask} | attributes], else: attributes
attributes = Keyword.merge(attributes, masks)

{:ok,
%__MODULE__{
Expand All @@ -103,57 +102,70 @@ defmodule EDTF.Date do
}}
end

defp unspecified(<<"-", edtf::binary>>) do
{edtf, mask} = unspecified(edtf)
{"-#{edtf}", mask}
end

defp unspecified(edtf) do
new_x = fn
{"X", 5} -> {"1", 2 ** 5}
{"X", 7} -> {"1", 2 ** 7}
{"X", p} -> {"0", 2 ** p}
{c, _} -> {c, 0}
end

{str, mask} =
defp bitmask(edtf) do
{str, _, attrs} =
edtf
|> String.graphemes()
|> Enum.reject(&(&1 == "-"))
|> Enum.with_index()
|> Enum.map(new_x)
|> Enum.reduce({"", 0}, fn {char, bits}, {str, mask} ->
{str <> char, mask + bits}
end)
|> Enum.reduce(
{"", 1, [unspecified: 0, approximate: 0, uncertain: 0]},
fn char, {str, bits, attrs} ->
case char do
"X" ->
{str <> "0", bits * 2, add_bits(attrs, :unspecified, bits)}

"~" ->
{str, bits, add_bits(attrs, :approximate, bits)}

"?" ->
{str, bits, add_bits(attrs, :uncertain, bits)}

"%" ->
{str, bits, add_bits(attrs, :approximate, bits) |> add_bits(:uncertain, bits)}

"-" ->
{str <> "-", bits, attrs}

d ->
{str <> d, bits * 2, attrs}
end
end
)

{str
|> reassemble()
|> nonzero_month_and_day(), mask}
|> nonzero_month_and_day(), Keyword.reject(attrs, fn {_, v} -> v == 0 end)}
end

defp level(edtf) do
cond do
Regex.match?(~r/^\d{2}X{2}$/, edtf) -> 1
Regex.match?(~r/^\d{3}X$/, edtf) -> 1
Regex.match?(~r/^\d{4}-XX$/, edtf) -> 1
Regex.match?(~r/^\d{4}-\d{2}-XX$/, edtf) -> 1
Regex.match?(~r/^\d{4}-XX-XX$/, edtf) -> 1
Regex.match?(~r/X/, edtf) -> 2
true -> 0
end
end
defp add_bits(attrs, attr, bits) do
bits =
cond do
# unspecified can exist in any place
attr == :unspecified -> bits
# approximate or uncertain year (XXXX-mm-dd)
bits < 15 -> 15
# approximate or uncertain month (yyyy-XX-dd)
bits < 48 -> 48
# approximate or uncertain day (yyyy-mm-XX)
bits < 192 -> 192
end

defp reassemble(<<year::binary-size(4), month::binary-size(2), day::binary-size(2)>>),
do: [year, month, day] |> Enum.join("-")
Keyword.update!(attrs, attr, fn v -> v + bits end)
end

defp nonzero_month_and_day(str), do: String.replace(str, "-00", "-01")

defp get_attributes(edtf) do
case Regex.named_captures(~r/^(?<edtf>.+?)(?<attr>[~%?])?$/, edtf) do
%{"edtf" => result, "attr" => ""} -> {result, []}
%{"edtf" => result, "attr" => "~"} -> {result, [{:approximate, true}]}
%{"edtf" => result, "attr" => "%"} -> {result, [{:approximate, true}, {:uncertain, true}]}
%{"edtf" => result, "attr" => "?"} -> {result, [{:uncertain, true}]}
%{"edtf" => result, "attr" => ""} ->
{result, []}

%{"edtf" => result, "attr" => "~"} ->
{result, [{:approximate, true}]}

%{"edtf" => result, "attr" => "%"} ->
{result, [{:approximate, true}, {:uncertain, true}]}

%{"edtf" => result, "attr" => "?"} ->
{result, [{:uncertain, true}]}
end
end
end
2 changes: 1 addition & 1 deletion lib/edtf/humanize/date.ex
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ defmodule EDTF.Humanize.Date do

defp humanize(:date, _, %{unspecified: _}), do: :original

defp humanize(:date, values, %{uncertain: true} = attributes),
defp humanize(:date, values, %{uncertain: _v} = attributes),
do: humanize(:date, values, Map.delete(attributes, :uncertain)) <> "?"

defp humanize(:date, values, _) do
Expand Down
6 changes: 3 additions & 3 deletions lib/edtf/infinity.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ defmodule EDTF.Infinity do
EDTF Infinity struct
"""

defstruct []
@type t :: %__MODULE__{}
defstruct level: 1
@type t :: %__MODULE__{level: integer()}

def match?(".."), do: true
def match?(_), do: false
def parse(".."), do: {:ok, %__MODULE__{}}
def parse(".."), do: {:ok, %__MODULE__{level: 1}}
def parse(_), do: EDTF.error()
end
4 changes: 2 additions & 2 deletions lib/edtf/interval.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ defmodule EDTF.Interval do

defstruct start: nil,
end: nil,
level: 1
level: 2

@type t :: %__MODULE__{
start: EDTF.Date.t() | nil,
Expand Down Expand Up @@ -42,6 +42,6 @@ defmodule EDTF.Interval do
end
end

defp module([start | [stop]]), do: %__MODULE__{start: start, end: stop, level: 1}
defp module([start | [stop]]), do: %__MODULE__{start: start, end: stop, level: 2}
defp module([v]), do: module([v, nil])
end
43 changes: 43 additions & 0 deletions lib/edtf/level.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
defmodule EDTF.Level do

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.15-25)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.16-25)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.15-25)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.15-26)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.17-26)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.17-25)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.16-26)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.15-26)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.16-26)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.16-25)

Modules should have a @moduledoc tag.

Check warning on line 1 in lib/edtf/level.ex

View workflow job for this annotation

GitHub Actions / test (1.17-25)

Modules should have a @moduledoc tag.
@moduledoc """
Fills in the `:level` field of parsed EDTF results.
"""

@doc """
Adds a level to a parse result.

Behaviour by input shape:

  * `{:error, _}` is returned unchanged.
  * `{:ok, value}` is returned as `{:ok, add_level(value)}`.
  * An `EDTF.Aggregate` has its `:values` replaced by `add_level(values)`.
  * A list has `add_level/1` applied to every element.
  * A map or struct whose `:level` is already greater than zero is returned unchanged.
  * Anything else gets its `:level` set from the level computed for it.
"""
def add_level({:error, _} = error), do: error

# Must stay ahead of the generic `%{level: _}` clause so the members of an
# aggregate are always visited.
def add_level(%EDTF.Aggregate{} = aggregate),
  do: Map.update!(aggregate, :values, &add_level/1)

def add_level({:ok, value}), do: {:ok, add_level(value)}

def add_level([]), do: []
def add_level([value | values]), do: [add_level(value) | add_level(values)]

# A positive level was already assigned by the parser; keep it.
def add_level(%{level: level} = result) when level > 0, do: result

# Update syntax (rather than `Map.put/3`) guarantees `:level` is an existing
# field and never silently adds a new key to a struct.
def add_level(result), do: %{result | level: determine_level(result)}

# Centuries and decades are reported as Level 1.
defp determine_level(%EDTF.Date{type: kind}) when kind in [:century, :decade], do: 1

# Seasons: a season code above 24 is reported as Level 2, anything else as Level 1.
defp determine_level(%EDTF.Date{type: :season, values: [_year, code]}) when code > 24, do: 2
defp determine_level(%EDTF.Date{type: :season, values: [_year, _code]}), do: 1

# Every other date: with no attributes the current level is kept; otherwise the
# attributes (converted to a map) and the date components decide the level.
defp determine_level(%EDTF.Date{attributes: attributes, level: current, values: components}) do
  case Enum.empty?(attributes) do
    true -> current
    false -> attributes |> Map.new() |> calculate_level(components)
  end
end

# Computes the level implied by a date's attribute map.
#
# `attrs` is a map of attribute name to value and `values` is the list of date
# components (its length selects which unspecified-digit masks count as Level 1).
#
# Every recognised attribute contributes its own level and the highest one wins.
# Matching only the first recognised attribute would let a Level 1 `:unspecified`
# mask (or a boolean `:approximate`) hide an integer `:approximate`/`:uncertain`
# bitmask, which marks a Level 2 qualification.
#
# Raises `ArgumentError` when no attribute in `attrs` determines a level.
defp calculate_level(attrs, values) do
  component_count = length(values)

  levels =
    for {name, value} <- attrs,
        level = attribute_level(name, value, component_count),
        do: level

  case levels do
    [] ->
      raise ArgumentError,
            "cannot determine EDTF level from attributes: #{inspect(attrs)}"

    _ ->
      Enum.max(levels)
  end
end

# Level contributed by a single attribute, or nil when it does not affect the level.
# The unspecified-digit masks listed here are the ones reported as Level 1 for
# one, two, and three date components respectively; any other mask is Level 2.
defp attribute_level(:unspecified, bits, 1),
  do: mask_level(bits, [15, 14, 12, 8])

defp attribute_level(:unspecified, bits, 2),
  do: mask_level(bits, [63, 62, 60, 56, 48, 32])

defp attribute_level(:unspecified, bits, 3),
  do: mask_level(bits, [255, 254, 252, 248, 240, 224, 192, 128])

# A boolean qualifier is Level 1; any other value (a bitmask) is Level 2.
defp attribute_level(name, flag, _count)
     when name in [:approximate, :uncertain] and is_boolean(flag),
     do: 1

defp attribute_level(name, _mask, _count) when name in [:approximate, :uncertain], do: 2
defp attribute_level(_name, _value, _count), do: nil

# 1 when `bits` is one of the Level 1 masks, otherwise 2.
defp mask_level(bits, level_1_masks) do
  if bits in level_1_masks, do: 1, else: 2
end
end
3 changes: 1 addition & 2 deletions lib/edtf/season.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ defmodule EDTF.Season do
{:ok,
%EDTF.Date{
type: :season,
values: [String.to_integer(year), String.to_integer(season)],
level: 2
values: [String.to_integer(year), String.to_integer(season)]
}}
end
end
Expand Down
20 changes: 15 additions & 5 deletions lib/edtf/year.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ defmodule EDTF.Year do
Parser for EDTF `Y`-prefixed years (Level 1), including Level 2 exponents and significant digits
"""

@matcher ~r/^Y(?<year>-?\d+)(?:E(?<exponent>\d+))?$/
@matcher ~r/^Y(?<year>-?\d+)(?:E(?<exponent>\d+))?(?:S(?<significant>\d+))?$/

def match?(edtf), do: Regex.match?(@matcher, edtf)

Expand All @@ -16,17 +16,27 @@ defmodule EDTF.Year do
end
end

defp calculate(%{"year" => year, "exponent" => ""}),
do: {:ok, %EDTF.Date{type: :year, values: [String.to_integer(year)], level: 1}}
defp calculate(%{"year" => year, "exponent" => "", "significant" => significant}),
do:
{:ok,
%EDTF.Date{type: :year, values: [String.to_integer(year)], level: 1}
|> add_significance(significant)}

defp calculate(%{"year" => year, "exponent" => exponent}) do
defp calculate(%{"year" => year, "exponent" => exponent, "significant" => significant}) do
{:ok,
%EDTF.Date{
type: :year,
values: [String.to_integer(year) * 10 ** String.to_integer(exponent)],
level: 2
}}
}
|> add_significance(significant)}
end

defp calculate(_), do: :error

# Records the number of significant digits on a parsed year.
# An empty capture means no significant-digit suffix was present, so the year is
# returned untouched; otherwise the year becomes Level 2 and its attributes are
# replaced with the parsed digit count.
defp add_significance(year, ""), do: year

defp add_significance(year, digits) do
  significant = String.to_integer(digits)
  %EDTF.Date{year | level: 2, attributes: [significant: significant]}
end
end
2 changes: 1 addition & 1 deletion mix.exs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defmodule EDTF.MixProject do
use Mix.Project

@version "1.0.0"
@version "1.1.0"
@url "https://github.com/nulib/authoritex"

def project do
Expand Down
Loading

0 comments on commit 7e65705

Please sign in to comment.