-
Notifications
You must be signed in to change notification settings - Fork 10
/
example.yml
27 lines (27 loc) · 2.1 KB
/
example.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Example rule: applies only to documents whose correspondent is 'The Bank'
# and whose document type is 'Transfer Confirmation'.
Some Ruleset Name:
  match: "{{ correspondent == 'The Bank' and document_type == 'Transfer Confirmation' }}"
  # Two alternatives: "From <source>" followed by a newline, or
  # "through <month> <day>, <two-digit year>". The named groups (source,
  # created_month, created_day, created_year) are the values referenced in
  # metadata_postprocessing below.
  # NOTE(review): presumably matched against the document's text content --
  # confirm against the consuming tool.
  metadata_regex: '(?:From (?P<source>.*?)\n)|(?:through (?P<created_month>\w*?) (?P<created_day>\d{1,2}), (?P<created_year>\d{2}))'
  metadata_postprocessing:
    # Uses the 'expand_two_digit_year' filter, which will take a two-digit
    # year like 57 and turn it into a four-digit year like 2057.
    created_year: "{{ created_year | expand_two_digit_year }}"
    # Applies the Jinja2 'title' filter, capitalizing each word.
    source: '{{ source | title }}'
    # New title built from the date parts, correspondent, document type and
    # the extracted source.
    title: '{{created_year}}-{{created_month}}-{{created_day}} -- {{correspondent}} -- {{document_type}} (from {{ source }})'
---
# You can put multiple rules in the same file if you want
# Note that rules are applied in order, so any changes from this rule will overwrite changes from previous rules
# Catch-all rule: rewrites the title of every document as
# "<year>-<month>-<day> -- <correspondent> -- <document type>".
Some Other Ruleset Name:
  # This will always match. Written as the canonical lowercase YAML boolean;
  # it parses to the same value as 'True'.
  match: true
  metadata_postprocessing:
    title: '{{created_year}}-{{created_month}}-{{created_day}} -- {{correspondent}} -- {{document_type}}'
---
# Splits a leading ISO date off the title and stores its parts as the
# creation date, keeping the remainder as the new title.
Parse creation date from filename:
  # Matches all documents with a title starting with a date in ISO format
  # (YYYY-MM-DD) followed by a space,
  # e.g. title: "2020-01-31 important document"
  match: '{{ title | regex_match("\d{4}-\d\d-\d\d ") }}'
  metadata_postprocessing:
    # Keep a copy of the original title: 'title' is overwritten on the next
    # line, and the date fields below still need the full original string.
    title_old: '{{ title }}'
    # The same pattern is used four times; only the group that is substituted
    # back (\g<...>) differs from line to line.
    title: '{{ title_old | regex_sub("^(?P<created_year>\d{4})-(?P<created_month>\d{2})-(?P<created_day>\d{2}) (?P<title>.*)$", "\g<title>") }}'
    created_year: '{{ title_old | regex_sub("^(?P<created_year>\d{4})-(?P<created_month>\d{2})-(?P<created_day>\d{2}) (?P<title>.*)$", "\g<created_year>") }}'
    created_month: '{{ title_old | regex_sub("^(?P<created_year>\d{4})-(?P<created_month>\d{2})-(?P<created_day>\d{2}) (?P<title>.*)$", "\g<created_month>") }}'
    created_day: '{{ title_old | regex_sub("^(?P<created_year>\d{4})-(?P<created_month>\d{2})-(?P<created_day>\d{2}) (?P<title>.*)$", "\g<created_day>") }}'
  # Evaluates to True only when num_documents(...) returns exactly 1 for this
  # correspondent, document type and creation date.
  # NOTE(review): presumably num_documents counts documents matching the given
  # filters, and this key is assumed to belong at rule level (next to match and
  # metadata_postprocessing) -- confirm both against the consuming tool.
  validation_rule: '{{ num_documents(correspondent=correspondent, document_type=document_type, created_date_object=created_date_object) == 1 }}'