From 95540c36eb918287ff2702dca2511a50e4e8701b Mon Sep 17 00:00:00 2001 From: Matt VanEseltine Date: Thu, 1 Aug 2019 15:11:25 -0400 Subject: [PATCH] Introduce parameterized pytest suite This creates an alternative test suite using the wonderful pytest and parameterizing over 400 names off into json storage. It is intended to include all functionality of the existing test suite, while also: * Segmenting most names into json format in ./pytest/names * Keeping exactly the same groupings by class (though tweaked names) * Keeping all test names visible * Folding several comments into explicit expected fails and skips * Improving visibility of such xfails and skips * Automagically running twice for CONSTANTS.empty_attribute_default * Adding no further requirements beyond the pytest package --- .travis.yml | 6 +- dev-requirements.txt | 1 + pytest/conftest.py | 18 + pytest/names/bare_names.json | 176 ++++++ pytest/names/brute_force.json | 805 +++++++++++++++++++++++++ pytest/names/capitalization.json | 59 ++ pytest/names/conjunction.json | 199 +++++++ pytest/names/first_name.json | 57 ++ pytest/names/nickname.json | 105 ++++ pytest/names/prefix.json | 118 ++++ pytest/names/suffix.json | 140 +++++ pytest/names/test_bank.json | 983 +++++++++++++++++++++++++++++++ pytest/names/title.json | 221 +++++++ pytest/names_test.py | 711 ++++++++++++++++++++++ pytest/pytest.ini | 3 + 15 files changed, 3600 insertions(+), 2 deletions(-) create mode 100644 pytest/conftest.py create mode 100644 pytest/names/bare_names.json create mode 100644 pytest/names/brute_force.json create mode 100644 pytest/names/capitalization.json create mode 100644 pytest/names/conjunction.json create mode 100644 pytest/names/first_name.json create mode 100644 pytest/names/nickname.json create mode 100644 pytest/names/prefix.json create mode 100644 pytest/names/suffix.json create mode 100644 pytest/names/test_bank.json create mode 100644 pytest/names/title.json create mode 100644 pytest/names_test.py create mode 100644 pytest/pytest.ini diff --git a/.travis.yml b/.travis.yml index dc37c42..d0c9a1b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,10 +8,12 @@ python: - "3.5" - "3.6" # command to install dependencies -install: +install: - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi - "pip install dill" - "python setup.py install" # command to run tests -script: python tests.py +script: + - python tests.py + - python -m pytest sudo: false diff --git a/dev-requirements.txt b/dev-requirements.txt index 8aab0b6..0202432 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -4,3 +4,4 @@ coverage>=4.0.3 dill>=0.2.5 twine Sphinx +pytest diff --git a/pytest/conftest.py b/pytest/conftest.py new file mode 100644 index 0000000..9e6c8c8 --- /dev/null +++ b/pytest/conftest.py @@ -0,0 +1,18 @@ +import pytest + + +@pytest.fixture( + scope="module", + autouse=True, + params=["", None], + ids=["empty_is_blank", "empty_is_None"], +) +def empty_attribute_value(request): + """Run the entire test suite twice to vary CONSTANTS.empty_attribute_default. + + First run CONSTANTS.empty_attribute_default = "" + Second run CONSTANTS.empty_attribute_default = None + """ + from nameparser.config import CONSTANTS + + CONSTANTS.empty_attribute_default = request.param diff --git a/pytest/names/bare_names.json b/pytest/names/bare_names.json new file mode 100644 index 0000000..053040d --- /dev/null +++ b/pytest/names/bare_names.json @@ -0,0 +1,176 @@ +[ + "John Doe", + "John Doe, Jr.", + "John Doe III", + "Doe, John", + "Doe, John, Jr.", + "Doe, John III", + "John A. Doe", + "John A. Doe, Jr.", + "John A. Doe III", + "Doe, John A.", + "Doe, John A., Jr.", + "Doe, John A. III", + "John A. Kenneth Doe", + "John A. Kenneth Doe, Jr.", + "John A. Kenneth Doe III", + "Doe, John A. Kenneth", + "Doe, John A. Kenneth, Jr.", + "Doe, John A. Kenneth III", + "Dr. John Doe", + "Dr. John Doe, Jr.", + "Dr. John Doe III", + "Doe, Dr. John", + "Doe, Dr. John, Jr.", + "Doe, Dr. John III", + "Dr. John A. Doe", + "Dr. John A. Doe, Jr.", + "Dr. John A. Doe III", + "Doe, Dr. John A.", + "Doe, Dr. John A. Jr.", + "Doe, Dr. John A. III", + "Dr. John A. Kenneth Doe", + "Dr. John A. Kenneth Doe, Jr.", + "Dr. John A. Kenneth Doe III", + "Doe, Dr. John A. Kenneth", + "Doe, Dr. John A. Kenneth Jr.", + "Doe, Dr. John A. Kenneth III", + "Juan de la Vega", + "Juan de la Vega, Jr.", + "Juan de la Vega III", + "de la Vega, Juan", + "de la Vega, Juan, Jr.", + "de la Vega, Juan III", + "Juan Velasquez y Garcia", + "Juan Velasquez y Garcia, Jr.", + "Juan Velasquez y Garcia III", + "Velasquez y Garcia, Juan", + "Velasquez y Garcia, Juan, Jr.", + "Velasquez y Garcia, Juan III", + "Dr. Juan de la Vega", + "Dr. Juan de la Vega, Jr.", + "Dr. Juan de la Vega III", + "de la Vega, Dr. Juan", + "de la Vega, Dr. Juan, Jr.", + "de la Vega, Dr. Juan III", + "Dr. Juan Velasquez y Garcia", + "Dr. Juan Velasquez y Garcia, Jr.", + "Dr. Juan Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan", + "Velasquez y Garcia, Dr. Juan, Jr.", + "Velasquez y Garcia, Dr. Juan III", + "Juan Q. de la Vega", + "Juan Q. de la Vega, Jr.", + "Juan Q. de la Vega III", + "de la Vega, Juan Q.", + "de la Vega, Juan Q., Jr.", + "de la Vega, Juan Q. III", + "Juan Q. Velasquez y Garcia", + "Juan Q. Velasquez y Garcia, Jr.", + "Juan Q. Velasquez y Garcia III", + "Velasquez y Garcia, Juan Q.", + "Velasquez y Garcia, Juan Q., Jr.", + "Velasquez y Garcia, Juan Q. III", + "Dr. Juan Q. de la Vega", + "Dr. Juan Q. de la Vega, Jr.", + "Dr. Juan Q. de la Vega III", + "de la Vega, Dr. Juan Q.", + "de la Vega, Dr. Juan Q., Jr.", + "de la Vega, Dr. Juan Q. III", + "Dr. Juan Q. Velasquez y Garcia", + "Dr. Juan Q. Velasquez y Garcia, Jr.", + "Dr. Juan Q. Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan Q.", + "Velasquez y Garcia, Dr. Juan Q., Jr.", + "Velasquez y Garcia, Dr. Juan Q. III", + "Juan Q. Xavier de la Vega", + "Juan Q. Xavier de la Vega, Jr.", + "Juan Q. Xavier de la Vega III", + "de la Vega, Juan Q. Xavier", + "de la Vega, Juan Q. Xavier, Jr.", + "de la Vega, Juan Q. Xavier III", + "Juan Q. Xavier Velasquez y Garcia", + "Juan Q. Xavier Velasquez y Garcia, Jr.", + "Juan Q. Xavier Velasquez y Garcia III", + "Velasquez y Garcia, Juan Q. Xavier", + "Velasquez y Garcia, Juan Q. Xavier, Jr.", + "Velasquez y Garcia, Juan Q. Xavier III", + "Dr. Juan Q. Xavier de la Vega", + "Dr. Juan Q. Xavier de la Vega, Jr.", + "Dr. Juan Q. Xavier de la Vega III", + "de la Vega, Dr. Juan Q. Xavier", + "de la Vega, Dr. Juan Q. Xavier, Jr.", + "de la Vega, Dr. Juan Q. Xavier III", + "Dr. Juan Q. Xavier Velasquez y Garcia", + "Dr. Juan Q. Xavier Velasquez y Garcia, Jr.", + "Dr. Juan Q. Xavier Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan Q. Xavier", + "Velasquez y Garcia, Dr. Juan Q. Xavier, Jr.", + "Velasquez y Garcia, Dr. Juan Q. Xavier III", + "John Doe, CLU, CFP, LUTC", + "John P. Doe, CLU, CFP, LUTC", + "Dr. John P. Doe-Ray, CLU, CFP, LUTC", + "Doe-Ray, Dr. John P., CLU, CFP, LUTC", + "Hon. Barrington P. Doe-Ray, Jr.", + "Doe-Ray, Hon. Barrington P. Jr.", + "Doe-Ray, Hon. Barrington P. Jr., CFP, LUTC", + "Jose Aznar y Lopez", + "John E Smith", + "John e Smith", + "John and Jane Smith", + "Rev. John A. Kenneth Doe", + "Donovan McNabb-Smith", + "Rev John A. Kenneth Doe", + "Doe, Rev. John A. Jr.", + "Buca di Beppo", + "Lt. Gen. John A. Kenneth Doe, Jr.", + "Doe, Lt. Gen. John A. Kenneth IV", + "Lt. Gen. John A. Kenneth Doe IV", + "Mr. and Mrs. John Smith", + "John Jones (Google Docs)", + "john e jones", + "john e jones, III", + "jones, john e", + "E.T. Smith", + "E.T. Smith, II", + "Smith, E.T., Jr.", + "A.B. Vajpayee", + "Rt. Hon. Paul E. Mary", + "Maid Marion", + "Amy E. Maid", + "Jane Doctor", + "Doctor, Jane E.", + "dr. ben alex johnson III", + "Lord of the Universe and Supreme King of the World Lisa Simpson", + "Benjamin (Ben) Franklin", + "Benjamin \"Ben\" Franklin", + "Brian O'connor", + "Sir Gerald", + "Magistrate Judge John F. Forster, Jr", + "Magistrate Judge Joaquin V.E. Manibusan, Jr", + "Magistrate-Judge Elizabeth Todd Campbell", + "Mag-Judge Harwell G Davis, III", + "Mag. Judge Byron G. Cudmore", + "Chief Judge J. Leon Holmes", + "Chief Judge Sharon Lovelace Blackburn", + "Judge James M. Moody", + "Judge G. Thomas Eisele", + "Judge Callie V. S. Granade", + "Judge C Lynwood Smith, Jr", + "Senior Judge Charles R. Butler, Jr", + "Senior Judge Harold D. Vietor", + "Senior Judge Virgil Pittman", + "Honorable Terry F. Moorer", + "Honorable W. Harold Albritton, III", + "Honorable Judge W. Harold Albritton, III", + "Honorable Judge Terry F. Moorer", + "Honorable Judge Susan Russ Walker", + "Hon. Marian W. Payson", + "Hon. Charles J. Siragusa", + "US Magistrate Judge T Michael Putnam", + "Designated Judge David A. Ezra", + "Sr US District Judge Richard G Kopf", + "U.S. District Judge Marc Thomas Treadwell", + "Dra. Andréia da Silva", + "Srta. Andréia da Silva" +] \ No newline at end of file diff --git a/pytest/names/brute_force.json b/pytest/names/brute_force.json new file mode 100644 index 0000000..458ef6d --- /dev/null +++ b/pytest/names/brute_force.json @@ -0,0 +1,805 @@ +[ + { + "raw": "John Doe", + "first": "John", + "last": "Doe" + }, + { + "raw": "John Doe, Jr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "John Doe III", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, John", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, John, Jr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, John III", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "John A. Doe", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "John A. Doe, Jr", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr" + }, + { + "raw": "John A. Doe III", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "Doe, John A.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Doe, John A., Jr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Doe, John A., III", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "John A. Kenneth Doe", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth" + }, + { + "raw": "John A. Kenneth Doe, Jr.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "raw": "John A. Kenneth Doe III", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "III" + }, + { + "raw": "Doe, John. A. Kenneth", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth" + }, + { + "raw": "Doe, John. A. Kenneth, Jr.", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "raw": "Doe, John. A. Kenneth III", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "III" + }, + { + "raw": "Dr. John Doe", + "first": "John", + "last": "Doe", + "title": "Dr." + }, + { + "raw": "Dr. John Doe, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Dr. John Doe III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John", + "title": "Dr.", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, Dr. John, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Dr. John A. Doe", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Dr. John A. Doe, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Dr. John A. Doe III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John A.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Doe, Dr. John A. Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John A. III", + "title": "Dr.", + "middle": "A.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Dr. John A. Kenneth Doe", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "raw": "Dr. John A. Kenneth Doe, Jr.", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Al Arnold Gore, Jr.", + "middle": "Arnold", + "first": "Al", + "last": "Gore", + "suffix": "Jr." + }, + { + "raw": "Dr. John A. Kenneth Doe III", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John A. Kenneth", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, Dr. John A. Kenneth Jr.", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John A. Kenneth III", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Juan de la Vega", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "Juan de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "Juan de la Vega III", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan, Jr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan III", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "Juan Velasquez y Garcia", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Velasquez y Garcia, Jr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Velasquez y Garcia III", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan, Jr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan III", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan de la Vega", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan de la Vega, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan de la Vega III", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Dr. Juan, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Dr. Juan III", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "Dr. Juan Velasquez y Garcia", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Velasquez y Garcia, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Velasquez y Garcia III", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan III", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Juan Q. de la Vega", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "Juan Q. de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr." + }, + { + "raw": "Juan Q. de la Vega III", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan Q.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan Q., Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan Q. III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "III" + }, + { + "raw": "Juan Q. Velasquez y Garcia", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Q. Velasquez y Garcia, Jr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Velasquez y Garcia III", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan Q.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan Q., Jr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan Q. III", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. de la Vega", + "title": "Dr.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan Q. de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "title": "Dr.", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. de la Vega III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "title": "Dr.", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan Q.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega", + "title": "Dr." + }, + { + "raw": "de la Vega, Dr. Juan Q., Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr.", + "title": "Dr." + }, + { + "raw": "de la Vega, Dr. Juan Q. III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "III", + "title": "Dr." + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia, Jr.", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia III", + "middle": "Q.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q.", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q., Jr.", + "middle": "Q.", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. III", + "middle": "Q.", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Juan Q. Xavier de la Vega", + "first": "Juan", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "Juan Q. Xavier de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Xavier de la Vega III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan Q. Xavier", + "first": "Juan", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan Q. Xavier, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan Q. Xavier III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega", + "first": "Juan", + "middle": "Q. Xavier", + "title": "Dr.", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega III", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier", + "first": "Juan", + "title": "Dr.", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier, Jr.", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier III", + "first": "Juan", + "title": "Dr.", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia III", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier III", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia", + "title": "Dr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia III", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier", + "title": "Dr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier III", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "John Doe, CLU, CFP, LUTC", + "first": "John", + "last": "Doe", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "John P. Doe, CLU, CFP, LUTC", + "first": "John", + "middle": "P.", + "last": "Doe", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Dr. John P. Doe-Ray, CLU, CFP, LUTC", + "first": "John", + "middle": "P.", + "last": "Doe-Ray", + "title": "Dr.", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Doe-Ray, Dr. John P., CLU, CFP, LUTC", + "title": "Dr.", + "middle": "P.", + "first": "John", + "last": "Doe-Ray", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Hon. Barrington P. Doe-Ray, Jr.", + "title": "Hon.", + "middle": "P.", + "first": "Barrington", + "last": "Doe-Ray", + "suffix": "Jr." + }, + { + "raw": "Doe-Ray, Hon. Barrington P. Jr., CFP, LUTC", + "title": "Hon.", + "middle": "P.", + "first": "Barrington", + "last": "Doe-Ray", + "suffix": "Jr., CFP, LUTC" + }, + { + "raw": "Rt. Hon. Paul E. Mary", + "title": "Rt. Hon.", + "first": "Paul", + "middle": "E.", + "last": "Mary" + }, + { + "raw": "Lord God Almighty", + "title": "Lord", + "first": "God", + "last": "Almighty" + } +] \ No newline at end of file diff --git a/pytest/names/capitalization.json b/pytest/names/capitalization.json new file mode 100644 index 0000000..90f07dd --- /dev/null +++ b/pytest/names/capitalization.json @@ -0,0 +1,59 @@ +[ + { + "id": "test_downcasing_mac", + "_note": "http://code.google.com/p/python-nameparser/issues/detail?id=15", + "raw": "RONALD MACDONALD", + "string": "Ronald MacDonald" + }, + { + "id": "test_downcasing_mc", + "_note": "http://code.google.com/p/python-nameparser/issues/detail?id=23", + "raw": "RONALD MCDONALD", + "string": "Ronald McDonald" + }, + { + "id": "test_capitalization_exception_for_III", + "raw": "juan q. xavier velasquez y garcia iii", + "string": "Juan Q. Xavier Velasquez y Garcia III" + }, + { + "id": "test_capitalize_title", + "raw": "lt. gen. john a. kenneth doe iv", + "string": "Lt. Gen. John A. Kenneth Doe IV" + }, + { + "id": "test_capitalize_title_to_lower", + "raw": "LT. GEN. JOHN A. KENNETH DOE IV", + "string": "Lt. Gen. John A. Kenneth Doe IV" + }, + { + "id": "test_capitalization_with_Mac_as_hyphenated_names", + "raw": "donovan mcnabb-smith", + "string": "Donovan McNabb-Smith" + }, + { + "id": "test_capitization_middle_initial_is_also_a_conjunction", + "raw": "scott e. werner", + "string": "Scott E. Werner" + }, + { + "id": "test_capitalize_diacritics", + "raw": "matthëus schmidt", + "string": "Matthëus Schmidt" + }, + { + "id": "test_short_names_with_mac", + "raw": "mack johnson", + "string": "Mack Johnson" + }, + { + "id": "test_portuguese_prefixes", + "raw": "joao da silva do amaral de souza", + "string": "Joao da Silva do Amaral de Souza" + }, + { + "id": "test_capitalize_prefix_clash_on_first_name", + "raw": "van nguyen", + "string": "Van Nguyen" + } +] \ No newline at end of file diff --git a/pytest/names/conjunction.json b/pytest/names/conjunction.json new file mode 100644 index 0000000..318adf3 --- /dev/null +++ b/pytest/names/conjunction.json @@ -0,0 +1,199 @@ +[ + { + "id": "test_last_name_with_conjunction", + "raw": "Jose Aznar y Lopez", + "first": "Jose", + "last": "Aznar y Lopez" + }, + { + "id": "test_multiple_conjunctions", + "raw": "part1 of The part2 of the part3 and part4", + "first": "part1 of The part2 of the part3 and part4" + }, + { + "id": "test_multiple_conjunctions2", + "raw": "part1 of and The part2 of the part3 And part4", + "first": "part1 of and The part2 of the part3 And part4" + }, + { + "id": "test_ends_with_conjunction", + "raw": "Jon Dough and", + "first": "Jon", + "last": "Dough and" + }, + { + "id": "test_ends_with_two_conjunctions", + "raw": "Jon Dough and of", + "first": "Jon", + "last": "Dough and of" + }, + { + "id": "test_starts_with_conjunction", + "raw": "and Jon Dough", + "first": "and Jon", + "last": "Dough" + }, + { + "id": "test_starts_with_two_conjunctions", + "raw": "the and Jon Dough", + "first": "the and Jon", + "last": "Dough" + }, + { + "id": "test_uppercase_middle_initial_conflict_with_conjunction_upper_means_initial", + "raw": "John E Smith", + "first": "John", + "middle": "E", + "last": "Smith" + }, + { + "id": "test_lowercase_middle_initial_with_period_conflict_with_conjunction", + "raw": "john e. smith", + "first": "john", + "middle": "e.", + "last": "smith" + }, + { + "id": "test_lowercase_first_initial_conflict_with_conjunction", + "raw": "e j smith", + "first": "e", + "middle": "j", + "last": "smith" + }, + { + "id": "test_lowercase_middle_initial_conflict_with_conjunction", + "raw": "John e Smith", + "first": "John", + "middle": "e", + "last": "Smith" + }, + { + "id": "test_lowercase_middle_initial_and_suffix_conflict_with_conjunction", + "raw": "John e Smith, III", + "first": "John", + "middle": "e", + "last": "Smith", + "suffix": "III" + }, + { + "id": "test_lowercase_middle_initial_and_nocomma_suffix_conflict_with_conjun", + "raw": "John e Smith III", + "first": "John", + "middle": "e", + "last": "Smith", + "suffix": "III" + }, + { + "id": "test_lowercase_middle_initial_comma_lastname_and_suffix_conflict_with_conjun", + "raw": "Smith, John e, III, Jr", + "first": "John", + "middle": "e", + "last": "Smith", + "suffix": "III, Jr" + }, + { + "id": "test_couples_names", + "raw": "John and Jane Smith", + "first": "John and Jane", + "last": "Smith" + }, + { + "id": "test_couples_names_with_conjunction_lastname", + "raw": "John and Jane Aznar y Lopez", + "first": "John and Jane", + "last": "Aznar y Lopez" + }, + { + "id": "test_couple_titles", + "raw": "Mr. and Mrs. John and Jane Smith", + "title": "Mr. and Mrs.", + "first": "John and Jane", + "last": "Smith" + }, + { + "id": "test_title_with_three_part_name_last_initial_is_suffix_uppercase_no_p", + "raw": "King John Alexander V", + "title": "King", + "first": "John", + "last": "Alexander", + "suffix": "V" + }, + { + "id": "test_four_name_parts_with_suffix_that_could_be_initial_lowercase_no_p", + "raw": "larry james edward johnson v", + "first": "larry", + "middle": "james edward", + "last": "johnson", + "suffix": "v" + }, + { + "id": "test_four_name_parts_with_suffix_that_could_be_initial_uppercase_no_p", + "raw": "Larry James Johnson I", + "first": "Larry", + "middle": "James", + "last": "Johnson", + "suffix": "I" + }, + { + "id": "test_roman_numeral_initials", + "raw": "Larry V I", + "first": "Larry", + "middle": "V", + "last": "I" + }, + { + "id": "test124_Rev_title", + "raw": "Rev. John A. Kenneth Doe", + "title": "Rev.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "id": "test125_Rev_title", + "raw": "Rev John A. Kenneth Doe", + "title": "Rev", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "id": "test126_Rev_title", + "raw": "Doe, Rev. John A. Jr.", + "title": "Rev.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "id": "test127", + "raw": "Buca di Beppo", + "first": "Buca", + "last": "di Beppo" + }, + { + "id": "test_le_as_last_name", + "raw": "Yin Le", + "first": "Yin", + "last": "Le" + }, + { + "id": "test_le_as_last_name_with_middle_initial", + "raw": "Yin a Le", + "first": "Yin", + "middle": "a", + "last": "Le" + }, + { + "id": "test_conjunction_in_an_address_with_a_title", + "raw": "His Excellency Lord Duncan", + "title": "His Excellency Lord", + "last": "Duncan" + }, + { + "id": "test_name_is_conjunctions", + "raw": "e and e", + "first": "e and e" + } +] \ No newline at end of file diff --git a/pytest/names/first_name.json b/pytest/names/first_name.json new file mode 100644 index 0000000..9ac13ba --- /dev/null +++ b/pytest/names/first_name.json @@ -0,0 +1,57 @@ +[ + { + "id": "test_first_name_is_not_prefix_if_only_two_parts", + "_note": "When there are only two parts, don't join prefixes or conjunctions", + "raw": "Van Nguyen", + "first": "Van", + "last": "Nguyen" + }, + { + "id": "test_first_name", + "raw": "Andrew", + "first": "Andrew" + }, + { + "id": "test_assume_title_and_one_other_name_is_last_name", + "raw": "Rev Andrews", + "title": "Rev", + "last": "Andrews" + }, + { + "id": "test_suffix_in_lastname_part_of_lastname_comma_format", + "raw": "Smith Jr., John", + "last": "Smith", + "first": "John", + "suffix": "Jr." + }, + { + "id": "test_sir_exception_to_first_name_rule", + "raw": "Sir Gerald", + "title": "Sir", + "first": "Gerald" + }, + { + "id": "test_king_exception_to_first_name_rule", + "raw": "King Henry", + "title": "King", + "first": "Henry" + }, + { + "id": "test_queen_exception_to_first_name_rule", + "raw": "Queen Elizabeth", + "title": "Queen", + "first": "Elizabeth" + }, + { + "id": "test_dame_exception_to_first_name_rule", + "raw": "Dame Mary", + "title": "Dame", + "first": "Mary" + }, + { + "id": "test_first_name_is_not_prefix_if_only_two_parts_comma", + "raw": "Nguyen, Van", + "first": "Van", + "last": "Nguyen" + } +] \ No newline at end of file diff --git a/pytest/names/nickname.json b/pytest/names/nickname.json new file mode 100644 index 0000000..11e009b --- /dev/null +++ b/pytest/names/nickname.json @@ -0,0 +1,105 @@ +[ + { + "id": "test_nickname_in_parenthesis", + "_note": "https://code.google.com/p/python-nameparser/issues/detail?id=33", + "raw": "Benjamin (Ben) Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Ben" + }, + { + "id": "test_two_word_nickname_in_parenthesis", + "raw": "Benjamin (Big Ben) Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Big Ben" + }, + { + "id": "test_two_words_in_quotes", + "raw": "Benjamin \"Big Ben\" Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Big Ben" + }, + { + "id": "test_nickname_in_parenthesis_with_comma", + "raw": "Franklin, Benjamin (Ben)", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Ben" + }, + { + "id": "test_nickname_in_parenthesis_with_comma_and_suffix", + "raw": "Franklin, Benjamin (Ben), Jr.", + "first": "Benjamin", + "last": "Franklin", + "suffix": "Jr.", + "nickname": "Ben" + }, + { + "id": "test_nickname_in_single_quotes", + "raw": "Benjamin 'Ben' Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Ben" + }, + { + "id": "test_nickname_in_double_quotes", + "raw": "Benjamin \"Ben\" Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Ben" + }, + { + "id": "test_single_quotes_on_first_name_not_treated_as_nickname", + "raw": "Brian Andrew O'connor", + "first": "Brian", + "middle": "Andrew", + "last": "O'connor" + }, + { + "id": "test_single_quotes_on_both_name_not_treated_as_nickname", + "raw": "La'tanya O'connor", + "first": "La'tanya", + "last": "O'connor" + }, + { + "id": "test_single_quotes_on_end_of_last_name_not_treated_as_nickname", + "raw": "Mari' Aube'", + "first": "Mari'", + "last": "Aube'" + }, + { + "id": "test_okina_inside_name_not_treated_as_nickname", + "raw": "Harrieta Keōpūolani Nāhiʻenaʻena", + "first": "Harrieta", + "middle": "Keōpūolani", + "last": "Nāhiʻenaʻena" + }, + { + "id": "test_single_quotes_not_treated_as_nickname_Hawaiian_example", + "raw": "Harietta Keopuolani Nahi'ena'ena", + "first": "Harietta", + "middle": "Keopuolani", + "last": "Nahi'ena'ena" + }, + { + "id": "test_single_quotes_not_treated_as_nickname_Kenyan_example", + "raw": "Naomi Wambui Ng'ang'a", + "first": "Naomi", + "middle": "Wambui", + "last": "Ng'ang'a" + }, + { + "id": "test_single_quotes_not_treated_as_nickname_Samoan_example", + "raw": "Va'apu'u Vitale", + "first": "Va'apu'u", + "last": "Vitale" + }, + { + "id": "test_nickname_and_last_name", + "raw": "\"Rick\" Edmonds", + "last": "Edmonds", + "nickname": "Rick" + } +] \ No newline at end of file diff --git a/pytest/names/prefix.json b/pytest/names/prefix.json new file mode 100644 index 0000000..590c5ab --- /dev/null +++ b/pytest/names/prefix.json @@ -0,0 +1,118 @@ +[ + { + "id": "test_comma_two_part_last_name_with_suffix_in_first_part", + "_note": "I'm kinda surprised this works, not really sure if this is a realistic place for a suffix to be.", + "raw": "von bergen wessels MD, pennie", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "MD" + }, + { + "id": "test_prefix", + "raw": "Juan del Sur", + "first": "Juan", + "last": "del Sur" + }, + { + "id": "test_prefix_with_period", + "raw": "Jill St. John", + "first": "Jill", + "last": "St. John" + }, + { + "id": "test_prefix_before_two_part_last_name", + "raw": "pennie von bergen wessels", + "first": "pennie", + "last": "von bergen wessels" + }, + { + "id": "test_prefix_before_two_part_last_name_with_suffix", + "raw": "pennie von bergen wessels III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "III" + }, + { + "id": "test_prefix_before_two_part_last_name_with_acronym_suffix", + "raw": "pennie von bergen wessels M.D.", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "M.D." + }, + { + "id": "test_two_part_last_name_with_suffix_comma", + "raw": "pennie von bergen wessels, III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "III" + }, + { + "id": "test_two_part_last_name_with_suffix", + "raw": "von bergen wessels, pennie III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "III" + }, + { + "id": "test_last_name_two_part_last_name_with_two_suffixes", + "raw": "von bergen wessels MD, pennie III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "MD, III" + }, + { + "id": "test_comma_two_part_last_name_with_acronym_suffix", + "raw": "von bergen wessels, pennie MD", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "MD" + }, + { + "id": "test_title_two_part_last_name_with_suffix_in_first_part", + "raw": "pennie von bergen wessels MD, III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "MD, III" + }, + { + "id": "test_portuguese_dos", + "raw": "Rafael Sousa dos Anjos", + "first": "Rafael", + "middle": "Sousa", + "last": "dos Anjos" + }, + { + "id": "test_portuguese_prefixes", + "raw": "Joao da Silva do Amaral de Souza", + "first": "Joao", + "middle": "da Silva do Amaral", + "last": "de Souza" + }, + { + "id": "test_three_conjunctions", + "raw": "Dr. Juan Q. Xavier de la dos Vega III", + "first": "Juan", + "last": "de la dos Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "id": "test_lastname_three_conjunctions", + "raw": "de la dos Vega, Dr. Juan Q. Xavier III", + "first": "Juan", + "last": "de la dos Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "id": "test_comma_three_conjunctions", + "raw": "Dr. Juan Q. Xavier de la dos Vega, III", + "first": "Juan", + "last": "de la dos Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + } +] \ No newline at end of file diff --git a/pytest/names/suffix.json b/pytest/names/suffix.json new file mode 100644 index 0000000..af2f082 --- /dev/null +++ b/pytest/names/suffix.json @@ -0,0 +1,140 @@ +[ + { + "id": "test_two_suffixes", + "_note": "This adds a comma when the original format did not have one. Not ideal but at least its in the right bucket.", + "raw": "Kenneth Clarke QC MP", + "first": "Kenneth", + "last": "Clarke", + "suffix": "QC, MP" + }, + { + "id": "test_two_suffixes_lastname_comma_format", + "_note": "This adds a comma when the original format did not have one.", + "raw": "Washington Jr. MD, Franklin", + "first": "Franklin", + "last": "Washington", + "suffix": "Jr., MD" + }, + { + "id": "test_suffix", + "raw": "Joe Franklin Jr", + "first": "Joe", + "last": "Franklin", + "suffix": "Jr" + }, + { + "id": "test_suffix_with_periods", + "raw": "Joe Dentist D.D.S.", + "first": "Joe", + "last": "Dentist", + "suffix": "D.D.S." + }, + { + "id": "test_two_suffixes_suffix_comma_format", + "raw": "Franklin Washington, Jr. MD", + "first": "Franklin", + "last": "Washington", + "suffix": "Jr. MD" + }, + { + "id": "test_suffix_containing_periods", + "raw": "Kenneth Clarke Q.C.", + "first": "Kenneth", + "last": "Clarke", + "suffix": "Q.C." + }, + { + "id": "test_suffix_containing_periods_lastname_comma_format", + "raw": "Clarke, Kenneth, Q.C. M.P.", + "first": "Kenneth", + "last": "Clarke", + "suffix": "Q.C. M.P." + }, + { + "id": "test_suffix_containing_periods_suffix_comma_format", + "raw": "Kenneth Clarke Q.C., M.P.", + "first": "Kenneth", + "last": "Clarke", + "suffix": "Q.C., M.P." + }, + { + "id": "test_suffix_with_single_comma_format", + "raw": "John Doe jr., MD", + "first": "John", + "last": "Doe", + "suffix": "jr., MD" + }, + { + "id": "test_suffix_with_double_comma_format", + "raw": "Doe, John jr., MD", + "first": "John", + "last": "Doe", + "suffix": "jr., MD" + }, + { + "id": "test_phd_with_erroneous_space", + "raw": "John Smith, Ph. D.", + "first": "John", + "last": "Smith", + "suffix": "Ph. D." + }, + { + "id": "test_phd_conflict", + "raw": "Adolph D", + "first": "Adolph", + "last": "D" + }, + { + "id": "test_potential_suffix_that_is_also_last_name_with_suffix", + "raw": "Jack Ma Jr", + "first": "Jack", + "last": "Ma", + "suffix": "Jr" + }, + { + "id": "test_potential_suffix_that_is_also_last_name_with_suffix_comma", + "raw": "Ma III, Jack Jr", + "first": "Jack", + "last": "Ma", + "suffix": "III, Jr" + }, + { + "id": "test_potential_suffix_that_is_also_last_name", + "raw": "Jack Ma", + "first": "Jack", + "last": "Ma" + }, + { + "id": "test_potential_suffix_that_is_also_last_name_comma", + "raw": "Ma, Jack", + "first": "Jack", + "last": "Ma" + }, + { + "id": "test_potential_suffix_that_is_also_first_name_comma", + "raw": "Johnson, Bart", + "first": "Bart", + "last": "Johnson" + }, + { + "id": "test_multiple_letter_suffix_with_periods", + "raw": "John Doe Msc.Ed.", + "first": "John", + "last": "Doe", + "suffix": "Msc.Ed." + }, + { + "id": "test_suffix_with_periods_with_comma", + "raw": "John Doe, Msc.Ed.", + "first": "John", + "last": "Doe", + "suffix": "Msc.Ed." + }, + { + "id": "test_suffix_with_periods_with_lastname_comma", + "raw": "Doe, John Msc.Ed.", + "first": "John", + "last": "Doe", + "suffix": "Msc.Ed." + } +] \ No newline at end of file diff --git a/pytest/names/test_bank.json b/pytest/names/test_bank.json new file mode 100644 index 0000000..c9a3538 --- /dev/null +++ b/pytest/names/test_bank.json @@ -0,0 +1,983 @@ +{ + "singular_test_names": [ + "John Doe", + "John Doe, Jr.", + "John Doe III", + "Doe, John", + "Doe, John, Jr.", + "Doe, John III", + "John A. Doe", + "John A. Doe, Jr.", + "John A. Doe III", + "Doe, John A.", + "Doe, John A., Jr.", + "Doe, John A. III", + "John A. Kenneth Doe", + "John A. Kenneth Doe, Jr.", + "John A. Kenneth Doe III", + "Doe, John A. Kenneth", + "Doe, John A. Kenneth, Jr.", + "Doe, John A. Kenneth III", + "Dr. John Doe", + "Dr. John Doe, Jr.", + "Dr. John Doe III", + "Doe, Dr. John", + "Doe, Dr. John, Jr.", + "Doe, Dr. John III", + "Dr. John A. Doe", + "Dr. John A. Doe, Jr.", + "Dr. John A. Doe III", + "Doe, Dr. John A.", + "Doe, Dr. John A. Jr.", + "Doe, Dr. John A. III", + "Dr. John A. Kenneth Doe", + "Dr. John A. Kenneth Doe, Jr.", + "Dr. John A. Kenneth Doe III", + "Doe, Dr. John A. Kenneth", + "Doe, Dr. John A. Kenneth Jr.", + "Doe, Dr. John A. Kenneth III", + "Juan de la Vega", + "Juan de la Vega, Jr.", + "Juan de la Vega III", + "de la Vega, Juan", + "de la Vega, Juan, Jr.", + "de la Vega, Juan III", + "Juan Velasquez y Garcia", + "Juan Velasquez y Garcia, Jr.", + "Juan Velasquez y Garcia III", + "Velasquez y Garcia, Juan", + "Velasquez y Garcia, Juan, Jr.", + "Velasquez y Garcia, Juan III", + "Dr. Juan de la Vega", + "Dr. Juan de la Vega, Jr.", + "Dr. Juan de la Vega III", + "de la Vega, Dr. Juan", + "de la Vega, Dr. Juan, Jr.", + "de la Vega, Dr. Juan III", + "Dr. Juan Velasquez y Garcia", + "Dr. Juan Velasquez y Garcia, Jr.", + "Dr. Juan Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan", + "Velasquez y Garcia, Dr. Juan, Jr.", + "Velasquez y Garcia, Dr. Juan III", + "Juan Q. de la Vega", + "Juan Q. de la Vega, Jr.", + "Juan Q. de la Vega III", + "de la Vega, Juan Q.", + "de la Vega, Juan Q., Jr.", + "de la Vega, Juan Q. III", + "Juan Q. Velasquez y Garcia", + "Juan Q. Velasquez y Garcia, Jr.", + "Juan Q. Velasquez y Garcia III", + "Velasquez y Garcia, Juan Q.", + "Velasquez y Garcia, Juan Q., Jr.", + "Velasquez y Garcia, Juan Q. III", + "Dr. Juan Q. de la Vega", + "Dr. Juan Q. de la Vega, Jr.", + "Dr. Juan Q. de la Vega III", + "de la Vega, Dr. Juan Q.", + "de la Vega, Dr. Juan Q., Jr.", + "de la Vega, Dr. Juan Q. III", + "Dr. Juan Q. Velasquez y Garcia", + "Dr. Juan Q. Velasquez y Garcia, Jr.", + "Dr. Juan Q. Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan Q.", + "Velasquez y Garcia, Dr. Juan Q., Jr.", + "Velasquez y Garcia, Dr. Juan Q. III", + "Juan Q. Xavier de la Vega", + "Juan Q. Xavier de la Vega, Jr.", + "Juan Q. Xavier de la Vega III", + "de la Vega, Juan Q. Xavier", + "de la Vega, Juan Q. Xavier, Jr.", + "de la Vega, Juan Q. Xavier III", + "Juan Q. Xavier Velasquez y Garcia", + "Juan Q. Xavier Velasquez y Garcia, Jr.", + "Juan Q. Xavier Velasquez y Garcia III", + "Velasquez y Garcia, Juan Q. Xavier", + "Velasquez y Garcia, Juan Q. Xavier, Jr.", + "Velasquez y Garcia, Juan Q. Xavier III", + "Dr. Juan Q. Xavier de la Vega", + "Dr. Juan Q. Xavier de la Vega, Jr.", + "Dr. Juan Q. Xavier de la Vega III", + "de la Vega, Dr. Juan Q. Xavier", + "de la Vega, Dr. Juan Q. Xavier, Jr.", + "de la Vega, Dr. Juan Q. Xavier III", + "Dr. Juan Q. Xavier Velasquez y Garcia", + "Dr. Juan Q. Xavier Velasquez y Garcia, Jr.", + "Dr. Juan Q. Xavier Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan Q. Xavier", + "Velasquez y Garcia, Dr. Juan Q. Xavier, Jr.", + "Velasquez y Garcia, Dr. Juan Q. Xavier III", + "John Doe, CLU, CFP, LUTC", + "John P. Doe, CLU, CFP, LUTC", + "Dr. John P. Doe-Ray, CLU, CFP, LUTC", + "Doe-Ray, Dr. John P., CLU, CFP, LUTC", + "Hon. Barrington P. Doe-Ray, Jr.", + "Doe-Ray, Hon. Barrington P. Jr.", + "Doe-Ray, Hon. Barrington P. Jr., CFP, LUTC", + "Jose Aznar y Lopez", + "John E Smith", + "John e Smith", + "John and Jane Smith", + "Rev. John A. Kenneth Doe", + "Donovan McNabb-Smith", + "Rev John A. Kenneth Doe", + "Doe, Rev. John A. Jr.", + "Buca di Beppo", + "Lt. Gen. John A. Kenneth Doe, Jr.", + "Doe, Lt. Gen. John A. Kenneth IV", + "Lt. Gen. John A. Kenneth Doe IV", + "Mr. and Mrs. John Smith", + "John Jones (Google Docs)", + "john e jones", + "john e jones, III", + "jones, john e", + "E.T. Smith", + "E.T. Smith, II", + "Smith, E.T., Jr.", + "A.B. Vajpayee", + "Rt. Hon. Paul E. Mary", + "Maid Marion", + "Amy E. Maid", + "Jane Doctor", + "Doctor, Jane E.", + "dr. ben alex johnson III", + "Lord of the Universe and Supreme King of the World Lisa Simpson", + "Benjamin (Ben) Franklin", + "Benjamin \"Ben\" Franklin", + "Brian O'connor", + "Sir Gerald", + "Magistrate Judge John F. Forster, Jr", + "Magistrate Judge Joaquin V.E. Manibusan, Jr", + "Magistrate-Judge Elizabeth Todd Campbell", + "Mag-Judge Harwell G Davis, III", + "Mag. Judge Byron G. Cudmore", + "Chief Judge J. Leon Holmes", + "Chief Judge Sharon Lovelace Blackburn", + "Judge James M. Moody", + "Judge G. Thomas Eisele", + "Judge Callie V. S. Granade", + "Judge C Lynwood Smith, Jr", + "Senior Judge Charles R. Butler, Jr", + "Senior Judge Harold D. Vietor", + "Senior Judge Virgil Pittman", + "Honorable Terry F. Moorer", + "Honorable W. Harold Albritton, III", + "Honorable Judge W. Harold Albritton, III", + "Honorable Judge Terry F. Moorer", + "Honorable Judge Susan Russ Walker", + "Hon. Marian W. Payson", + "Hon. Charles J. Siragusa", + "US Magistrate Judge T Michael Putnam", + "Designated Judge David A. Ezra", + "Sr US District Judge Richard G Kopf", + "U.S. District Judge Marc Thomas Treadwell", + "Dra. Andréia da Silva", + "Srta. Andréia da Silva" + ], + "brute_force": [ + { + "raw": "John Doe", + "first": "John", + "last": "Doe" + }, + { + "raw": "John Doe, Jr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "John Doe III", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, John", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, John, Jr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, John III", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "John A. Doe", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "John A. Doe, Jr", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr" + }, + { + "raw": "John A. Doe III", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "Doe, John A.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Doe, John A., Jr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Doe, John A., III", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "John A. Kenneth Doe", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth" + }, + { + "raw": "John A. Kenneth Doe, Jr.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "raw": "John A. Kenneth Doe III", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "III" + }, + { + "raw": "Doe, John. A. Kenneth", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth" + }, + { + "raw": "Doe, John. A. Kenneth, Jr.", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "raw": "Doe, John. A. Kenneth III", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "III" + }, + { + "raw": "Dr. John Doe", + "first": "John", + "last": "Doe", + "title": "Dr." + }, + { + "raw": "Dr. John Doe, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Dr. John Doe III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John", + "title": "Dr.", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, Dr. John, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Dr. John A. Doe", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Dr. John A. Doe, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Dr. John A. Doe III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John A.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Doe, Dr. John A. Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John A. III", + "title": "Dr.", + "middle": "A.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Dr. John A. Kenneth Doe", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "raw": "Dr. John A. Kenneth Doe, Jr.", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Al Arnold Gore, Jr.", + "middle": "Arnold", + "first": "Al", + "last": "Gore", + "suffix": "Jr." + }, + { + "raw": "Dr. John A. Kenneth Doe III", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John A. Kenneth", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, Dr. John A. Kenneth Jr.", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John A. Kenneth III", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Juan de la Vega", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "Juan de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "Juan de la Vega III", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan, Jr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan III", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "Juan Velasquez y Garcia", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Velasquez y Garcia, Jr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Velasquez y Garcia III", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan, Jr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan III", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan de la Vega", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan de la Vega, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan de la Vega III", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Dr. Juan, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Dr. Juan III", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "Dr. Juan Velasquez y Garcia", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Velasquez y Garcia, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Velasquez y Garcia III", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan III", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Juan Q. de la Vega", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "Juan Q. de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr." + }, + { + "raw": "Juan Q. de la Vega III", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan Q.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan Q., Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan Q. III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "III" + }, + { + "raw": "Juan Q. Velasquez y Garcia", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Q. Velasquez y Garcia, Jr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Velasquez y Garcia III", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan Q.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan Q., Jr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan Q. III", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. de la Vega", + "title": "Dr.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan Q. de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "title": "Dr.", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. de la Vega III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "title": "Dr.", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan Q.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega", + "title": "Dr." + }, + { + "raw": "de la Vega, Dr. Juan Q., Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr.", + "title": "Dr." + }, + { + "raw": "de la Vega, Dr. Juan Q. III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "III", + "title": "Dr." + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia, Jr.", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia III", + "middle": "Q.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q.", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q., Jr.", + "middle": "Q.", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. III", + "middle": "Q.", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Juan Q. Xavier de la Vega", + "first": "Juan", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "Juan Q. Xavier de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Xavier de la Vega III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan Q. Xavier", + "first": "Juan", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan Q. Xavier, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan Q. Xavier III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega", + "first": "Juan", + "middle": "Q. Xavier", + "title": "Dr.", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega III", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier", + "first": "Juan", + "title": "Dr.", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier, Jr.", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier III", + "first": "Juan", + "title": "Dr.", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia III", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier III", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia", + "title": "Dr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia III", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier", + "title": "Dr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier III", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "John Doe, CLU, CFP, LUTC", + "first": "John", + "last": "Doe", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "John P. Doe, CLU, CFP, LUTC", + "first": "John", + "middle": "P.", + "last": "Doe", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Dr. John P. Doe-Ray, CLU, CFP, LUTC", + "first": "John", + "middle": "P.", + "last": "Doe-Ray", + "title": "Dr.", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Doe-Ray, Dr. John P., CLU, CFP, LUTC", + "title": "Dr.", + "middle": "P.", + "first": "John", + "last": "Doe-Ray", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Hon. Barrington P. Doe-Ray, Jr.", + "title": "Hon.", + "middle": "P.", + "first": "Barrington", + "last": "Doe-Ray", + "suffix": "Jr." + }, + { + "raw": "Doe-Ray, Hon. Barrington P. Jr., CFP, LUTC", + "title": "Hon.", + "middle": "P.", + "first": "Barrington", + "last": "Doe-Ray", + "suffix": "Jr., CFP, LUTC" + }, + { + "raw": "Rt. Hon. Paul E. Mary", + "title": "Rt. Hon.", + "first": "Paul", + "middle": "E.", + "last": "Mary" + }, + { + "raw": "Lord God Almighty", + "title": "Lord", + "first": "God", + "last": "Almighty" + } + ] +} \ No newline at end of file diff --git a/pytest/names/title.json b/pytest/names/title.json new file mode 100644 index 0000000..357524d --- /dev/null +++ b/pytest/names/title.json @@ -0,0 +1,221 @@ +[ + { + "id": "test_last_name_also_prefix", + "_note": "http://code.google.com/p/python-nameparser/issues/detail?id=13", + "raw": "Jane Doctor", + "first": "Jane", + "last": "Doctor" + }, + { + "id": "test_last_name_is_also_title", + "raw": "Amy E Maid", + "first": "Amy", + "middle": "E", + "last": "Maid" + }, + { + "id": "test_last_name_is_also_title_no_comma", + "raw": "Dr. Martin Luther King Jr.", + "title": "Dr.", + "first": "Martin", + "middle": "Luther", + "last": "King", + "suffix": "Jr." + }, + { + "id": "test_last_name_is_also_title_with_comma", + "raw": "Duke Martin Luther King, Jr.", + "title": "Duke", + "first": "Martin", + "middle": "Luther", + "last": "King", + "suffix": "Jr." + }, + { + "id": "test_last_name_is_also_title3", + "raw": "John King", + "first": "John", + "last": "King" + }, + { + "id": "test_title_with_conjunction", + "raw": "Secretary of State Hillary Clinton", + "title": "Secretary of State", + "first": "Hillary", + "last": "Clinton" + }, + { + "id": "test_compound_title_with_conjunction", + "raw": "Cardinal Secretary of State Hillary Clinton", + "title": "Cardinal Secretary of State", + "first": "Hillary", + "last": "Clinton" + }, + { + "id": "test_title_is_title", + "raw": "Coach", + "title": "Coach" + }, + { + "id": "test_conflict_with_chained_title_first_name_initial", + "raw": "U. S. Grant", + "first": "U.", + "middle": "S.", + "last": "Grant" + }, + { + "id": "test_chained_title_first_name_initial_with_no_period", + "raw": "US Magistrate Judge T Michael Putnam", + "title": "US Magistrate Judge", + "first": "T", + "middle": "Michael", + "last": "Putnam" + }, + { + "id": "test_chained_hyphenated_title", + "raw": "US Magistrate-Judge Elizabeth E Campbell", + "title": "US Magistrate-Judge", + "first": "Elizabeth", + "middle": "E", + "last": "Campbell" + }, + { + "id": "test_chained_hyphenated_title_with_comma_suffix", + "raw": "Mag-Judge Harwell G Davis, III", + "title": "Mag-Judge", + "first": "Harwell", + "middle": "G", + "last": "Davis", + "suffix": "III" + }, + { + "id": "test_title_starts_with_conjunction", + "raw": "The Rt Hon John Jones", + "title": "The Rt Hon", + "first": "John", + "last": "Jones" + }, + { + "id": "test_conjunction_before_title", + "raw": "The Lord of the Universe", + "title": "The Lord of the Universe" + }, + { + "id": "test_double_conjunction_on_title", + "raw": "Lord of the Universe", + "title": "Lord of the Universe" + }, + { + "id": "test_triple_conjunction_on_title", + "raw": "Lord and of the Universe", + "title": "Lord and of the Universe" + }, + { + "id": "test_multiple_conjunctions_on_multiple_titles", + "raw": "Lord of the Universe and Associate Supreme Queen of the World Lisa Simpson", + "title": "Lord of the Universe and Associate Supreme Queen of the World", + "first": "Lisa", + "last": "Simpson" + }, + { + "id": "test_title_with_last_initial_is_suffix", + "raw": "King John V.", + "title": "King", + "first": "John", + "last": "V." + }, + { + "id": "test_initials_also_suffix", + "raw": "Smith, J.R.", + "first": "J.R.", + "last": "Smith" + }, + { + "id": "test_two_title_parts_separated_by_periods", + "raw": "Lt.Gen. John A. Kenneth Doe IV", + "title": "Lt.Gen.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "IV" + }, + { + "id": "test_two_part_title", + "raw": "Lt. Gen. John A. Kenneth Doe IV", + "title": "Lt. Gen.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "IV" + }, + { + "id": "test_two_part_title_with_lastname_comma", + "raw": "Doe, Lt. Gen. John A. Kenneth IV", + "title": "Lt. Gen.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "IV" + }, + { + "id": "test_two_part_title_with_suffix_comma", + "raw": "Lt. Gen. John A. Kenneth Doe, Jr.", + "title": "Lt. Gen.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "id": "test_possible_conflict_with_middle_initial_that_could_be_suffix", + "raw": "Doe, Rev. John V, Jr.", + "title": "Rev.", + "first": "John", + "last": "Doe", + "middle": "V", + "suffix": "Jr." + }, + { + "id": "test_possible_conflict_with_suffix_that_could_be_initial", + "raw": "Doe, Rev. John A., V, Jr.", + "title": "Rev.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "V, Jr." + }, + { + "id": "test_ben_as_first_name", + "raw": "Ben Johnson", + "first": "Ben", + "last": "Johnson" + }, + { + "id": "test_ben_as_first_name_with_middle_name", + "raw": "Ben Alex Johnson", + "first": "Ben", + "middle": "Alex", + "last": "Johnson" + }, + { + "id": "test_ben_as_middle_name", + "raw": "Alex Ben Johnson", + "first": "Alex", + "middle": "Ben", + "last": "Johnson" + }, + { + "id": "test_title_with_periods", + "raw": "Lt.Gov. John Doe", + "title": "Lt.Gov.", + "first": "John", + "last": "Doe" + }, + { + "id": "test_title_with_periods_lastname_comma", + "raw": "Doe, Lt.Gov. John", + "title": "Lt.Gov.", + "first": "John", + "last": "Doe" + } +] \ No newline at end of file diff --git a/pytest/names_test.py b/pytest/names_test.py new file mode 100644 index 0000000..c12e884 --- /dev/null +++ b/pytest/names_test.py @@ -0,0 +1,711 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function +from __future__ import unicode_literals +from io import open +import json +import os +import sys + +import pytest + +from nameparser import HumanName +from nameparser.config import CONSTANTS, Constants +from nameparser.util import u + +TEST_DATA_DIRECTORY = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "names" +) +print(TEST_DATA_DIRECTORY) + + +def load_bank(category): + filename = category + ".json" + test_bank_file = os.path.join(TEST_DATA_DIRECTORY, filename) + + with open(test_bank_file, "r", encoding="utf8") as infile: + # with io.open(test_bank_file, "r") as infile: + test_bank = json.load(infile, encoding="utf-8") + print("Loading {} cases for {} from {}.".format(len(test_bank), category, filename)) + return test_bank + + +def dict_entry_test(dict_entry): + hn = HumanName(dict_entry["raw"]) + for attr in hn._members: + actual = getattr(hn, attr) + expected = dict_entry.get(attr, CONSTANTS.empty_attribute_default) + assert actual == expected + + +def make_ids(entry): + return entry.get("id") or entry.get("raw") + + +class TestCoreFunctionality: + @pytest.mark.parametrize( + "entry", + [ + { + "id": "test_utf8", + "raw": "de la Véña, Jüan", + "first": "Jüan", + "last": "de la Véña", + }, + { + "id": "test_escaped_utf8_bytes", + "raw": b"B\xc3\xb6ck, Gerald", + "first": "Gerald", + "last": "Böck", + }, + { + "id": "test_conjunction_names", + "raw": "johnny y", + "first": "johnny", + "last": "y", + }, + { + "id": "test_prefixed_names", + "raw": "vai la", + "first": "vai", + "last": "la", + }, + ], + ids=make_ids, + ) + def test_basics(self, entry): + dict_entry_test(entry) + + def test_blank(self): + # This can't be parametrized in the same way as test_basics, because + # CONSTANTS.empty_attribute_default is itself paramatrized at the module level + dict_entry_test( + { + "id": "test_blank_name", + "raw": "", + "first": CONSTANTS.empty_attribute_default, + "last": CONSTANTS.empty_attribute_default, + } + ) + + def test_string_output(self,): + hn = HumanName("de la Véña, Jüan") + print(hn) + print(repr(hn)) + + @pytest.mark.parametrize( + "raw, length", [("Doe-Ray, Dr. John P., CLU, CFP, LUTC", 5), ("John Doe", 2)] + ) + def test_len(self, raw, length): + assert len(HumanName(raw)) == length + + def test_comparison(self): + hn1 = HumanName("Doe-Ray, Dr. John P., CLU, CFP, LUTC") + hn2 = HumanName("Dr. John P. Doe-Ray, CLU, CFP, LUTC") + assert hn1 == hn2 + assert hn1 is not hn2 + assert hn1 == "Dr. John P. Doe-Ray CLU, CFP, LUTC" + hn1 = HumanName("Doe, Dr. John P., CLU, CFP, LUTC") + hn2 = HumanName("Dr. John P. Doe-Ray, CLU, CFP, LUTC") + assert hn1 != hn2 + assert hn1 != 0 + assert hn1 != "test" + assert hn1 != ["test"] + assert hn1 != {"test": hn2} + + def test_assignment_to_full_name(self): + hn = HumanName("John A. Kenneth Doe, Jr.") + assert hn.first == "John" + assert hn.last == "Doe" + assert hn.middle == "A. Kenneth" + assert hn.suffix == "Jr." + hn.full_name = "Juan Velasquez y Garcia III" + assert hn.first == "Juan" + assert hn.last == "Velasquez y Garcia" + assert hn.suffix == "III" + + def test_get_full_name_attribute_references_internal_lists(self): + hn = HumanName("John Williams") + hn.first_list = ["Larry"] + assert hn.full_name, "Larry Williams" + + def test_assignment_to_attribute(self): + hn = HumanName("John A. Kenneth Doe, Jr.") + hn.last = "de la Vega" + assert hn.last == "de la Vega" + hn.title = "test" + assert hn.title == "test" + hn.first = "test" + assert hn.first == "test" + hn.middle = "test" + assert hn.middle == "test" + hn.suffix = "test" + assert hn.suffix == "test" + with pytest.raises(TypeError): + hn.suffix = [["test"]] + with pytest.raises(TypeError): + hn.suffix = {"test": "test"} + + def test_assign_list_to_attribute(self): + hn = HumanName("John A. Kenneth Doe, Jr.") + hn.title = ["test1", "test2"] + assert hn.title == "test1 test2" + hn.first = ["test3", "test4"] + assert hn.first == "test3 test4" + hn.middle = ["test5", "test6", "test7"] + assert hn.middle == "test5 test6 test7" + hn.last = ["test8", "test9", "test10"] + assert hn.last == "test8 test9 test10" + hn.suffix = ["test"] + assert hn.suffix == "test" + + def test_comparison_case_insensitive(self): + hn1 = HumanName("Doe-Ray, Dr. John P., CLU, CFP, LUTC") + hn2 = HumanName("dr. john p. doe-Ray, CLU, CFP, LUTC") + assert hn1 == hn2 + assert hn1 is not hn2 + assert hn1 == "Dr. John P. Doe-ray clu, CFP, LUTC" + + def test_slice(self): + hn = HumanName("Doe-Ray, Dr. John P., CLU, CFP, LUTC") + assert list(hn), ["Dr.", "John", "P.", "Doe-Ray", "CLU, CFP, LUTC"] + assert hn[1:] == [ + "John", + "P.", + "Doe-Ray", + "CLU, CFP, LUTC", + hn.C.empty_attribute_default, + ] + assert hn[1:-2], ["John", "P.", "Doe-Ray"] + + def test_getitem(self): + hn = HumanName("Dr. John A. Kenneth Doe, Jr.") + assert hn["title"], "Dr." + assert hn["first"], "John" + assert hn["last"], "Doe" + assert hn["middle"], "A. Kenneth" + assert hn["suffix"], "Jr." + + def test_setitem(self): + hn = HumanName("Dr. John A. Kenneth Doe, Jr.") + hn["title"] = "test" + assert hn["title"], "test" + hn["last"] = ["test", "test2"] + assert hn["last"], "test test2" + with pytest.raises(TypeError): + hn["suffix"] = [["test"]] + with pytest.raises(TypeError): + hn["suffix"] = {"test": "test"} + + def test_surnames_list_attribute(self): + hn = HumanName("John Edgar Casey Williams III") + assert hn.surnames_list, ["Edgar", "Casey", "Williams"] + + def test_surnames_attribute(self): + hn = HumanName("John Edgar Casey Williams III") + assert hn.surnames == "Edgar Casey Williams" + + +class TestPickle: + + try: + import dill + + no_dill = False + except ImportError: + no_dill = True + + @pytest.mark.skipif(no_dill, reason="requires python-dill module to test pickling") + def test_config_pickle(self): + constants = Constants() + self.dill.pickles(constants) + + @pytest.mark.skipif(no_dill, reason="requires python-dill module to test pickling") + def test_name_instance_pickle(self): + hn = HumanName("Title First Middle Middle Last, Jr.") + self.dill.pickles(hn) + + +class TestHumanNameBruteForce: + @pytest.mark.parametrize("entry", load_bank("brute_force"), ids=make_ids) + def test_brute(self, entry): + dict_entry_test(entry) + + +class TestFirstNameHandling: + @pytest.mark.parametrize("entry", load_bank("first_name"), ids=make_ids) + def test_json_first_name(self, entry): + dict_entry_test(entry) + + @pytest.mark.xfail( + reason="# TODO: Seems 'Andrews, M.D.', Andrews should be treated as a last name" + "but other suffixes like 'George Jr.' should be first names. " + "Might be related to https://github.com/derek73/python-nameparser/issues/2" + ) + def test_assume_suffix_title_and_one_other_name_is_last_name(self): + hn = HumanName("Andrews, M.D.") + assert hn.suffix == "M.D." + assert hn.last == "Andrews" + + @pytest.mark.xfail + def test_first_name_is_prefix_if_three_parts(self): + """Not sure how to fix this without breaking Mr and Mrs""" + hn = HumanName("Mr. Van Nguyen") + assert hn.first == "Van" + assert hn.last == "Nguyen" + + +class TestHumanNameConjunction: + @pytest.mark.parametrize("entry", load_bank("conjunction"), ids=make_ids) + def test_json_conjunction(self, entry): + dict_entry_test(entry) + + @pytest.mark.xfail + def test_two_initials_conflict_with_conjunction(self): + # Supporting this seems to screw up titles with periods in them like M.B.A. + hn = HumanName("E.T. Smith") + assert hn.first == "E." + assert hn.middle == "T." + assert hn.last == "Smith" + + @pytest.mark.xfail + def test_conjunction_in_an_address_with_a_first_name_title(self): + hn = HumanName("Her Majesty Queen Elizabeth") + assert hn.title == "Her Majesty Queen" + # if you want to be technical, Queen is in FIRST_NAME_TITLES + assert hn.first == "Elizabeth" + + +class TestConstantsCustomization: + def test_add_title(self): + hn = HumanName("Te Awanui-a-Rangi Black", constants=None) + start_len = len(hn.C.titles) + assert start_len > 0 + hn.C.titles.add("te") + assert start_len + 1 == len(hn.C.titles) + hn.parse_full_name() + assert hn.title == "Te" + assert hn.first == "Awanui-a-Rangi" + assert hn.last == "Black" + + def test_remove_title(self): + hn = HumanName("Hon Solo", constants=None) + start_len = len(hn.C.titles) + assert start_len > 0 + hn.C.titles.remove("hon") + assert start_len - 1 == len(hn.C.titles) + hn.parse_full_name() + assert hn.first == "Hon" + assert hn.last == "Solo" + + def test_add_multiple_arguments(self): + hn = HumanName("Assoc Dean of Chemistry Robert Johns", constants=None) + hn.C.titles.add("dean", "Chemistry") + hn.parse_full_name() + assert hn.title == "Assoc Dean of Chemistry" + assert hn.first == "Robert" + assert hn.last == "Johns" + + def test_instances_can_have_own_constants(self): + hn = HumanName("", None) + hn2 = HumanName("") + hn.C.titles.remove("hon") + assert "hon" not in hn.C.titles + assert hn.has_own_config + assert "hon" in hn2.C.titles + assert not hn2.has_own_config + + def test_can_change_global_constants(self): + hn = HumanName("") + hn2 = HumanName("") + hn.C.titles.remove("hon") + assert "hon" not in hn.C.titles + assert "hon" not in hn2.C.titles + assert not hn.has_own_config + assert not hn2.has_own_config + # clean up so we don't mess up other tests + hn.C.titles.add("hon") + + def test_remove_multiple_arguments(self): + hn = HumanName("Ms Hon Solo", constants=None) + hn.C.titles.remove("hon", "ms") + hn.parse_full_name() + assert hn.first == "Ms" + assert hn.middle == "Hon" + assert hn.last == "Solo" + + def test_chain_multiple_arguments(self): + hn = HumanName("Dean Ms Hon Solo", constants=None) + hn.C.titles.remove("hon", "ms").add("dean") + hn.parse_full_name() + assert hn.title == "Dean" + assert hn.first == "Ms" + assert hn.middle == "Hon" + assert hn.last == "Solo" + + def test_empty_attribute_default(self): + from nameparser.config import CONSTANTS + + _orig = CONSTANTS.empty_attribute_default + CONSTANTS.empty_attribute_default = None + hn = HumanName("") + assert hn.title is None + assert hn.first is None + assert hn.middle is None + assert hn.last is None + assert hn.suffix is None + assert hn.nickname is None + CONSTANTS.empty_attribute_default = _orig + + def test_empty_attribute_on_instance(self): + hn = HumanName("", None) + hn.C.empty_attribute_default = None + assert hn.title is None + assert hn.first is None + assert hn.middle is None + assert hn.last is None + assert hn.suffix is None + assert hn.nickname is None + + def test_none_empty_attribute_string_formatting(self): + hn = HumanName("", None) + hn.C.empty_attribute_default = None + assert str(hn) == "" + + def test_add_constant_with_explicit_encoding(self): + c = Constants() + c.titles.add_with_encoding(b"b\351ck", encoding="latin_1") + assert "béck" in c.titles + + +class TestNickname: + @pytest.mark.parametrize("entry", load_bank("nickname"), ids=make_ids) + def test_json_nickname(self, entry): + dict_entry_test(entry) + + # http://code.google.com/p/python-nameparser/issues/detail?id=17 + def test_parenthesis_are_removed_from_name(self): + hn = HumanName("John Jones (Unknown)") + assert hn.first == "John" + assert hn.last == "Jones" + assert hn.nickname != CONSTANTS.empty_attribute_default + + # http://code.google.com/p/python-nameparser/issues/detail?id=17 + # not testing nicknames because we don't actually care about Google Docs here + def test_duplicate_parenthesis_are_removed_from_name(self): + hn = HumanName("John Jones (Google Docs), Jr. (Unknown)") + assert hn.first == "John" + assert hn.last == "Jones" + assert hn.suffix == "Jr." + assert hn.nickname != CONSTANTS.empty_attribute_default + + @pytest.mark.xfail + def test_nickname_and_last_name_with_title(self): + hn = HumanName('Senator "Rick" Edmonds') + assert hn.title == "Senator" + assert hn.first == CONSTANTS.empty_attribute_default + assert hn.last == "Edmonds" + assert hn.nickname == "Rick" + + +class TestPrefixes: + @pytest.mark.parametrize("entry", load_bank("prefix"), ids=make_ids) + def test_json_prefix(self, entry): + dict_entry_test(entry) + + +class TestSuffixes: + @pytest.mark.parametrize("entry", load_bank("suffix"), ids=make_ids) + def test_json_suffix(self, entry): + dict_entry_test(entry) + + @pytest.mark.xfail( + reason="TODO: handle conjunctions in last names" + " followed by first names clashing with suffixes" + ) + def test_potential_suffix_that_is_also_first_name_comma_with_conjunction(self): + hn = HumanName("De la Vina, Bart") + assert hn.first == "Bart" + assert hn.last == "De la Vina" + + @pytest.mark.xfail(reason="https://github.com/derek73/python-nameparser/issues/27") + def test_king(self): + hn = HumanName("Dr King Jr") + assert hn.title == "Dr" + assert hn.last == "King" + assert hn.suffix == "Jr" + + +class TestTitle: + @pytest.mark.parametrize("entry", load_bank("title"), ids=make_ids) + def test_json_title(self, entry): + dict_entry_test(entry) + + @pytest.mark.xfail(reason="TODO: fix handling of U.S.") + def test_chained_title_first_name_title_is_initials(self): + hn = HumanName("U.S. District Judge Marc Thomas Treadwell") + assert hn.title == "U.S. District Judge" + assert hn.first == "Marc" + assert hn.middle == "Thomas" + assert hn.last == "Treadwell" + + @pytest.mark.xfail( + reason=" 'ben' is removed from PREFIXES in v0.2.5" + "this test could re-enable this test if we decide to support 'ben' as a prefix" + ) + def test_title_multiple_titles_with_apostrophe_s(self): + hn = HumanName("The Right Hon. the President of the Queen's Bench Division") + assert hn.title == "The Right Hon. the President of the Queen's Bench Division" + + @pytest.mark.xfail + def test_ben_as_conjunction(self): + hn = HumanName("Ahmad ben Husain") + assert hn.first == "Ahmad" + assert hn.last == "ben Husain" + + +class TestHumanNameCapitalization: + @pytest.mark.parametrize("entry", load_bank("capitalization"), ids=make_ids) + def test_json_capitalization(self, entry): + hn = HumanName(entry["raw"]) + hn.capitalize() + if sys.version_info.major < 3: + assert u(hn) == entry["string"] + else: + assert str(hn) == entry["string"] + + @pytest.mark.parametrize( + "name, is_forced", + [ + ("Shirley Maclaine", {True: "Shirley MacLaine", False: "Shirley Maclaine"}), + ("Baron Mcyolo", {True: "Baron McYolo", False: "Baron Mcyolo"}), + ], + ) + @pytest.mark.parametrize("force", [True, False]) + def test_no_capitalization_change_unless_forced(self, name, is_forced, force): + hn = HumanName(name) + hn.capitalize(force=force) + assert str(hn) == is_forced[force] + + @pytest.mark.xfail( + reason="FIXME: this test does not pass due to a known issue " + "http://code.google.com/p/python-nameparser/issues/detail?id=22" + ) + def test_capitalization_exception_for_already_capitalized_III_KNOWN_FAILURE(self): + hn = HumanName("juan garcia III") + hn.capitalize() + assert str(hn) == "Juan Garcia III" + + +class TestHumanNameOutputFormat: + def test_formatting_init_argument(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)", string_format="TEST1") + assert u(hn) == "TEST1" + + def test_formatting_constants_attribute(self): + from nameparser.config import CONSTANTS + + _orig = CONSTANTS.string_format + CONSTANTS.string_format = "TEST2" + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + assert u(hn) == "TEST2" + CONSTANTS.string_format = _orig + + def test_quote_nickname_formating(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" + assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'" + hn.string_format = "{last}, {title} {first} {middle}, {suffix} '{nickname}'" + assert u(hn) == "Doe, Rev John A. Kenneth, III 'Kenny'" + + def test_formating_removing_keys_from_format_string(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" + assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'" + hn.string_format = "{last}, {title} {first} {middle}, {suffix}" + assert u(hn) == "Doe, Rev John A. Kenneth, III" + hn.string_format = "{last}, {title} {first} {middle}" + assert u(hn) == "Doe, Rev John A. Kenneth" + hn.string_format = "{last}, {first} {middle}" + assert u(hn) == "Doe, John A. Kenneth" + hn.string_format = "{last}, {first}" + assert u(hn) == "Doe, John" + hn.string_format = "{first} {last}" + assert u(hn) == "John Doe" + + def test_formating_removing_pieces_from_name_buckets(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" + assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'" + hn.string_format = "{title} {first} {middle} {last} {suffix}" + assert u(hn) == "Rev John A. Kenneth Doe III" + hn.middle = "" + assert u(hn) == "Rev John Doe III" + hn.suffix = "" + assert u(hn) == "Rev John Doe" + hn.title = "" + assert u(hn) == "John Doe" + + def test_formating_of_nicknames_with_parenthesis(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" + assert u(hn) == "Rev John A. Kenneth Doe III (Kenny)" + hn.nickname = "" + assert u(hn) == "Rev John A. Kenneth Doe III" + + def test_formating_of_nicknames_with_single_quotes(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" + assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'" + hn.nickname = "" + assert u(hn) == "Rev John A. Kenneth Doe III" + + def test_formating_of_nicknames_with_double_quotes(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = '{title} {first} {middle} {last} {suffix} "{nickname}"' + assert u(hn) == 'Rev John A. Kenneth Doe III "Kenny"' + hn.nickname = "" + assert u(hn) == "Rev John A. Kenneth Doe III" + + def test_formating_of_nicknames_in_middle(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} ({nickname}) {middle} {last} {suffix}" + assert u(hn) == "Rev John (Kenny) A. Kenneth Doe III" + hn.nickname = "" + assert u(hn) == "Rev John A. Kenneth Doe III" + + def test_remove_emojis(self): + hn = HumanName("Sam Smith 😊") + assert hn.first == "Sam" + assert hn.last == "Smith" + assert u(hn) == "Sam Smith" + + def test_keep_non_emojis(self): + hn = HumanName("∫≜⩕ Smith 😊") + assert hn.first == "∫≜⩕" + assert hn.last == "Smith" + assert u(hn) == "∫≜⩕ Smith" + + def test_keep_emojis(self): + constants = Constants() + constants.regexes.emoji = False + hn = HumanName("∫≜⩕ Smith😊", constants) + assert hn.first == "∫≜⩕" + assert hn.last == "Smith😊" + assert u(hn) == "∫≜⩕ Smith😊" + # test cleanup + + +class TestHumanNameVariations: + """Test automated variations of names in TEST_NAMES. + + Helps test that the 3 code trees work the same""" + + @pytest.mark.parametrize("name", load_bank("bare_names")) + def test_json_variations(self, name): + self.run_variations(name) + + def run_variations(self, name): + """ Run several variations + + This is a separate function so that individual non-parametrized tests can be + added if desired. + """ + hn = HumanName(name) + if len(hn.suffix_list) > 1: + hn = HumanName( + "{title} {first} {middle} {last} {suffix}".format(**hn.as_dict()).split( + "," + )[0] + ) + # format strings below require empty string + hn.C.empty_attribute_default = "" + hn_dict = hn.as_dict() + nocomma = HumanName( + "{title} {first} {middle} {last} {suffix}".format(**hn_dict) + ) + lastnamecomma = HumanName( + "{last}, {title} {first} {middle} {suffix}".format(**hn_dict) + ) + if hn.suffix: + suffixcomma = HumanName( + "{title} {first} {middle} {last}, {suffix}".format(**hn_dict) + ) + if hn.nickname: + nocomma = HumanName( + "{title} {first} {middle} {last} {suffix} ({nickname})".format( + **hn_dict + ) + ) + lastnamecomma = HumanName( + "{last}, {title} {first} {middle} {suffix} ({nickname})".format( + **hn_dict + ) + ) + if hn.suffix: + suffixcomma = HumanName( + "{title} {first} {middle} {last}, {suffix} ({nickname})".format( + **hn_dict + ) + ) + for attr in hn._members: + assert getattr(hn, attr) == getattr(nocomma, attr) + assert getattr(hn, attr) == getattr(lastnamecomma, attr) + if hn.suffix: + assert getattr(hn, attr) == getattr(suffixcomma, attr) + + +class TestMaidenName: + + no_maiden_names = getattr(HumanName(), "maiden", None) is None + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_parenthesis_and_quotes_together(self): + hn = HumanName("Jennifer 'Jen' Jones (Duff)") + assert hn.first == "Jennifer" + assert hn.last == "Jones" + assert hn.nickname == "Jen" + assert hn.maiden == "Duff" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_nee(self): + # https://en.wiktionary.org/wiki/née + hn = HumanName("Mary Toogood nee Johnson") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_accented_nee(self): + # https://en.wiktionary.org/wiki/née + hn = HumanName("Mary Toogood née Johnson") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_nee_and_comma(self): + # https://en.wiktionary.org/wiki/née + hn = HumanName("Mary Toogood, née Johnson") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_nee_with_parenthesis(self): + hn = HumanName("Mary Toogood (nee Johnson)") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_parenthesis(self): + hn = HumanName("Mary Toogood (Johnson)") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + +if __name__ == "__main__": + # Pass through any/all arguments to pytest + pytest.main(sys.argv) diff --git a/pytest/pytest.ini b/pytest/pytest.ini new file mode 100644 index 0000000..b0e5a94 --- /dev/null +++ b/pytest/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +filterwarnings = + ignore::DeprecationWarning \ No newline at end of file