From 6a4d131999bfbf6dd1e2249324bcba053549444b Mon Sep 17 00:00:00 2001 From: JD Bothma Date: Fri, 3 Jan 2025 18:46:10 +0000 Subject: [PATCH] resolver_statements passes --- nomenklatura/resolver/resolver.py | 32 +++++++++++++++++++++++-------- tests/test_resolver.py | 32 ++++++++++++++++++------------- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/nomenklatura/resolver/resolver.py b/nomenklatura/resolver/resolver.py index 267523f..2f9c225 100644 --- a/nomenklatura/resolver/resolver.py +++ b/nomenklatura/resolver/resolver.py @@ -1,25 +1,29 @@ -import logging -import getpass -from pathlib import Path -from threading import RLock -from functools import lru_cache from collections import defaultdict -from typing import Dict, Generator, List, Optional, Set, Tuple +from functools import lru_cache +from pathlib import Path from sqlalchemy import MetaData, insert, or_, alias, func from sqlalchemy import Table, Column, Unicode, Float from sqlalchemy.engine import Engine, Connection from sqlalchemy.sql.expression import select, delete, update +from threading import RLock +from typing import Dict, Generator, List, Optional, Set, Tuple from urllib.parse import urlunparse +import getpass +import logging + +from followthemoney.types import registry + from rigour.ids.wikidata import is_qid from rigour.time import utc_now +from nomenklatura.db import get_upsert_func from nomenklatura.entity import CE from nomenklatura.judgement import Judgement -from nomenklatura.resolver.identifier import Identifier, StrIdent, Pair from nomenklatura.resolver.edge import Edge +from nomenklatura.resolver.identifier import Identifier, StrIdent, Pair from nomenklatura.resolver.linker import Linker +from nomenklatura.statement.statement import Statement from nomenklatura.util import PathLike -from nomenklatura.db import get_upsert_func log = logging.getLogger(__name__) @@ -382,6 +386,18 @@ def prune(self, conn: Connection) -> None: conn.execute(stmt) self.connected.cache_clear() + def apply_statement(self, conn: Connection, stmt: Statement) -> Statement: + if stmt.entity_id is not None: + stmt.canonical_id = self.get_canonical(conn, stmt.entity_id) + if stmt.prop_type == registry.entity.name: + canon_value = self.get_canonical(conn, stmt.value) + if canon_value != stmt.value: + if stmt.original_value is None: + stmt.original_value = stmt.value + # NOTE: this means the key is out of whack here now + stmt.value = canon_value + return stmt + def save(self, conn: Connection, path: PathLike) -> None: """Store the resolver adjacency list to a plain text JSON list.""" with self.lock: diff --git a/tests/test_resolver.py b/tests/test_resolver.py index ab305cb..d298b22 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -145,16 +145,22 @@ def test_resolver_candidates(): def test_resolver_statements(): - resolver = Resolver() - canon = resolver.decide("a1", "a2", Judgement.POSITIVE) - resolver.decide("a2", "b2", Judgement.NEGATIVE) - - stmt = Statement("a1", "holder", "Passport", "b2", "test") - stmt = resolver.apply_statement(stmt) - assert stmt.canonical_id == str(canon) - assert stmt.value == "b2" - - resolver = Resolver() - stmt = resolver.apply_statement(stmt) - assert stmt.canonical_id == "a1" - assert stmt.value == "b2" + engine = get_engine() + metadata = get_metadata() + + with engine.begin() as conn: + resolver = Resolver(engine=engine, metadata=metadata, create=True) + canon = resolver.decide(conn, "a1", "a2", Judgement.POSITIVE) + resolver.decide(conn, "a2", "b2", Judgement.NEGATIVE) + + stmt = Statement("a1", "holder", "Passport", "b2", "test") + stmt = resolver.apply_statement(conn, stmt) + assert stmt.canonical_id == str(canon) + assert stmt.value == "b2" + + other = Resolver( + engine=engine, metadata=metadata, table_name="other", create=True + ) + stmt = other.apply_statement(conn, stmt) + assert stmt.canonical_id == "a1" + assert stmt.value == "b2"