Skip to content

Commit

Permalink
resolver_statements passes
Browse files Browse the repository at this point in the history
  • Loading branch information
jbothma committed Jan 3, 2025
1 parent af68f71 commit 6a4d131
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 21 deletions.
32 changes: 24 additions & 8 deletions nomenklatura/resolver/resolver.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
import logging
import getpass
from pathlib import Path
from threading import RLock
from functools import lru_cache
from collections import defaultdict
from typing import Dict, Generator, List, Optional, Set, Tuple
from functools import lru_cache
from pathlib import Path
from sqlalchemy import MetaData, insert, or_, alias, func
from sqlalchemy import Table, Column, Unicode, Float
from sqlalchemy.engine import Engine, Connection
from sqlalchemy.sql.expression import select, delete, update
from threading import RLock
from typing import Dict, Generator, List, Optional, Set, Tuple
from urllib.parse import urlunparse
import getpass
import logging

from followthemoney.types import registry

from rigour.ids.wikidata import is_qid
from rigour.time import utc_now

from nomenklatura.db import get_upsert_func
from nomenklatura.entity import CE
from nomenklatura.judgement import Judgement
from nomenklatura.resolver.identifier import Identifier, StrIdent, Pair
from nomenklatura.resolver.edge import Edge
from nomenklatura.resolver.identifier import Identifier, StrIdent, Pair
from nomenklatura.resolver.linker import Linker
from nomenklatura.statement.statement import Statement
from nomenklatura.util import PathLike
from nomenklatura.db import get_upsert_func

# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)

Expand Down Expand Up @@ -382,6 +386,18 @@ def prune(self, conn: Connection) -> None:
conn.execute(stmt)
self.connected.cache_clear()

def apply_statement(self, conn: Connection, stmt: Statement) -> Statement:
    """Point the entity references of a statement at their canonical IDs.

    The statement is modified in place and the same object is returned.

    * When ``stmt.entity_id`` is set, ``stmt.canonical_id`` is assigned the
      result of ``self.get_canonical`` for that ID.
    * When the property type is the entity type, ``stmt.value`` is looked up
      the same way. If the lookup yields a different ID, the value is
      replaced, and the prior value is kept in ``stmt.original_value``
      unless something is already stored there.

    Args:
        conn: Connection handed through to ``self.get_canonical``.
        stmt: The statement to update.

    Returns:
        The ``stmt`` object that was passed in.
    """
    subject_id = stmt.entity_id
    if subject_id is not None:
        stmt.canonical_id = self.get_canonical(conn, subject_id)

    if stmt.prop_type == registry.entity.name:
        referenced_id = stmt.value
        resolved_id = self.get_canonical(conn, referenced_id)
        if resolved_id != referenced_id:
            # Preserve the first recorded value only; never overwrite an
            # original that is already stored.
            if stmt.original_value is None:
                stmt.original_value = referenced_id
            # NOTE: this means the key is out of whack here now
            stmt.value = resolved_id

    return stmt

def save(self, conn: Connection, path: PathLike) -> None:
"""Store the resolver adjacency list to a plain text JSON list."""
with self.lock:
Expand Down
32 changes: 19 additions & 13 deletions tests/test_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,16 +145,22 @@ def test_resolver_candidates():


def test_resolver_statements():
# NOTE(review): this span appears to hold both sides of the diff back to
# back (the file header reports 19 additions & 13 deletions); the +/- markers
# and the indentation were lost. The first 13 lines look like the removed
# version, the remaining 19 like the added version - confirm against the repo.
#
# --- presumably the removed version: no engine/metadata, no connection ---
resolver = Resolver()
canon = resolver.decide("a1", "a2", Judgement.POSITIVE)
resolver.decide("a2", "b2", Judgement.NEGATIVE)

stmt = Statement("a1", "holder", "Passport", "b2", "test")
stmt = resolver.apply_statement(stmt)
# "a1" was judged POSITIVE with "a2", so its canonical ID is the one
# returned by decide().
assert stmt.canonical_id == str(canon)
# "b2" only appears in a NEGATIVE judgement; the value is expected unchanged.
assert stmt.value == "b2"

# A new resolver with no decide() calls made on it in this test: the
# statement is expected to keep its own IDs.
resolver = Resolver()
stmt = resolver.apply_statement(stmt)
assert stmt.canonical_id == "a1"
assert stmt.value == "b2"
# --- presumably the added version: same scenario, with an engine, metadata
# and an explicit connection passed to decide() and apply_statement() ---
engine = get_engine()
metadata = get_metadata()

with engine.begin() as conn:
resolver = Resolver(engine=engine, metadata=metadata, create=True)
canon = resolver.decide(conn, "a1", "a2", Judgement.POSITIVE)
resolver.decide(conn, "a2", "b2", Judgement.NEGATIVE)

stmt = Statement("a1", "holder", "Passport", "b2", "test")
stmt = resolver.apply_statement(conn, stmt)
# Same expectations as above: canonical ID from the POSITIVE judgement,
# value "b2" left as-is.
assert stmt.canonical_id == str(canon)
assert stmt.value == "b2"

# Second resolver on the same engine/metadata but with table_name="other";
# no decide() calls are made on it here, and the assertions expect the
# statement to come back with canonical_id "a1".
other = Resolver(
engine=engine, metadata=metadata, table_name="other", create=True
)
stmt = other.apply_statement(conn, stmt)
assert stmt.canonical_id == "a1"
assert stmt.value == "b2"

0 comments on commit 6a4d131

Please sign in to comment.