Skip to content

Commit

Permalink
Merge pull request #132 from opensanctions/openfigi
Browse files: browse the repository at this point in the history
Add OpenFIGI enricher
  • Loading branch information
pudo authored Nov 15, 2023
2 parents 507dcaf + c7dec0a commit 4a3333d
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 0 deletions.
13 changes: 13 additions & 0 deletions nomenklatura/enrich/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import json
import logging
import time
from banal import as_bool
from typing import Union, Any, Dict, Optional, Generator
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -101,6 +102,7 @@ def http_post_json_cached(
cache_key: str,
json: Any,
cache_days: Optional[int] = None,
retry: int = 3,
) -> Any:
cache_days_ = self.cache_days if cache_days is None else cache_days
resp_data = self.cache.get_json(cache_key, max_age=cache_days_)
Expand All @@ -111,6 +113,17 @@ def http_post_json_cached(
except RequestException as rex:
if rex.response is not None and rex.response.status_code in (401, 403):
raise EnrichmentAbort("Authorization failure: %s" % url) from rex
if rex.response is not None and rex.response.status_code == 429:
if retry > 0:
log.info("Rate limit exceeded. Sleeping for 60s.")
time.sleep(61)
return self.http_post_json_cached(
url, cache_key, json, cache_days, retry - 1
)
else:
raise EnrichmentAbort(
"Rate limit exceeded and out of retries: %s" % url
) from rex
msg = "HTTP POST failed [%s]: %s" % (url, rex)
raise EnrichmentException(msg) from rex
resp_data = resp.json()
Expand Down
82 changes: 82 additions & 0 deletions nomenklatura/enrich/openfigi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
import logging
from typing import Any, Generator, Dict, List
from urllib.parse import urljoin
from followthemoney.util import make_entity_id
from normality import slugify

from nomenklatura.entity import CE
from nomenklatura.dataset import DS
from nomenklatura.cache import Cache
from nomenklatura.enrich.common import Enricher, EnricherConfig

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Endpoint of the OpenFIGI v3 search API. The enricher's search() passes it to
# http_post_json_cached() and also uses it as the prefix of its cache keys.
URL = "https://api.openfigi.com/v3/search"


class OpenFIGIEnricher(Enricher):
    """Uses the `OpenFIGI` search API to look up FIGIs by company name.

    ``match`` proposes one ``Company`` candidate per search hit that carries
    a name; ``expand`` then emits a ``Security`` for every hit whose name is
    exactly equal to the name of the accepted candidate.
    """

    def __init__(self, dataset: DS, cache: Cache, config: EnricherConfig):
        """Set up the enricher and attach the API key header if one is set.

        The key is read from the ``OPENFIGI_API_KEY`` environment variable.
        When it is unset, no header is added and requests go out without it.
        """
        super().__init__(dataset, cache, config)

        api_key = os.environ.get("OPENFIGI_API_KEY")
        if api_key is not None:
            self.session.headers["X-OPENFIGI-APIKEY"] = api_key

    def make_company_id(self, name: str) -> str:
        """Derive the ID of a company candidate from its name."""
        return f"figi-co-{make_entity_id(name)}"

    def make_security_id(self, figi: str) -> str:
        """Derive the ID of a security from its FIGI code."""
        return f"figi-id-{slugify(figi, sep='-')}"

    def search(self, query: str) -> Generator[Dict[str, Any], None, None]:
        """Yield every record the search API returns for ``query``.

        Pages are followed for as long as a response carries a ``next``
        value; that value is sent back as ``start`` in the following request.
        Each page is cached under its own key (URL, query and offset).

        Record values may be ``None`` (the API returns nulls for fields it
        has no data for), hence ``Dict[str, Any]``.
        """
        body: Dict[str, Any] = {"query": query}
        start = None

        while True:
            if start is not None:
                body["start"] = start

            # Lazy %-formatting: the message is only built if INFO is enabled.
            log.info("Searching %s. Offset=%s", query, start)
            cache_key = f"{URL}:{query}:{start}"
            resp = self.http_post_json_cached(URL, cache_key, body)
            if "data" in resp:
                yield from resp["data"]

            start = resp.get("next", None)
            if start is None:
                break

    def match(self, entity: CE) -> Generator[CE, None, None]:
        """Yield a ``Company`` candidate for each named search hit.

        Every name of ``entity`` is searched separately. Hits without a
        ``name`` are skipped. Several hits can share a name, in which case
        candidates with the same ID are yielded more than once.
        """
        for name in entity.get("name"):
            for match in self.search(name):
                match_name = match.get("name", None)
                if match_name is None:
                    continue
                other = self.make_entity(entity, "Company")
                other.id = self.make_company_id(match_name)
                other.add("name", match_name)
                yield other

    def expand(self, entity: CE, match: CE) -> Generator[CE, None, None]:
        """Yield the ``Security`` entities issued by the accepted ``match``.

        The first name of ``match`` is searched again. Nothing is yielded
        when ``match`` has no name at all.
        """
        names = match.get("name")
        if not names:
            # Without a name there is nothing to search for; indexing [0]
            # here would raise IndexError.
            return
        name = names[0]

        for item in self.search(name):
            # Only emit the securities which match the name of the positive match
            # to the company exactly. Skip everything else.
            if item.get("name") != name:
                continue

            # The FIGI is the identity of the security; a record without one
            # cannot be turned into an entity.
            figi = item.get("figi")
            if figi is None:
                continue

            security = self.make_entity(match, "Security")
            security.id = self.make_security_id(figi)
            security.add("name", figi)
            security.add("issuer", match)
            # Optional fields are read with .get() so that a sparse record
            # does not abort the whole enrichment with a KeyError.
            # NOTE(review): this relies on add() ignoring None, as the
            # original code already did for null values -- confirm.
            security.add("ticker", item.get("ticker"))
            security.add("type", item.get("securityType"))
            exch_code = item.get("exchCode")
            if exch_code is not None:
                security.add("notes", f"exchange {exch_code}")
            security.add("description", item.get("securityDescription"))

            yield security
71 changes: 71 additions & 0 deletions tests/enrich/test_openfigi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import requests_mock
from nomenklatura.cache import Cache
from nomenklatura.dataset import Dataset
from nomenklatura.enrich import get_enricher
from nomenklatura.enrich.common import Enricher
from nomenklatura.entity import CompositeEntity


# Location of the enricher under test, in the "module:Class" form that is
# handed to get_enricher() by load_enricher().
PATH = "nomenklatura.enrich.openfigi:OpenFIGIEnricher"
# Canned search payload served by the mocked POST endpoint in the test below:
# two records with different issuer names. Several fields are null, including
# the second record's exchCode.
RESPONSE = {
"data": [
{
"figi": "BBG0005S7P81",
"securityType": "EURO-DOLLAR",
"marketSector": "Govt",
"ticker": "BKRUSS F 12/31/01",
"name": "CENTRAL BANK OF RUSSIA",
"exchCode": "NOT LISTED",
"shareClassFIGI": None,
"compositeFIGI": None,
"securityType2": None,
"securityDescription": "BKRUSS Float 12/31/01",
},
{
"figi": "BBG002T3FYF0",
"securityType": "Index",
"marketSector": "Index",
"ticker": "RCRAMAR",
"name": "Bank of Russia Russia Central",
"exchCode": None,
"shareClassFIGI": None,
"compositeFIGI": None,
"securityType2": None,
"securityDescription": "Bank of Russia Russia Central",
},
]
}


# Dataset shared by the enricher, its cache and the entity built in the test.
dataset = Dataset.make({"name": "ext_open_figi", "title": "OpenFIGI"})


def load_enricher():
    """Build the enricher named by PATH on the default cache of ``dataset``.

    The class is resolved through get_enricher() and checked to be an
    Enricher subclass before it is instantiated with an empty config.
    """
    cls = get_enricher(PATH)
    assert issubclass(cls, Enricher)
    return cls(dataset, Cache.make_default(dataset), {})


def test_nominatim_match():
    """Check OpenFIGI match() and expand() against a mocked search endpoint.

    Despite its name, this test exercises the OpenFIGI enricher: both
    records in RESPONSE become company candidates, and expanding the first
    candidate yields only the security whose issuer name equals it exactly.
    """
    enricher = load_enricher()
    query = {
        "schema": "Company",
        "id": "xxx",
        "properties": {"name": ["Bank of Russia"]},
    }
    with requests_mock.Mocker() as m:
        # Every POST to the search path is answered with the canned payload.
        m.post("/v3/search", json=RESPONSE)

        ent = CompositeEntity.from_data(dataset, query)
        m_results = list(enricher.match(ent))
        assert len(m_results) == 2, m_results
        m1, m2 = m_results
        assert m1.get("name") == ["CENTRAL BANK OF RUSSIA"], m1
        assert m2.get("name") == ["Bank of Russia Russia Central"], m2

        e_results = list(enricher.expand(ent, m1))
        assert len(e_results) == 1, e_results
        (security,) = e_results
        assert security.get("ticker") == ["BKRUSS F 12/31/01"], e_results
        assert security.get("issuer") == [m1.id], e_results

0 comments on commit 4a3333d

Please sign in to comment.