Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OpenFIGI enricher #132

Merged
merged 5 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions nomenklatura/enrich/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import json
import logging
import time
from banal import as_bool
from typing import Union, Any, Dict, Optional, Generator
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -101,6 +102,7 @@ def http_post_json_cached(
cache_key: str,
json: Any,
cache_days: Optional[int] = None,
retry: int = 3,
) -> Any:
cache_days_ = self.cache_days if cache_days is None else cache_days
resp_data = self.cache.get_json(cache_key, max_age=cache_days_)
Expand All @@ -111,6 +113,17 @@ def http_post_json_cached(
except RequestException as rex:
if rex.response is not None and rex.response.status_code in (401, 403):
raise EnrichmentAbort("Authorization failure: %s" % url) from rex
if rex.response is not None and rex.response.status_code == 429:
if retry > 0:
log.info("Rate limit exceeded. Sleeping for 60s.")
time.sleep(61)
return self.http_post_json_cached(
url, cache_key, json, cache_days, retry - 1
)
else:
raise EnrichmentAbort(
"Rate limit exceeded and out of retries: %s" % url
) from rex
msg = "HTTP POST failed [%s]: %s" % (url, rex)
raise EnrichmentException(msg) from rex
resp_data = resp.json()
Expand Down
82 changes: 82 additions & 0 deletions nomenklatura/enrich/openfigi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
import logging
from typing import Any, Generator, Dict, List
from urllib.parse import urljoin
from followthemoney.util import make_entity_id
from normality import slugify

from nomenklatura.entity import CE
from nomenklatura.dataset import DS
from nomenklatura.cache import Cache
from nomenklatura.enrich.common import Enricher, EnricherConfig

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Search endpoint of the OpenFIGI v3 API; the enricher below POSTs queries to it.
URL = "https://api.openfigi.com/v3/search"


class OpenFIGIEnricher(Enricher):
    """Uses the `OpenFIGI` search API to look up FIGIs by company name.

    ``match`` proposes one candidate ``Company`` per named search hit, and
    ``expand`` emits a ``Security`` for every search hit whose name is exactly
    the name of the accepted company match.
    """

    def __init__(self, dataset: DS, cache: Cache, config: EnricherConfig):
        super().__init__(dataset, cache, config)

        # The API key is optional: the header is only set when the
        # OPENFIGI_API_KEY environment variable is present.
        api_key = os.environ.get("OPENFIGI_API_KEY")
        if api_key is not None:
            self.session.headers["X-OPENFIGI-APIKEY"] = api_key

    def make_company_id(self, name: str) -> str:
        """Build the entity ID used for a company with the given name."""
        return f"figi-co-{make_entity_id(name)}"

    def make_security_id(self, figi: str) -> str:
        """Build the entity ID used for the security with the given FIGI."""
        return f"figi-id-{slugify(figi, sep='-')}"

    def search(self, query: str) -> Generator[Dict[str, Any], None, None]:
        """Yield every search hit for ``query``, following result pages.

        Each page is fetched through ``http_post_json_cached`` under a cache
        key made of the URL, the query and the page offset. Paging continues
        for as long as the response carries a non-null ``next`` value, which
        is sent back as ``start`` in the following request.
        """
        body: Dict[str, Any] = {"query": query}
        offset = None

        while True:
            if offset is not None:
                body["start"] = offset

            log.info("Searching %s. Offset=%s", query, offset)
            cache_key = f"{URL}:{query}:{offset}"
            resp = self.http_post_json_cached(URL, cache_key, body)

            # Tolerate both a missing and a null "data" member.
            yield from resp.get("data") or []

            offset = resp.get("next")
            if offset is None:
                break

    def match(self, entity: CE) -> Generator[CE, None, None]:
        """Yield a candidate ``Company`` for each named hit of each entity name."""
        for name in entity.get("name"):
            for hit in self.search(name):
                hit_name = hit.get("name")
                if hit_name is None:
                    continue
                other = self.make_entity(entity, "Company")
                other.id = self.make_company_id(hit_name)
                other.add("name", hit_name)
                yield other

    def expand(self, entity: CE, match: CE) -> Generator[CE, None, None]:
        """Yield a ``Security`` for each search hit named exactly like ``match``.

        Nothing is yielded when ``match`` has no name. Hits without a FIGI are
        skipped, since the FIGI provides both the ID and the name of the
        emitted security.
        """
        names = match.get("name")
        if not names:
            return
        name = names[0]

        for item in self.search(name):
            # Only emit the securities which match the name of the positive match
            # to the company exactly. Skip everything else.
            if item.get("name") != name:
                continue

            figi = item.get("figi")
            if figi is None:
                continue

            security = self.make_entity(match, "Security")
            security.id = self.make_security_id(figi)
            security.add("name", figi)
            security.add("issuer", match)
            security.add("ticker", item.get("ticker"))
            security.add("type", item.get("securityType"))
            exch_code = item.get("exchCode")
            if exch_code is not None:
                security.add("notes", f"exchange {exch_code}")
            security.add("description", item.get("securityDescription"))

            yield security
71 changes: 71 additions & 0 deletions tests/enrich/test_openfigi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import requests_mock
from nomenklatura.cache import Cache
from nomenklatura.dataset import Dataset
from nomenklatura.enrich import get_enricher
from nomenklatura.enrich.common import Enricher
from nomenklatura.entity import CompositeEntity


# Import path of the enricher under test, in "module:Class" form.
PATH = "nomenklatura.enrich.openfigi:OpenFIGIEnricher"

# Canned search payload served by the mocked endpoint: two hits with
# different names, the second one without an exchange code.
RESPONSE = {
    "data": [
        {
            "figi": "BBG0005S7P81",
            "securityType": "EURO-DOLLAR",
            "marketSector": "Govt",
            "ticker": "BKRUSS F 12/31/01",
            "name": "CENTRAL BANK OF RUSSIA",
            "exchCode": "NOT LISTED",
            "shareClassFIGI": None,
            "compositeFIGI": None,
            "securityType2": None,
            "securityDescription": "BKRUSS Float 12/31/01",
        },
        {
            "figi": "BBG002T3FYF0",
            "securityType": "Index",
            "marketSector": "Index",
            "ticker": "RCRAMAR",
            "name": "Bank of Russia Russia Central",
            "exchCode": None,
            "shareClassFIGI": None,
            "compositeFIGI": None,
            "securityType2": None,
            "securityDescription": "Bank of Russia Russia Central",
        },
    ],
}


# Dataset shared by the enricher fixture and the entities built in the test.
dataset = Dataset.make({"name": "ext_open_figi", "title": "OpenFIGI"})


def load_enricher():
    """Resolve the enricher class from PATH and instantiate it for the test dataset."""
    cls = get_enricher(PATH)
    assert issubclass(cls, Enricher)
    # Default cache for the dataset and an empty enricher configuration.
    return cls(dataset, Cache.make_default(dataset), {})


def test_nominatim_match():
    """Match and expand a company against a mocked OpenFIGI search response."""
    # NOTE(review): the name mentions nominatim, but PATH points at the
    # OpenFIGI enricher; the name is kept so existing test IDs stay stable.
    enricher = load_enricher()
    entity_data = {
        "schema": "Company",
        "id": "xxx",
        "properties": {"name": ["Bank of Russia"]},
    }
    with requests_mock.Mocker() as mocker:
        mocker.post("/v3/search", json=RESPONSE)

        entity = CompositeEntity.from_data(dataset, entity_data)

        # Both hits in RESPONSE carry a name, so each becomes a candidate.
        matches = list(enricher.match(entity))
        assert len(matches) == 2, matches
        first, second = matches
        assert first.get("name") == ["CENTRAL BANK OF RUSSIA"], first
        assert second.get("name") == ["Bank of Russia Russia Central"], second

        # Only the hit whose name equals the first match's name is expanded.
        securities = list(enricher.expand(entity, matches[0]))
        assert len(securities) == 1, securities
        assert securities[0].get("ticker") == ["BKRUSS F 12/31/01"], securities
        assert securities[0].get("issuer") == [matches[0].id], securities