Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OpenFIGI enricher #132

Merged
merged 5 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions nomenklatura/enrich/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import json
import logging
import time
from banal import as_bool
from typing import Union, Any, Dict, Optional, Generator
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -111,6 +112,10 @@ def http_post_json_cached(
except RequestException as rex:
if rex.response is not None and rex.response.status_code in (401, 403):
raise EnrichmentAbort("Authorization failure: %s" % url) from rex
if rex.response is not None and rex.response.status_code == 429:
log.info("Rate limit exceeded. Sleeping for 60s.")
time.sleep(61)
return self.http_post_json_cached(url, cache_key, json, cache_days)
msg = "HTTP POST failed [%s]: %s" % (url, rex)
raise EnrichmentException(msg) from rex
resp_data = resp.json()
Expand Down
77 changes: 77 additions & 0 deletions nomenklatura/enrich/openfigi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import os
import logging
from typing import Any, Generator, Dict, List
from urllib.parse import urljoin
from followthemoney.util import make_entity_id
from normality import slugify

from nomenklatura.entity import CE
from nomenklatura.dataset import DS
from nomenklatura.cache import Cache
from nomenklatura.enrich.common import Enricher, EnricherConfig

log = logging.getLogger(__name__)

URL = "https://api.openfigi.com/v3/search"


class OpenFIGIEnricher(Enricher):
"""Uses the `OpenFIGI` search API to look up FIGIs by company name."""

def __init__(self, dataset: DS, cache: Cache, config: EnricherConfig):
    """Initialise the enricher and configure the OpenFIGI API key header.

    The key is read from the ``OPENFIGI_API_KEY`` environment variable. When
    it is set, it is stored as the ``X-OPENFIGI-APIKEY`` header on
    ``self.session``; when it is missing, no key header is set and a
    warning is logged.
    """
    super().__init__(dataset, cache, config)

    api_key = os.environ.get("OPENFIGI_API_KEY")
    if api_key:
        self.session.headers["X-OPENFIGI-APIKEY"] = api_key
    else:
        # Never store ``None`` as a header value: leave the header off
        # entirely and make the missing configuration visible in the logs.
        log.warning("OpenFIGI has no API key configured (OPENFIGI_API_KEY)")

def make_company_id(self, name: str) -> str:
    """Return the entity ID used for a company with the given name.

    The ID is the ``figi-co-`` prefix followed by the value that
    ``make_entity_id`` computes from ``name``.
    """
    hashed = make_entity_id(name)
    return f"figi-co-{hashed}"

def make_security_id(self, figi: str) -> str:
    """Return the entity ID used for a security with the given FIGI.

    The ID is the ``figi-id-`` prefix followed by the FIGI slugified with
    ``-`` as the separator.
    """
    slug = slugify(figi, sep="-")
    return f"figi-id-{slug}"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these perhaps rather be something like `ofg-{hash}` for companies and `figi-{figi}` for FIGIs?


def search(self, query: str) -> Generator[Dict[str, Any], None, None]:
    """Yield all result items of an OpenFIGI search for ``query``.

    Posts ``{"query": query}`` to ``URL`` through the cached JSON POST
    helper and yields each element of the response's ``"data"`` list. While
    a response carries a ``"next"`` value, that value is sent back as
    ``"start"`` in a follow-up request, so all pages are walked in order.

    Each page is cached under its own key, built from the URL, the query
    and the page offset.
    """
    body: Dict[str, Any] = {"query": query}
    # Pagination cursor; ``None`` means "first page". Deliberately not named
    # ``next`` so the builtin of that name is not shadowed.
    start = None

    while True:
        if start is not None:
            body["start"] = start

        log.info("Searching %s. Offset=%s", query, start)
        cache_key = f"{URL}:{query}:{start}"
        resp = self.http_post_json_cached(URL, cache_key, body)
        yield from resp["data"]

        start = resp.get("next", None)
        if start is None:
            break

def match(self, entity: CE) -> Generator[CE, None, None]:
    """Yield a candidate ``Company`` entity for every named search result.

    Each ``name`` value of ``entity`` is used as a search query. For every
    result that has a ``"name"``, a new ``Company`` entity is created whose
    ID is derived from that name via ``make_company_id`` and whose ``name``
    property is set to it. Results without a name are skipped.
    """
    for name in entity.get("name"):
        for result in self.search(name):
            # Keep the result's name separate from the query ``name`` so the
            # outer loop variable is never rebound mid-iteration.
            result_name = result.get("name", None)
            if result_name is None:
                continue
            # Only build the entity once we know the result is usable.
            other = self.make_entity(entity, "Company")
            other.id = self.make_company_id(result_name)
            other.add("name", result_name)
            yield other

def expand(self, entity: CE, match: CE) -> Generator[CE, None, None]:
    """Yield ``match`` followed by a ``Security`` entity per matching FIGI.

    The first ``name`` value of ``match`` is searched again, and only the
    results whose ``"name"`` equals it exactly are turned into ``Security``
    entities; results that merely resemble the name are skipped. Each
    security gets an ID derived from its FIGI, the FIGI as its ``name``,
    ``match`` as its ``issuer``, and its ticker and security type. A note
    with the exchange code is added when the result has one.

    If ``match`` has no ``name`` value, only ``match`` itself is yielded.
    """
    yield match

    names = match.get("name")
    if not names:
        # Nothing to search for; avoid an IndexError on the empty list.
        return
    name = names[0]

    for item in self.search(name):
        # ``.get`` because search results are not guaranteed to carry a
        # name (``match`` tolerates the same case).
        if item.get("name") != name:
            continue

        security = self.make_entity(match, "Security")
        security.id = self.make_security_id(item["figi"])
        security.add("name", item["figi"])
        security.add("issuer", match)
        security.add("ticker", item["ticker"])
        security.add("type", item["securityType"])
        exch_code = item.get("exchCode")
        if exch_code is not None:
            # Skip the note rather than recording "exchange None".
            security.add("notes", f"exchange {exch_code}")

        yield security
Loading