Commit
feat: Add support to more FHIR resources and improve the organization of the modules (#20)
xmnlab authored Dec 10, 2024
1 parent 77f7e3c commit 69e4bfa
Showing 279 changed files with 5,411 additions and 5,288 deletions.
11 changes: 10 additions & 1 deletion .pre-commit-config.yaml
@@ -2,9 +2,18 @@ default_stages:
  - pre-commit
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.1.0
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-json
      - id: check-toml
      - id: check-xml
      - id: debug-statements
      - id: check-builtin-literals
      - id: check-case-conflict
      - id: check-docstring-first
      - id: detect-private-key

  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: "v3.0.2"
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -108,9 +108,15 @@ warn_redundant_casts = true
warn_unused_configs = true
show_error_codes = true
exclude = ["scripts/"]
plugins = ["pydantic.mypy"]

[[tool.mypy.overrides]]
module = [
"langchain_experimental.sql",
]
ignore_missing_imports = true

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
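
The new pydantic.mypy plugin and the [tool.pydantic-mypy] settings tighten static checking of the pydantic models used for the FHIR resources. A minimal sketch of what these flags catch, using a hypothetical model that is not part of the codebase:

from pydantic import BaseModel


class PatientName(BaseModel):
    family: str
    given: str


PatientName(family="Doe", given="John")  # accepted

# With init_typed and init_forbid_extra enabled, mypy flags calls like
# these at the call site instead of leaving them to runtime validation:
#   PatientName(family="Doe", given=42)       # wrong argument type
#   PatientName(family="Doe", nickname="JD")  # unexpected keyword argument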
7 changes: 0 additions & 7 deletions src/anamnesisai/__init__.py
@@ -2,8 +2,6 @@

from importlib import metadata as importlib_metadata

from anamnesisai.core import extract_fhir_openai


def get_version() -> str:
"""Return the program version."""
@@ -18,8 +16,3 @@ def get_version() -> str:
__version__ = version
__author__ = "Satarupa Deb, Ivan Ogasawara"
__email__ = "satarupa2212@gmail.com, ivan.ogasawara@gmail.com"


__all__ = [
    "extract_fhir_openai",
]
54 changes: 54 additions & 0 deletions src/anamnesisai/config.py
@@ -0,0 +1,54 @@
"""Set of functions and variables used for configuration."""

from __future__ import annotations

# Prompt template for extracting a given FHIR resource from a
# conversation between doctor and patient.
PROMPT_TEMPLATE = """
You are a FHIR Resource generating expert. Given a conversation
between doctor and patient, first create a syntactically correct
FHIR resource in pure JSON format as specified by the user, then
review the result and return the FHIR resource for the input
conversation. Never invent values that are not present in the
conversation; include only the fields whose values appear in the
conversation. Extract and generate only the following FHIR
resources from the conversations and exams:
{resource_detail}
Use clear and concise language for each resource. Maintain patient
confidentiality and adhere to HIPAA regulations. Strive for accuracy
and consistency in your FHIR structures.
In the conversation, `D:` marks a line from the Doctor, and `P:` marks
a line from the Patient.
Conversation:
```
{context}
```
""".strip()

# Prompt template for checking which FHIR resources are present in the
# given conversation.
PROMPT_TEMPLATE_POSSIBLE_RESOURCES = """
Please read the conversation between patient and doctor in the given
context:
```
{context}
```
In the conversation, `D:` marks a line from the Doctor, and `P:` marks
a line from the Patient.
Question: which FHIR resource types could be extracted from this conversation?
Note: I don't need the data, just the names of the resource types.
"""


__all__ = [
    "PROMPT_TEMPLATE",
    "PROMPT_TEMPLATE_POSSIBLE_RESOURCES",
]
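
Downstream, openai.py (below) fills {resource_detail} with a plain str.replace before handing the template to the generation backend, while {context} is left for the generation step itself. A small sketch of previewing a filled prompt, with a hypothetical conversation and resource description made up for illustration:

from anamnesisai.config import PROMPT_TEMPLATE

# hypothetical inputs, for illustration only
conversation = "D: What brings you in today?\nP: I've had a fever for two days."
resource_prompt = (
    "```Resource name: Condition```\n"
    "```Resource explanation: A clinical condition, problem, or diagnosis.```\n"
)

# {resource_detail} is substituted up front (as openai.py does);
# {context} stays in place for the generation backend to fill
prompt = PROMPT_TEMPLATE.replace("{resource_detail}", resource_prompt)
print(prompt.replace("{context}", conversation))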
96 changes: 0 additions & 96 deletions src/anamnesisai/core.py

This file was deleted.

94 changes: 94 additions & 0 deletions src/anamnesisai/openai.py
@@ -0,0 +1,94 @@
"""Anamnesis AI core functions."""

from __future__ import annotations

import logging

from typing import cast

from fhir.resources.resource import Resource
from rago.generation import OpenAIGen
from typeguard import typechecked

from anamnesisai.config import (
    PROMPT_TEMPLATE,
    PROMPT_TEMPLATE_POSSIBLE_RESOURCES,
)
from anamnesisai.supported_fhir import (
    RESOURCES_CLASSES,
    FHIRResourceFoundModel,
)
from anamnesisai.utils import get_resource_detail

# note: this should be moved to another module
# maybe it would be good to get the logging level from an environment variable
logging.basicConfig(
    level=logging.DEBUG,  # Set the logging level to DEBUG
    format="%(asctime)s - %(levelname)s - %(message)s",  # Log format
)


@typechecked
def check_possible_fhir_resources(
    text: str, api_key: str
) -> FHIRResourceFoundModel:
    """Check possible FHIR resources from the given text."""
    try:
        gen = OpenAIGen(
            prompt_template=PROMPT_TEMPLATE_POSSIBLE_RESOURCES,
            model_name="gpt-4o-mini",
            api_key=api_key,
            output_max_length=10384,  # note: calc this number
            structured_output=FHIRResourceFoundModel,
        )
        # the query is already present in the prompt template
        result = gen.generate(query="", context=[text])
    except Exception as e:
        logging.debug(str(e))
        # every field of FHIRResourceFoundModel is a required bool, so an
        # empty call would fail validation; default every resource to False
        return FHIRResourceFoundModel(
            **{cls.__name__: False for cls in RESOURCES_CLASSES}
        )

    return cast(FHIRResourceFoundModel, result)


@typechecked
def extract_fhir(text: str, api_key: str) -> dict[str, Resource]:
"""Extract FHIR from the given text."""
possible_fhir = check_possible_fhir_resources(text, api_key)

results: dict[str, Resource] = {}
for fhir_class in RESOURCES_CLASSES:
resource_name = fhir_class.__name__

if not getattr(possible_fhir, resource_name, False):
logging.debug(
f"{resource_name} resource not found in the conversation."
)
continue

resource_detail = get_resource_detail(fhir_class)

resource_prompt = (
f"```Resource name: {resource_name}```\n"
f"```Resource explanation: {resource_detail}```\n"
)
prompt_template = PROMPT_TEMPLATE.replace(
"{resource_detail}",
resource_prompt,
)
try:
gen = OpenAIGen(
prompt_template=prompt_template,
model_name="gpt-4o-mini",
api_key=api_key,
output_max_length=10384, # note: calc this number
structured_output=fhir_class,
)
# the query is already present in the prompt template
result = gen.generate(query="", context=[text])
except Exception as e:
logging.warning(str(e))
continue

fhir_obj = cast(Resource, result)
results[resource_name] = fhir_obj
return results
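
A minimal usage sketch for the new entry point, assuming a valid OpenAI key is available in the OPENAI_API_KEY environment variable and using a short made-up consultation:

import os

from anamnesisai.openai import extract_fhir

# hypothetical consultation; D: marks the doctor, P: the patient
conversation = (
    "D: Good morning, what brings you in today?\n"
    "P: I've had a sore throat and a mild fever since Monday.\n"
    "D: Any known allergies?\n"
    "P: Yes, penicillin.\n"
)

resources = extract_fhir(conversation, api_key=os.environ["OPENAI_API_KEY"])

# keys are FHIR resource class names (e.g. "Condition", "AllergyIntolerance"),
# values are the populated fhir.resources objects
for name, resource in resources.items():
    print(f"{name}: {resource!r}")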
41 changes: 41 additions & 0 deletions src/anamnesisai/supported_fhir.py
@@ -0,0 +1,41 @@
"""Gather all FHIR resources that is supported by Anamnesis.ai."""

from __future__ import annotations

from fhir.resources.allergyintolerance import AllergyIntolerance
from fhir.resources.condition import Condition
from fhir.resources.diagnosticreport import DiagnosticReport
from fhir.resources.encounter import Encounter
from fhir.resources.familymemberhistory import FamilyMemberHistory
from fhir.resources.immunization import Immunization
from fhir.resources.medicationstatement import MedicationStatement
from fhir.resources.observation import Observation
from fhir.resources.patient import Patient
from fhir.resources.practitioner import Practitioner
from fhir.resources.procedure import Procedure
from pydantic import create_model

RESOURCES_CLASSES = (
    Patient,
    Condition,
    Practitioner,
    Encounter,
    Observation,
    FamilyMemberHistory,
    AllergyIntolerance,
    Immunization,
    MedicationStatement,
    Procedure,
    DiagnosticReport,
)

fields = {cls.__name__: (bool, ...) for cls in RESOURCES_CLASSES}
FHIRResourceFoundModel = create_model(  # type: ignore[call-overload]
    "FHIRResourceFoundModel",
    **fields,
)

__all__ = [
    "RESOURCES_CLASSES",
    "FHIRResourceFoundModel",
]
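
For reference, the create_model call above builds a pydantic model with one required boolean field per supported resource; it is roughly equivalent to writing the model out by hand (sketch only, not part of the commit):

from pydantic import BaseModel


class FHIRResourceFound(BaseModel):  # hand-written equivalent, for illustration
    Patient: bool
    Condition: bool
    Practitioner: bool
    Encounter: bool
    Observation: bool
    FamilyMemberHistory: bool
    AllergyIntolerance: bool
    Immunization: bool
    MedicationStatement: bool
    Procedure: bool
    DiagnosticReport: bool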
22 changes: 22 additions & 0 deletions src/anamnesisai/utils.py
@@ -0,0 +1,22 @@
"""Set of generic functions that supports other modules."""

from __future__ import annotations

from typing import Type

from fhir.resources.resource import Resource
from typeguard import typechecked


@typechecked
def get_resource_detail(resource_class: Type[Resource]) -> str:
"""Get the resource detail from the resource class."""
# note: remove the first part because it is a just disclaimer about the
# python object.
idx = 0
docstring = resource_class.__doc__ or ""
try:
idx = docstring.index("\n\n")
except ValueError:
pass
return " ".join(docstring[idx:].replace("\n", " ").strip().split())