Skip to content

Commit

Permalink
langchain[patch]: Mathpix PDF loader supports arbitrary extra params (langchain-ai#13950)
Browse files Browse the repository at this point in the history

- **Description:** Allow callers to pass arbitrary extra parameters through
  to the Mathpix API request made by the PDF loader.
- **Issue:** langchain-ai#12773
- **Dependencies:** None

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
  • Loading branch information
Chad Norvell and baskaryan authored Nov 29, 2023
1 parent 9e2ae86 commit 1c4bfb8
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion libs/langchain/langchain/document_loaders/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ def __init__(
processed_file_format: str = "md",
max_wait_time_seconds: int = 500,
should_clean_pdf: bool = False,
extra_request_data: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Initialize with a file path.
Expand All @@ -382,6 +383,7 @@ def __init__(
max_wait_time_seconds: a maximum time to wait for the response from
the server. Default is 500.
should_clean_pdf: a flag to clean the PDF file. Default is False.
extra_request_data: Additional request data.
**kwargs: additional keyword arguments.
"""
self.mathpix_api_key = get_from_dict_or_env(
Expand All @@ -392,6 +394,9 @@ def __init__(
)
super().__init__(file_path, **kwargs)
self.processed_file_format = processed_file_format
self.extra_request_data = (
extra_request_data if extra_request_data is not None else {}
)
self.max_wait_time_seconds = max_wait_time_seconds
self.should_clean_pdf = should_clean_pdf

Expand All @@ -405,7 +410,10 @@ def url(self) -> str:

@property
def data(self) -> dict:
# Builds the request payload: the options dict serialized to a JSON string
# and stored under the "options_json" key.
# Line removed by this commit: options held only the conversion format.
options = {"conversion_formats": {self.processed_file_format: True}}
# Lines added by this commit: extra_request_data is unpacked after the default
# key, so a caller-supplied "conversion_formats" entry replaces the default.
options = {
"conversion_formats": {self.processed_file_format: True},
**self.extra_request_data,
}
return {"options_json": json.dumps(options)}

def send_pdf(self) -> str:
Expand Down

0 comments on commit 1c4bfb8

Please sign in to comment.