Skip to content

Commit

Permalink
chores: polish unstructured io (camel-ai#412)
Browse files: browse the repository at this point in the history
  • Loading branch information
Wendong-Fan authored Dec 28, 2023
1 parent 78b7046 commit 87ad938
Show file tree
Hide file tree
Showing 3 changed files with 350 additions and 443 deletions.
11 changes: 5 additions & 6 deletions camel/functions/unstructured_io_fuctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,14 @@ def ensure_unstructured_version(self, min_version: str) -> None:
def parse_file_or_url(
self,
input_path: str,
**kwargs: Any,
) -> Union[Any, List[Any]]:
r"""Loads a file or a URL and parses its contents as unstructured data.
Args:
input_path (str): Path to the file or URL to be parsed.
**kwargs (Any): Extra keyword arguments passed to the partition function.
Returns:
List[Any]: The elements after parsing the file or URL, could be a
dict, list, etc., depending on the content. If return_str is
Expand Down Expand Up @@ -115,7 +118,7 @@ def parse_file_or_url(
from unstructured.partition.html import partition_html

try:
elements = partition_html(url=input_path)
elements = partition_html(url=input_path, **kwargs)
return elements
except Exception as e:
raise Exception("Failed to parse the URL.") from e
Expand All @@ -132,7 +135,7 @@ def parse_file_or_url(
# Read the file
try:
with open(input_path, "rb") as f:
elements = partition(file=f)
elements = partition(file=f, **kwargs)
return elements
except Exception as e:
raise Exception(
Expand Down Expand Up @@ -324,7 +327,6 @@ def stage_elements(self, elements: List[Any], stage_type: str,
'dict_to_elements',
'stage_csv_for_prodigy',
'stage_for_prodigy',
'stage_for_argilla',
'stage_for_baseplate',
'stage_for_datasaur',
'stage_for_label_box',
Expand All @@ -345,7 +347,6 @@ def stage_elements(self, elements: List[Any], stage_type: str,
"""

from unstructured.staging import (
argilla,
base,
baseplate,
datasaur,
Expand All @@ -370,8 +371,6 @@ def stage_elements(self, elements: List[Any], stage_type: str,
"stage_for_prodigy":
lambda els, **kw: prodigy.stage_for_prodigy(
els, kw.get('metadata', [])),
"stage_for_argilla":
lambda els, **kw: argilla.stage_for_argilla(els, **kw),
"stage_for_baseplate":
baseplate.stage_for_baseplate,
"stage_for_datasaur":
Expand Down
Loading

0 comments on commit 87ad938

Please sign in to comment.