Skip to content

Commit a431454

Browse files
committed
revert version bump of unstructured
1 parent 16fceb1 commit a431454

File tree

3 files changed

+78
-180
lines changed

3 files changed

+78
-180
lines changed

airbyte_cdk/sources/file_based/file_types/unstructured_parser.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -378,9 +378,15 @@ def _get_filetype(self, file: IOBase, remote_file: RemoteFile) -> Optional[FileT
378378
# detect_filetype is either using the file name or file content
379379
# if possible, try to leverage the file name to detect the file type
380380
# if the file name is not available, use the file content
381-
file_type = detect_filetype(
382-
file_path=remote_file.uri,
383-
)
381+
file_type: FileType | None = None
382+
try:
383+
file_type = detect_filetype(
384+
file_path=remote_file.uri,
385+
)
386+
except Exception:
387+
# Path doesn't exist locally. Try something else...
388+
pass
389+
384390
if file_type is not None and not file_type == FileType.UNK:
385391
return file_type
386392

poetry.lock

Lines changed: 68 additions & 176 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ nltk = { version = "3.9.1", optional = true }
7373
# This will ensure that even when you run poetry install or pip install, the compatible version of numpy will always be chosen.
7474
# airbyte-ci will try to install latest version when --use-local-cdk is used, resulting in the conflict.
7575
numpy = "<2"
76-
unstructured = { version = "^0.16.5", extras = ["docx", "pptx"], optional = true }
76+
unstructured = { version = "0.10.27", extras = ["docx", "pptx"], optional = true }
7777
"unstructured.pytesseract" = { version = ">=0.3.12", optional = true }
7878
pyjwt = "^2.8.0"
7979
cryptography = ">=42.0.5,<44.0.0"

0 commit comments

Comments
 (0)