temp fix for ContentFormat import bug

2025-01-09 16:03:35 -05:00 · 2025-01-09 16:03:35 -05:00 · b211ddbe82
commit b211ddbe82
parent 811e4413aa
1 changed file with 6 additions and 2 deletions
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -38,10 +38,12 @@ from azure.ai.documentintelligence import DocumentIntelligenceClient
 from azure.ai.documentintelligence.models import (
    AnalyzeDocumentRequest,
    AnalyzeResult,
-    ContentFormat,
    DocumentAnalysisFeature,
 )
 from azure.identity import DefaultAzureCredential
+# TODO: currently, there is a bug in the document intelligence SDK with importing the "ContentFormat" enum.
+# This constant is a temporary fix until the bug is resolved.
+CONTENT_FORMAT = "markdown"

 # Optional Transcription support
 IS_AUDIO_TRANSCRIPTION_CAPABLE = False
@@ -1367,7 +1369,7 @@ class DocumentIntelligenceConverter(DocumentConverter):
            model_id="prebuilt-layout",
            analyze_request=AnalyzeDocumentRequest(bytes_source=file_bytes),
            features=analysis_features,
-            output_content_format=ContentFormat.MARKDOWN,
+            output_content_format=CONTENT_FORMAT, # TODO: replace with "ContentFormat.MARKDOWN" when the bug is fixed
        )
        result: AnalyzeResult = poller.result()

@@ -1446,6 +1448,8 @@ class MarkItDown:

        if docintel_endpoint is not None:
            self._docintel_converter = DocumentIntelligenceConverter(endpoint=docintel_endpoint)
+        else:
+            self._docintel_converter = None

        self._page_converters: List[DocumentConverter] = []