temp fix for ContentFormat import bug

This commit is contained in:
Kenny Zhang 2025-01-09 16:03:35 -05:00
parent 811e4413aa
commit b211ddbe82

View file

@ -38,10 +38,12 @@ from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import ( from azure.ai.documentintelligence.models import (
AnalyzeDocumentRequest, AnalyzeDocumentRequest,
AnalyzeResult, AnalyzeResult,
ContentFormat,
DocumentAnalysisFeature, DocumentAnalysisFeature,
) )
from azure.identity import DefaultAzureCredential from azure.identity import DefaultAzureCredential
# TODO: The Document Intelligence SDK currently has a bug that breaks importing the "ContentFormat" enum.
# Until that bug is resolved, this constant is a temporary stand-in for "ContentFormat.MARKDOWN".
CONTENT_FORMAT = "markdown"
# Optional Transcription support # Optional Transcription support
IS_AUDIO_TRANSCRIPTION_CAPABLE = False IS_AUDIO_TRANSCRIPTION_CAPABLE = False
@ -1367,7 +1369,7 @@ class DocumentIntelligenceConverter(DocumentConverter):
model_id="prebuilt-layout", model_id="prebuilt-layout",
analyze_request=AnalyzeDocumentRequest(bytes_source=file_bytes), analyze_request=AnalyzeDocumentRequest(bytes_source=file_bytes),
features=analysis_features, features=analysis_features,
output_content_format=ContentFormat.MARKDOWN, output_content_format=CONTENT_FORMAT, # TODO: replace with "ContentFormat.MARKDOWN" when the bug is fixed
) )
result: AnalyzeResult = poller.result() result: AnalyzeResult = poller.result()
@ -1446,6 +1448,8 @@ class MarkItDown:
if docintel_endpoint is not None: if docintel_endpoint is not None:
self._docintel_converter = DocumentIntelligenceConverter(endpoint=docintel_endpoint) self._docintel_converter = DocumentIntelligenceConverter(endpoint=docintel_endpoint)
else:
self._docintel_converter = None
self._page_converters: List[DocumentConverter] = [] self._page_converters: List[DocumentConverter] = []