Doc Intelligence fixes for refactored code (#325)

* added priority flag to doc intel converter constructor
* fixed analysis features bug for docx
This commit is contained in:
KennyZhang1 2025-02-11 19:01:46 -05:00 committed by GitHub
parent 935da9976c
commit 97eeed5f32
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,4 +1,5 @@
from typing import Any, Union
import re
# Azure imports
from azure.ai.documentintelligence import DocumentIntelligenceClient
@ -36,6 +37,7 @@ class DocumentIntelligenceConverter(DocumentConverter):
api_version=self.api_version,
credential=DefaultAzureCredential(),
)
self._priority = priority
def convert(
self, local_path: str, **kwargs: Any
@ -62,8 +64,8 @@ class DocumentIntelligenceConverter(DocumentConverter):
with open(local_path, "rb") as f:
file_bytes = f.read()
# Certain document analysis features are not available for filetypes (.xlsx, .pptx, .html)
if extension.lower() in [".xlsx", ".pptx", ".html"]:
# Certain document analysis features are not available for office filetypes (.xlsx, .pptx, .html, .docx)
if extension.lower() in [".xlsx", ".pptx", ".html", ".docx"]:
analysis_features = []
else:
analysis_features = [