From bf6a15e9b5eb89820bf82c04cbe934bf62fb8617 Mon Sep 17 00:00:00 2001
From: KennyZhang1 <90438893+KennyZhang1@users.noreply.github.com>
Date: Sat, 1 Feb 2025 01:23:26 -0500
Subject: [PATCH 1/3] Kennyzhang/docintel docs (#312)

* updated docs to include doc intelligence

* include reference to doc intel setup docs
---
 README.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/README.md b/README.md
index 6bc91e6..76a4d3f 100644
--- a/README.md
+++ b/README.md
@@ -33,12 +33,20 @@ Or use `-o` to specify the output file:
 markitdown path-to-file.pdf -o document.md
 ```
 
+To use Document Intelligence conversion:
+
+```bash
+markitdown path-to-file.pdf -o document.md -d -e "<document_intelligence_endpoint>"
+```
+
 You can also pipe content:
 
 ```bash
 cat path-to-file.pdf | markitdown
 ```
 
+More information about how to set up an Azure Document Intelligence Resource can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/document-intelligence/how-to-guides/create-document-intelligence-resource?view=doc-intel-4.0.0).
+
 ### Python API
 
 Basic usage in Python:
@@ -51,6 +59,16 @@ result = md.convert("test.xlsx")
 print(result.text_content)
 ```
 
+Document Intelligence conversion in Python:
+
+```python
+from markitdown import MarkItDown
+
+md = MarkItDown(docintel_endpoint="<document_intelligence_endpoint>")
+result = md.convert("test.pdf")
+print(result.text_content)
+```
+
 To use Large Language Models for image descriptions, provide `llm_client` and `llm_model`:
 
 ```python

From 7bea2672a05f5877acb8690b20222593dab13788 Mon Sep 17 00:00:00 2001
From: ZeyuTeng96 <96521059+ZeyuTeng96@users.noreply.github.com>
Date: Sun, 9 Feb 2025 12:28:35 +0800
Subject: [PATCH 2/3] remove leading and trailing \n for HtmlConverter (#262)

---
 src/markitdown/_markitdown.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py
index ae6a7b4..6f40547 100644
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -236,6 +236,9 @@ class HtmlConverter(DocumentConverter):
 
         assert isinstance(webpage_text, str)
 
+        # Strip all leading and trailing whitespace (not only \n)
+        webpage_text = webpage_text.strip()
+
         return DocumentConverterResult(
             title=None if soup.title is None else soup.title.string,
             text_content=webpage_text,

From 3090917a49dc8ec94682c47747f3e2692e3953ae Mon Sep 17 00:00:00 2001
From: James Hickey <jamesmh@users.noreply.github.com>
Date: Sun, 9 Feb 2025 00:30:13 -0400
Subject: [PATCH 3/3] Typo fixed (#270)

---
 src/markitdown/_markitdown.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py
index 6f40547..e4884ec 100644
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -217,7 +217,7 @@ class HtmlConverter(DocumentConverter):
         return result
 
     def _convert(self, html_content: str) -> Union[None, DocumentConverterResult]:
-        """Helper function that converts and HTML string."""
+        """Helper function that converts an HTML string."""
 
         # Parse the string
         soup = BeautifulSoup(html_content, "html.parser")