diff --git a/README.md b/README.md index 252d746..76a4d3f 100644 --- a/README.md +++ b/README.md @@ -81,12 +81,6 @@ result = md.convert("example.jpg") print(result.text_content) ``` -### Extensions - -#### Automatic Code Language Detection - -Install `guesslang` with the command `pip install guesslang` to enable automatic code language recognition, especially useful for converting documents from web pages. - ### Docker ```sh diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index cf8149d..8cd4b49 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -76,17 +76,6 @@ except ModuleNotFoundError: pass -try: - from guesslang import Guess -except ImportError: - warn("The 'guesslang' package is not installed. Please install it via 'pip install guesslang'.") - class Guess: - def language_name(self, code: str) -> str: - return "" - -guess = Guess() - - class _CustomMarkdownify(markdownify.MarkdownConverter): """ A custom version of markdownify's MarkdownConverter. Changes include: @@ -99,19 +88,12 @@ class _CustomMarkdownify(markdownify.MarkdownConverter): def __init__(self, **options: Any): options["heading_style"] = options.get("heading_style", markdownify.ATX) - - # Keep inline images in table elements - options["keep_inline_images_in"] = options.get("keep_inline_images_in", ["td", "tr", "div", "p", "span"]) - # Add a custom code language callback to guess the language of code snippets - def code_language_callback(el): - extracted_code_snippet = el.get_text() - if not extracted_code_snippet: - return "" - language = guess.language_name(extracted_code_snippet) - return language.lower() if language else "" - options["code_language_callback"] = options.get("code_language_callback", code_language_callback) - + # Keep inline images in table elements + options["keep_inline_images_in"] = options.get( + "keep_inline_images_in", ["td", "tr", "div", "p", "span"] + ) + # Explicitly cast options to the expected type if necessary super().__init__(**options)