chore: remove automatic code language detection so it can be moved to a separate PR

This commit is contained in:
Hieu Lam 2025-02-10 19:00:14 +07:00
parent a75f1a68fb
commit 95da5fd2ae
2 changed files with 5 additions and 29 deletions

View file

@ -81,12 +81,6 @@ result = md.convert("example.jpg")
print(result.text_content) print(result.text_content)
``` ```
### Extensions
#### Automatic Code Language Detection
Install `guesslang` with the command `pip install guesslang` to enable automatic code language recognition, especially useful for converting documents from web pages.
### Docker ### Docker
```sh ```sh

View file

@ -76,17 +76,6 @@ except ModuleNotFoundError:
pass pass
try:
from guesslang import Guess
except ImportError:
warn("The 'guesslang' package is not installed. Please install it via 'pip install guesslang'.")
class Guess:
def language_name(self, code: str) -> str:
return ""
guess = Guess()
class _CustomMarkdownify(markdownify.MarkdownConverter): class _CustomMarkdownify(markdownify.MarkdownConverter):
""" """
A custom version of markdownify's MarkdownConverter. Changes include: A custom version of markdownify's MarkdownConverter. Changes include:
@ -99,19 +88,12 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
def __init__(self, **options: Any): def __init__(self, **options: Any):
options["heading_style"] = options.get("heading_style", markdownify.ATX) options["heading_style"] = options.get("heading_style", markdownify.ATX)
# Keep inline images in table elements
options["keep_inline_images_in"] = options.get("keep_inline_images_in", ["td", "tr", "div", "p", "span"])
# Add a custom code language callback to guess the language of code snippets # Keep inline images in table elements
def code_language_callback(el): options["keep_inline_images_in"] = options.get(
extracted_code_snippet = el.get_text() "keep_inline_images_in", ["td", "tr", "div", "p", "span"]
if not extracted_code_snippet: )
return ""
language = guess.language_name(extracted_code_snippet)
return language.lower() if language else ""
options["code_language_callback"] = options.get("code_language_callback", code_language_callback)
# Explicitly cast options to the expected type if necessary # Explicitly cast options to the expected type if necessary
super().__init__(**options) super().__init__(**options)