Promote discussion of converter priority to a docstring.
This commit is contained in:
parent
d1868f8588
commit
540410e5c8
2 changed files with 22 additions and 18 deletions
|
|
@ -99,23 +99,6 @@ class MarkItDown:
|
||||||
# Register the converters
|
# Register the converters
|
||||||
self._page_converters: List[DocumentConverter] = []
|
self._page_converters: List[DocumentConverter] = []
|
||||||
|
|
||||||
# Note: We have tight control over the order of built-in converters, but
|
|
||||||
# plugins can register converters in any order. A converter's .priority
|
|
||||||
# reasserts some control over the order of converters.
|
|
||||||
#
|
|
||||||
# Priorities work as follows. By default, most converters get priority
|
|
||||||
# DocumentConverter.PRIORITY_SPECIFIC_FILE_FORMAT (== 0). The exception
|
|
||||||
# is the PlainTextConverter, which gets priority PRIORITY_GENERIC_FILE_FORMAT (== 10),
|
|
||||||
# with lower values being tried first (i.e., higher priority).
|
|
||||||
#
|
|
||||||
# Just prior to conversion, the converters are sorted by priority, using
|
|
||||||
# a stable sort. This means that converters with the same priority will
|
|
||||||
# remain in the same order, with the most recently registered converters
|
|
||||||
# appearing first.
|
|
||||||
#
|
|
||||||
# Plugins can register converters with any priority, to appear before or
|
|
||||||
# after the built-ins. For example, a plugin with priority 9 will run
|
|
||||||
# before the PlainTextConverter, but after the built-in converters.
|
|
||||||
if (
|
if (
|
||||||
enable_builtins is None or enable_builtins
|
enable_builtins is None or enable_builtins
|
||||||
): # Default to True when not specified
|
): # Default to True when not specified
|
||||||
|
|
@ -128,7 +111,6 @@ class MarkItDown:
|
||||||
"""
|
"""
|
||||||
Enable and register built-in converters.
|
Enable and register built-in converters.
|
||||||
Built-in converters are enabled by default.
|
Built-in converters are enabled by default.
|
||||||
This method should only be called once, if built-ins were initially disabled.
|
|
||||||
"""
|
"""
|
||||||
if not self._builtins_enabled:
|
if not self._builtins_enabled:
|
||||||
# TODO: Move these into converter constructors
|
# TODO: Move these into converter constructors
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,28 @@ class DocumentConverter:
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, priority: float = PRIORITY_SPECIFIC_FILE_FORMAT):
|
def __init__(self, priority: float = PRIORITY_SPECIFIC_FILE_FORMAT):
|
||||||
|
"""
|
||||||
|
Initialize the DocumentConverter with a given priority.
|
||||||
|
|
||||||
|
Priorities work as follows: By default, most converters get priority
|
||||||
|
DocumentConverter.PRIORITY_SPECIFIC_FILE_FORMAT (== 0). The exception
|
||||||
|
is the PlainTextConverter, which gets priority PRIORITY_GENERIC_FILE_FORMAT (== 10),
|
||||||
|
with lower values being tried first (i.e., higher priority).
|
||||||
|
|
||||||
|
Just prior to conversion, the converters are sorted by priority, using
|
||||||
|
a stable sort. This means that converters with the same priority will
|
||||||
|
remain in the same order, with the most recently registered converters
|
||||||
|
appearing first.
|
||||||
|
|
||||||
|
We have tight control over the order of built-in converters, but
|
||||||
|
plugins can register converters in any order. A converter's priority
|
||||||
|
field reasserts some control over the order of converters.
|
||||||
|
This method should only be called once, if built-ins were initially disabled.
|
||||||
|
|
||||||
|
Plugins can register converters with any priority, to appear before or
|
||||||
|
after the built-ins. For example, a plugin with priority 9 will run
|
||||||
|
before the PlainTextConverter, but after the built-in converters.
|
||||||
|
"""
|
||||||
self._priority = priority
|
self._priority = priority
|
||||||
|
|
||||||
def convert(
|
def convert(
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue