Promote discussion of converter priority to a docstring.
This commit is contained in:
parent
d1868f8588
commit
540410e5c8
2 changed files with 22 additions and 18 deletions
|
|
@ -99,23 +99,6 @@ class MarkItDown:
|
|||
# Register the converters
|
||||
self._page_converters: List[DocumentConverter] = []
|
||||
|
||||
# Note: We have tight control over the order of built-in converters, but
|
||||
# plugins can register converters in any order. A converter's .priority
|
||||
# reasserts some control over the order of converters.
|
||||
#
|
||||
# Priorities work as follows. By default, most converters get priority
|
||||
# DocumentConverter.PRIORITY_SPECIFIC_FILE_FORMAT (== 0). The exception
|
||||
# is the PlainTextConverter, which gets priority PRIORITY_GENERIC_FILE_FORMAT (== 10),
|
||||
# with lower values being tried first (i.e., higher priority).
|
||||
#
|
||||
# Just prior to conversion, the converters are sorted by priority, using
|
||||
# a stable sort. This means that converters with the same priority will
|
||||
# remain in the same order, with the most recently registered converters
|
||||
# appearing first.
|
||||
#
|
||||
# Plugins can register converters with any priority, to appear before or
|
||||
# after the built-ins. For example, a plugin with priority 9 will run
|
||||
# before the PlainTextConverter, but after the built-in converters.
|
||||
if (
|
||||
enable_builtins is None or enable_builtins
|
||||
): # Default to True when not specified
|
||||
|
|
@ -128,7 +111,6 @@ class MarkItDown:
|
|||
"""
|
||||
Enable and register built-in converters.
|
||||
Built-in converters are enabled by default.
|
||||
This method should only be called once, if built-ins were initially disabled.
|
||||
"""
|
||||
if not self._builtins_enabled:
|
||||
# TODO: Move these into converter constructors
|
||||
|
|
|
|||
|
|
@ -21,6 +21,28 @@ class DocumentConverter:
|
|||
)
|
||||
|
||||
def __init__(self, priority: float = PRIORITY_SPECIFIC_FILE_FORMAT):
|
||||
"""
|
||||
Initialize the DocumentConverter with a given priority.
|
||||
|
||||
Priorities work as follows: By default, most converters get priority
|
||||
DocumentConverter.PRIORITY_SPECIFIC_FILE_FORMAT (== 0). The exception
|
||||
is the PlainTextConverter, which gets priority PRIORITY_GENERIC_FILE_FORMAT (== 10),
|
||||
with lower values being tried first (i.e., higher priority).
|
||||
|
||||
Just prior to conversion, the converters are sorted by priority, using
|
||||
a stable sort. This means that converters with the same priority will
|
||||
remain in the same order, with the most recently registered converters
|
||||
appearing first.
|
||||
|
||||
We have tight control over the order of built-in converters, but
|
||||
plugins can register converters in any order. A converter's priority
|
||||
field reasserts some control over the order of converters.
|
||||
|
||||
Plugins can register converters with any priority, to appear before or
|
||||
after the built-ins. For example, a plugin with priority 9 will run
|
||||
before the PlainTextConverter, but after the built-in converters.
|
||||
"""
|
||||
self._priority = priority
|
||||
|
||||
def convert(
|
||||
|
|
|
|||
Loading…
Reference in a new issue