Small refactor for MarkItDown.convert_response function.
This commit is contained in:
parent
18e3f1d428
commit
1952ba21b0
1 changed file with 8 additions and 20 deletions
|
|
@@ -1385,38 +1385,26 @@ class MarkItDown:
|
||||||
content_disposition = response.headers.get("content-disposition", "")
|
content_disposition = response.headers.get("content-disposition", "")
|
||||||
m = re.search(r"filename=([^;]+)", content_disposition)
|
m = re.search(r"filename=([^;]+)", content_disposition)
|
||||||
if m:
|
if m:
|
||||||
base, ext = os.path.splitext(m.group(1).strip("\"'"))
|
_, ext = os.path.splitext(m.group(1).strip("\"'"))
|
||||||
self._append_ext(extensions, ext)
|
self._append_ext(extensions, ext)
|
||||||
|
|
||||||
# Read from the extension from the path
|
# Read from the extension from the path
|
||||||
base, ext = os.path.splitext(urlparse(response.url).path)
|
_, ext = os.path.splitext(urlparse(response.url).path)
|
||||||
self._append_ext(extensions, ext)
|
self._append_ext(extensions, ext)
|
||||||
|
|
||||||
# Save the file locally to a temporary file. It will be deleted before this method exits
|
# Save the file locally to a temporary file. It will be deleted before this method exits
|
||||||
handle, temp_path = tempfile.mkstemp()
|
with tempfile.NamedTemporaryFile("wb") as temp_file:
|
||||||
fh = os.fdopen(handle, "wb")
|
|
||||||
result = None
|
|
||||||
try:
|
|
||||||
# Download the file
|
# Download the file
|
||||||
for chunk in response.iter_content(chunk_size=512):
|
for chunk in response.iter_content(chunk_size=512):
|
||||||
fh.write(chunk)
|
temp_file.write(chunk)
|
||||||
fh.close()
|
temp_file.flush()
|
||||||
|
|
||||||
# Use puremagic to check for more extension options
|
# Use puremagic to check for more extension options
|
||||||
for g in self._guess_ext_magic(temp_path):
|
for g in self._guess_ext_magic(temp_file.name):
|
||||||
self._append_ext(extensions, g)
|
self._append_ext(extensions, g)
|
||||||
|
|
||||||
# Convert
|
# Convert and return
|
||||||
result = self._convert(temp_path, extensions, url=response.url, **kwargs)
|
return self._convert(temp_file.name, extensions, url=response.url, **kwargs)
|
||||||
# Clean up
|
|
||||||
finally:
|
|
||||||
try:
|
|
||||||
fh.close()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
os.unlink(temp_path)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _convert(
|
def _convert(
|
||||||
self, local_path: str, extensions: List[Union[str, None]], **kwargs
|
self, local_path: str, extensions: List[Union[str, None]], **kwargs
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue