chore: consider header for column-wise drop
This commit is contained in:
parent
113f7748b7
commit
7b64e6ebfd
2 changed files with 7 additions and 2 deletions
|
|
@ -525,7 +525,7 @@ class XlsxConverter(HtmlConverter):
|
||||||
def _clean_colname(self, colname: Any) -> Any:
|
def _clean_colname(self, colname: Any) -> Any:
|
||||||
# Remove Pandas header placeholders
|
# Remove Pandas header placeholders
|
||||||
if isinstance(colname, str) and colname.startswith("Unnamed:"):
|
if isinstance(colname, str) and colname.startswith("Unnamed:"):
|
||||||
return ""
|
return None
|
||||||
return colname
|
return colname
|
||||||
|
|
||||||
def convert(
|
def convert(
|
||||||
|
|
@ -548,11 +548,16 @@ class XlsxConverter(HtmlConverter):
|
||||||
sheet = sheet.rename(columns=lambda col: self._clean_colname(col))
|
sheet = sheet.rename(columns=lambda col: self._clean_colname(col))
|
||||||
|
|
||||||
if drop_empty_cols:
|
if drop_empty_cols:
|
||||||
sheet = sheet.dropna(axis=1, how="all")
|
# treat the header as part of the column: keep a column if it has any non-NaN cell or a non-NaN header
|
||||||
|
sheet = sheet.loc[:, sheet.notna().any() | sheet.columns.notna()]
|
||||||
|
|
||||||
if drop_empty_rows:
|
if drop_empty_rows:
|
||||||
sheet = sheet.dropna(axis=0, how="all")
|
sheet = sheet.dropna(axis=0, how="all")
|
||||||
|
|
||||||
|
# replace remaining NaN column headers with na_rep
|
||||||
|
# because .to_html(na_rep="") does not apply to headers
|
||||||
|
sheet.columns = sheet.columns.fillna(na_rep)
|
||||||
|
|
||||||
html_content = sheet.to_html(index=False, na_rep=na_rep)
|
html_content = sheet.to_html(index=False, na_rep=na_rep)
|
||||||
md_content += self._convert(html_content).text_content.strip() + "\n\n"
|
md_content += self._convert(html_content).text_content.strip() + "\n\n"
|
||||||
|
|
||||||
|
|
|
||||||
BIN
tests/test_files/test.xlsx
vendored
BIN
tests/test_files/test.xlsx
vendored
Binary file not shown.
Loading…
Reference in a new issue