New Research: Supply Chain Attack on Axios Pulls Malicious Dependency from npm. Details
Socket
Book a Demo | Sign in
Socket

html2docx

Package Overview
Dependencies
Maintainers
3
Versions
10
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

html2docx - pypi Package Compare versions

Comparing version
1.5.0
to
1.6.0
+1
tests/data/table.html
<table><tr><td>1</td><td><b>2</b></td></tr><tr><td>3</td></tr></table>
[
{
"table": [
[{
"cell": [{
"text": "1",
"runs": [{ "text": "1" }]
}]
},
{
"cell": [{
"text": "2",
"runs": [{ "text": "2", "bold": true}]
}]
}],
[{
"cell": [{
"text": "3",
"runs": [{ "text": "3" }]
}]
},
{
"cell": [{
"text": "",
"runs": []
}]
}]
]
}
]
+1
-1
Metadata-Version: 2.1
Name: html2docx
Version: 1.5.0
Version: 1.6.0
Summary: Convert valid HTML input to docx.

@@ -5,0 +5,0 @@ Home-page: https://github.com/erezlife/html2docx

@@ -104,2 +104,4 @@ LICENSE

tests/data/sup.json
tests/data/table.html
tests/data/table.json
tests/data/underline-strikethrough.html

@@ -106,0 +108,0 @@ tests/data/underline-strikethrough.json

@@ -8,2 +8,3 @@ import re

from docx.shared import Pt
from docx.table import Table
from docx.text.paragraph import Paragraph

@@ -82,2 +83,3 @@ from docx.text.run import Run

self.pre = False
self.table: Optional[Tuple[Table, int, int]] = None
self.alignment: Optional[int] = None

@@ -106,2 +108,33 @@ self.padding_left: Optional[Pt] = None

def init_table(self, attrs: List[Tuple[str, Optional[str]]]) -> None:
    """Create an empty table in the document and reset the cell cursor.

    ``self.table`` is set to a ``(table, row, col)`` tuple. The table is
    created with zero rows and zero columns, and both indices start at -1.
    ``attrs`` is accepted but not read here.
    """
    empty_table = self.doc.add_table(rows=0, cols=0)
    row_index = col_index = -1
    self.table = (empty_table, row_index, col_index)
def finish_table(self) -> None:
    """Close the open table, giving every column the same width.

    The width shared between the columns is the first section's page width
    minus its left and right margins, split with integer division.
    Does nothing when no table is open. ``self.table`` is reset to ``None``
    afterwards.
    """
    if self.table is None:
        return
    section = self.doc.sections[0]
    page_width = section.page_width - section.left_margin - section.right_margin
    columns = self.table[0].columns
    column_count = len(columns)
    if column_count:
        # The count and the per-column width do not change inside the loop,
        # so compute them once instead of once per column.
        column_width = page_width // column_count
        for col in columns:
            col.width = column_width
    self.table = None
def init_tr(self) -> None:
    """Append a row to the open table and rewind the column cursor.

    Does nothing when no table is open. Otherwise the row index stored in
    ``self.table`` is advanced by one and the column index is reset to -1.
    """
    if self.table is None:
        return
    # The previous column index is irrelevant for a new row, so discard it.
    table, row, _ = self.table
    table.add_row()
    self.table = (table, row + 1, -1)
def init_tdth(self) -> None:
    """Advance to the next cell of the current row and target it for output.

    Does nothing when no table is open. Otherwise the column index in
    ``self.table`` is incremented, a column is added to the table when the
    index is beyond the existing columns, ``self.p`` is pointed at the first
    paragraph of the addressed cell, and ``self.r`` is cleared.
    """
    if self.table is None:
        return
    table, row, col = self.table
    if row < 0:
        # A cell arrived before any row was started. Open an implicit first
        # row rather than addressing the table with a negative row index.
        table.add_row()
        row = 0
    col += 1
    self.table = (table, row, col)
    if col >= len(table.columns):
        # Added with width 0; finish_table assigns the real widths.
        table.add_column(0)
    self.p = table.cell(row, col).paragraphs[0]
    self.r = None
def init_run(self, attrs: List[Tuple[str, Any]]) -> None:

@@ -201,2 +234,8 @@ self.attrs.append(attrs)

self.add_list_style("List Bullet")
elif tag == "table":
self.init_table(attrs)
elif tag == "tr":
self.init_tr()
elif tag in ["td", "th"]:
self.init_tdth()

@@ -229,1 +268,6 @@ def handle_data(self, data: str) -> None:

self.pre = False
elif tag == "table":
self.finish_table()
elif tag in ["td", "th"]:
self.p = None
self.r = None
Metadata-Version: 2.1
Name: html2docx
Version: 1.5.0
Version: 1.6.0
Summary: Convert valid HTML input to docx.

@@ -5,0 +5,0 @@ Home-page: https://github.com/erezlife/html2docx

[build-system]
requires = [
"setuptools>=42",
"wheel",
]
build-backend = "setuptools.build_meta:__legacy__"
requires = ["setuptools>=42"]
build-backend = "setuptools.build_meta"
[metadata]
name = html2docx
version = 1.5.0
version = 1.6.0
url = https://github.com/erezlife/html2docx

@@ -5,0 +5,0 @@ author = eRezLife

import json
from typing import Union
import docx
import pytest
from docx.document import Document
from docx.oxml import CT_P, CT_Tbl
from docx.shared import Pt
from docx.table import Table, _Cell
from docx.text.paragraph import Paragraph

@@ -29,2 +34,80 @@ from html2docx import html2docx

def get_document_children(element: Union[Document, _Cell]):
    """Yield the paragraph and table children of a document or table cell.

    Children are produced in the order ``iterchildren()`` returns them:
    ``CT_P`` nodes are wrapped as ``Paragraph`` and ``CT_Tbl`` nodes as
    ``Table``, each with *element* as parent. Any other child node is
    skipped.

    Raises:
        TypeError: if *element* is neither a ``Document`` nor a ``_Cell``.
            Because this is a generator, the error surfaces when iteration
            starts, not when the function is called.
    """
    if isinstance(element, Document):
        parent_element = element.element.body
    elif isinstance(element, _Cell):
        parent_element = element._tc
    else:
        # TypeError is a subclass of Exception, so existing handlers still
        # catch it while the failure kind is now explicit.
        raise TypeError("Received an item that does not have children.")
    for child in parent_element.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, element)
        elif isinstance(child, CT_Tbl):
            yield Table(child, element)
def assert_paragraph_comply_with_spec(
    p: Paragraph, p_spec: dict, html_rel_path: str, spec_rel_path: str
):
    """Assert that paragraph *p* matches its JSON spec *p_spec*.

    Checks the text, the style name (default ``"Normal"``), the alignment,
    the left indent (spec value converted with ``Pt``), and then each run:
    its text, every attribute in ``FONT_ATTRS``, and optional inline shapes.
    *html_rel_path* and *spec_rel_path* are used only in failure messages.
    *p_spec* is not modified.
    """
    assert p.text == p_spec["text"]
    assert p.style.name == p_spec.get("style", "Normal")
    if p_spec.get("alignment") is not None:
        assert p.alignment == p_spec["alignment"]
    else:
        assert p.alignment is None
    if p_spec.get("left_indent"):
        assert p.paragraph_format.left_indent == Pt(p_spec["left_indent"])
    else:
        assert p.paragraph_format.left_indent is None
    runs_spec = p_spec["runs"]
    assert len(p.runs) == len(runs_spec)
    for run, run_spec in zip(p.runs, runs_spec):
        # Pop from a copy so the caller's spec is left intact; popping from
        # the original would make a second check of the same spec fail.
        run_spec = dict(run_spec)
        assert run.text == run_spec.pop("text")
        shapes_spec = run_spec.pop("shapes", None)
        # Whatever remains must be a known font attribute.
        unknown = set(run_spec).difference(FONT_ATTRS)
        # Sorted so the failure message is the same on every run.
        assert not unknown, "Unknown attributes in {}: {}".format(
            spec_rel_path, ", ".join(sorted(unknown))
        )
        for attr in FONT_ATTRS:
            msg = f"Wrong {attr} for text '{run.text}' in {html_rel_path}"
            assert getattr(run.font, attr) == run_spec.get(attr), msg
        if shapes_spec:
            # NOTE(review): shapes are read from run.part, which looks
            # part-wide rather than specific to this run; confirm intended.
            shapes = run.part.inline_shapes
            assert len(shapes) == len(shapes_spec)
            for shape, shape_spec in zip(shapes, shapes_spec):
                assert shape.type == shape_spec["type"]
                assert shape.width == shape_spec["width"]
                assert shape.height == shape_spec["height"]
def assert_table_comply_with_spec(
    t: Table, t_spec: dict, html_rel_path: str, spec_rel_path: str
):
    """Assert that table *t* matches the ``"table"`` entry of *t_spec*.

    The spec holds one list per row and, within it, one entry per cell.
    Row count and per-row column count are compared first; then every cell
    is checked against the ``"cell"`` value of its spec entry.
    """
    assert "table" in t_spec
    rows_spec = t_spec["table"]
    assert len(t.rows) == len(rows_spec)
    for table_row, cells_spec in zip(t.rows, rows_spec):
        assert len(t.columns) == len(cells_spec)
        for table_cell, one_cell_spec in zip(table_row.cells, cells_spec):
            assert_element_comply_with_spec(
                table_cell, one_cell_spec["cell"], html_rel_path, spec_rel_path
            )
def assert_element_comply_with_spec(
    element: Union[Document, _Cell], spec: list, html_rel_path: str, spec_rel_path: str
):
    """Assert that the children of *element* match *spec*, entry by entry.

    *spec* is a sequence with one entry per child: its length is compared
    with the number of children and it is zipped with them. Paragraph
    children go to the paragraph checker and table children to the table
    checker. *html_rel_path* and *spec_rel_path* are passed through
    unchanged.
    """
    children = list(get_document_children(element))
    assert len(children) == len(spec)
    for child, child_spec in zip(children, spec):
        if isinstance(child, Paragraph):
            assert_paragraph_comply_with_spec(
                child, child_spec, html_rel_path, spec_rel_path
            )
        elif isinstance(child, Table):
            assert_table_comply_with_spec(
                child, child_spec, html_rel_path, spec_rel_path
            )
@pytest.mark.parametrize("html_path,spec_path", generate_testdata())

@@ -44,33 +127,2 @@ def test_html2docx(html_path, spec_path):

assert doc.core_properties.title == title
assert len(doc.paragraphs) == len(spec)
for p, p_spec in zip(doc.paragraphs, spec):
assert p.text == p_spec["text"]
assert p.style.name == p_spec.get("style", "Normal")
if p_spec.get("alignment") is not None:
assert p.alignment == p_spec["alignment"]
else:
assert p.alignment is None
if p_spec.get("left_indent"):
assert p.paragraph_format.left_indent == Pt(p_spec["left_indent"])
else:
assert p.paragraph_format.left_indent is None
runs_spec = p_spec["runs"]
assert len(p.runs) == len(runs_spec)
for run, run_spec in zip(p.runs, runs_spec):
assert run.text == run_spec.pop("text")
shapes_spec = run_spec.pop("shapes", None)
unknown = set(run_spec).difference(FONT_ATTRS)
assert not unknown, "Unknown attributes in {}: {}".format(
spec_rel_path, ", ".join(unknown)
)
for attr in FONT_ATTRS:
msg = f"Wrong {attr} for text '{run.text}' in {html_rel_path}"
assert getattr(run.font, attr) == run_spec.get(attr), msg
if shapes_spec:
shapes = run.part.inline_shapes
assert len(shapes) == len(shapes_spec)
for shape, shape_spec in zip(shapes, shapes_spec):
assert shape.type == shape_spec["type"]
assert shape.width == shape_spec["width"]
assert shape.height == shape_spec["height"]
assert_element_comply_with_spec(doc, spec, html_rel_path, spec_rel_path)