Implemented HTML Tools (#240)
Co-authored-by: widlam <mikolaj.widla@gmail.com> Reviewed-on: #240 Reviewed-by: Adam Bem <bema@noreply.example.com> Co-authored-by: Mikolaj Widla <widlam@noreply.example.com> Co-committed-by: Mikolaj Widla <widlam@noreply.example.com>
This commit is contained in:
@@ -1,7 +1,28 @@
|
||||
from lxml import etree
|
||||
from lxml import etree, html
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
def convertHTML(source: str, sourceFrom: str):
    """Method used to convert a document between XML and HTML.

    :param source: text of the document to convert
    :param sourceFrom: format of ``source``; ``"xml"`` parses it as XML and
        serialises it with the HTML method, ``"html"`` parses it as HTML and
        serialises it as XML with an XML declaration
    :return: the pretty-printed converted document, or ``None`` when
        ``sourceFrom`` is neither ``"xml"`` nor ``"html"``
    """
    if sourceFrom == "xml":
        # The text is handed to lxml as UTF-8 bytes, so the parser is told
        # the encoding explicitly; otherwise an encoding named in the XML
        # declaration would be used to decode bytes we encoded ourselves.
        xmlParser = etree.XMLParser(
            encoding="utf-8", remove_comments=True, remove_blank_text=True
        )
        xmldoc = etree.parse(BytesIO(source.encode("utf-8")), xmlParser)
        return html.tostring(xmldoc, method="html", pretty_print=True).decode()

    if sourceFrom == "html":
        # Same reasoning as above: without an explicit encoding the HTML
        # parser guesses (or trusts a <meta charset>), which can garble
        # non-ASCII characters in the UTF-8 bytes produced here.
        htmlParser = html.HTMLParser(
            encoding="utf-8", remove_comments=True, remove_blank_text=True
        )
        htmldoc = html.parse(BytesIO(source.encode("utf-8")), htmlParser)
        return etree.tostring(
            htmldoc,
            method="xml",
            pretty_print=True,
            doctype="",
            xml_declaration=True,
            encoding="utf-8",
        ).decode()

    # Unsupported source format: keep the original contract of returning None.
    return None
|
||||
|
||||
|
||||
def formatHTML(source: str, prettify: bool) -> str:
    """Method used to format HTML

    :param source: HTML text to format
    :param prettify: when true the document is pretty-printed, otherwise it
        is returned without newlines and without the space following ``>``
    :return: the formatted document; comments and processing instructions
        are dropped by the parser
    """
    # The text is handed to lxml as UTF-8 bytes, so the parser is told the
    # encoding explicitly; otherwise it guesses (or trusts a <meta charset>)
    # and can garble non-ASCII characters.
    parser = html.HTMLParser(
        encoding="utf-8",
        remove_blank_text=True,
        remove_comments=True,
        remove_pis=True,
    )
    htmlDoc = html.parse(BytesIO(source.encode("utf-8")), parser=parser)

    if not prettify:
        # NOTE(review): the "> " replacement also removes a single space that
        # follows an inline tag inside running text - confirm this is wanted.
        return html.tostring(htmlDoc).decode().replace("\n", "").replace("> ", ">")

    # NOTE(review): this path serialises through etree.tostring while the
    # minified path uses html.tostring - confirm the difference is intended.
    return etree.tostring(htmlDoc, encoding='unicode', pretty_print=True)
|
||||
|
||||
def formatXML(source: str, prettify: bool) -> str:
|
||||
"""Method used to format XML
|
||||
|
||||
|
||||
Reference in New Issue
Block a user