124 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			124 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from lxml import etree, html
 | |
| from io import BytesIO
 | |
| 
 | |
| 
 | |
def convertHTML(source: str, sourceFrom: str):
    """Method used to convert a document between XML and HTML

    :param source: document text to convert
    :param sourceFrom: format of ``source``; ``"xml"`` converts XML to HTML,
    ``"html"`` converts HTML to XML
    :return: converted document, or ``None`` when ``sourceFrom``
    is neither ``"xml"`` nor ``"html"``
    """

    # Unknown source formats are not converted at all
    if sourceFrom not in ("xml", "html"):
        return

    raw = BytesIO(source.encode("utf-8"))

    if sourceFrom == "xml":
        xml_parser = etree.XMLParser(remove_comments=True, remove_blank_text=True)
        tree = etree.parse(raw, xml_parser)
        serialized = html.tostring(
            tree,
            method="html",
            pretty_print=True,
            doctype="<!DOCTYPE html>",
        )
        return serialized.decode()

    html_parser = html.HTMLParser(remove_comments=True, remove_blank_text=True)
    tree = html.parse(raw, html_parser)
    serialized = etree.tostring(
        tree,
        method="xml",
        pretty_print=True,
        doctype="",
        xml_declaration=True,
        encoding="utf-8",
    )
    return serialized.decode()
 | |
| 
 | |
| 
 | |
def formatHTML(source: str, prettify: bool) -> str:
    """Method used to format HTML

    Comments, processing instructions and blank text are dropped
    by the parser before the document is serialized again.

    :param source: HTML to format
    :param prettify: sets if HTML must be prettified (serialized with
    ``pretty_print``) or compacted (newlines removed)
    :return: formatted HTML
    """
    html_parser = html.HTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
    tree = html.parse(BytesIO(source.encode("utf-8")), parser=html_parser)

    if prettify:
        return etree.tostring(tree, encoding="unicode", pretty_print=True)

    # Compact form: drop every newline, then the four-space run
    # that may follow a closing bracket
    compact = html.tostring(tree).decode()
    compact = compact.replace("\n", "")
    return compact.replace(">    ", ">")
 | |
| 
 | |
def formatXML(source: str, prettify: bool) -> str:
    """Method used to format XML

    :param source: XML to format
    :param prettify: sets if XML must be prettified
    (added indentations etc.) or not
    :return: formatted XML, preceded by the prolog of ``source``
    when it had one
    """

    # Prolog is removed when XML is parsed and serialized again,
    # so program has to copy it.
    # Only a "<?...?>" that opens the document counts as the prolog:
    # a processing instruction further inside must stay where it is,
    # otherwise everything in front of it would be cut off.
    prolog = ""
    leading = source.lstrip("\ufeff \t\r\n")

    if leading.startswith("<?"):
        prolog_end = leading.find("?>")
        # An unterminated prolog is left for the parser to report
        if prolog_end != -1:
            prolog_end += 2
            prolog = leading[:prolog_end]
            source = leading[prolog_end:]

    byte_input = BytesIO(source.encode("utf-8"))
    parser = etree.XMLParser(remove_blank_text=True)
    xml = etree.parse(byte_input, parser=parser)
    formatted = etree.tostring(xml, pretty_print=prettify).decode()

    # Separate prolog from the document only when there is a prolog,
    # so output without one does not start with an empty line
    if prolog and prettify:
        prolog += "\n"

    return prolog + formatted
 | |
| 
 | |
| 
 | |
def xpath(source: str, xpath: str) -> "tuple[str, str]":
    """
    Method used to get nodes from XML string using XPath

    :param source: XML string used for selection
    :param xpath: XPath query used for selection
    :return: Tuple of the selection rendered as text and its kind:
    the result's type name for scalar results, ``"node"`` for lists
    """

    byte_input = BytesIO(source.encode("utf-8"))
    root = etree.parse(byte_input).getroot()

    # LXML doesn't accept empty (None) namespace prefix,
    # so the default namespace is left out of the mapping
    nsmap = {
        prefix: uri
        for prefix, uri in root.nsmap.items()
        if prefix is not None
    }

    result = root.xpath(xpath, namespaces=nsmap)

    # root.xpath can return 4 types: float, string, bool and list.
    # List is the only one that can't be simply converted to str
    if not isinstance(result, list):
        return str(result), type(result).__name__

    # A list may hold elements as well as plain strings (attribute
    # values, text nodes); only elements can be serialized by tostring
    pieces = []
    for item in result:
        if etree.iselement(item):
            pieces.append(etree.tostring(item, pretty_print=True).decode())
        else:
            pieces.append(str(item))

    return "".join(piece + "\n" for piece in pieces), "node"
 | |
| 
 | |
| 
 | |
| 
 | |
def xsd(source: str, xsd: str) -> str:
    """
    Method used to validate XML string against XSD schema

    :param source: XML string used for validation
    :param xsd: XSD schema to validate XML against
    :return: Message saying, if the validation was successful or not:
    ``"XML is valid"`` on success, otherwise the validation error text
    """

    schema_root = etree.parse(BytesIO(xsd.encode("utf-8"))).getroot()
    xml_schema = etree.XMLSchema(schema_root)

    document_root = etree.parse(BytesIO(source.encode("utf-8"))).getroot()

    # assertValid raises on failure; its message is the result
    # handed back to the caller instead of a boolean
    try:
        xml_schema.assertValid(document_root)
    except etree.DocumentInvalid as e:
        return str(e)

    return "XML is valid"
 | |
| 
 | |
|     
 | |
|     
 | |
def xslt(source: str, xslt: str) -> str:
    """
    Method used to transform XML string using XSLT

    :param source: XML string to transform
    :param xslt: XSLT string used to transform XML
    :return: Result of transformation, passed through ``formatXML``
    with prettifying enabled
    """
    stylesheet = etree.parse(BytesIO(xslt.encode("utf-8")))
    transform = etree.XSLT(stylesheet)

    document_root = etree.parse(BytesIO(source.encode("utf-8"))).getroot()

    result_text = str(transform(document_root))
    return formatXML(result_text, True)