# fenen/html_converter.py
#
# (file-viewer metadata: 31 lines, 1008 B, Python)

from html.parser import HTMLParser
class HTMLConverter(HTMLParser):
    """Flatten HTML into plain text with numbered link references.

    Feed markup through ``feed()``; afterwards:

    * ``text`` (str) holds the character data, with a
      ``[TAG: title src - "alt"]`` placeholder for every ``<img>``,
      ``<video>`` and ``<audio>`` start tag and an ``[n]`` marker appended
      when an ``<a>`` element is closed;
    * ``links`` (list of str) holds one ``"[n] href"`` entry per ``<a>``
      start tag, in document order, where ``n`` matches the marker in
      ``text``.
    """

    # Tags rendered as an inline "[TAG: ...]" placeholder.
    _MEDIA_TAGS = ("img", "video", "audio")

    def __init__(self):
        """Create a converter with empty ``text`` and ``links``."""
        super().__init__()
        self.text = ""
        self.links = []
        # Indices into ``links`` of anchors that were opened but not yet
        # closed; lets a closing tag find the reference it belongs to.
        self._open_links = []

    @staticmethod
    def _present_attrs(attrs):
        """Return ``attrs`` as a dict, dropping attributes without a value.

        HTMLParser reports a valueless attribute (``<img title>``) with the
        value ``None``; treating it as absent keeps the string formatting
        below from raising or printing the literal text "None".
        """
        return {name: value for name, value in attrs if value is not None}

    def handle_data(self, data):
        """Append character data to ``text`` unchanged."""
        self.text += data

    def handle_starttag(self, tag, attrs):
        """Emit a media placeholder or register a link reference.

        Args:
            tag: Tag name as passed by HTMLParser.
            attrs: List of ``(name, value)`` pairs as passed by HTMLParser.
        """
        if tag in self._MEDIA_TAGS:
            attributes = self._present_attrs(attrs)
            # The title is followed by a separating space only when present.
            title = f"{attributes['title']} " if "title" in attributes else ""
            src = attributes.get("src", "")
            alt = attributes.get("alt", "")
            self.text += f'[{tag.upper()}: {title}{src} - "{alt}"]'
        elif tag == "a":
            href = self._present_attrs(attrs).get("href", "")
            index = len(self.links)
            self.links.append(f"[{index}] {href}")
            self._open_links.append(index)

    def handle_endtag(self, tag):
        """Append the ``[n]`` marker of the anchor being closed.

        A ``</a>`` with no matching open anchor is ignored rather than
        producing a bogus ``[-1]`` marker.
        """
        if tag == "a" and self._open_links:
            self.text += f"[{self._open_links.pop()}]"