31 lines
1008 B
Python
31 lines
1008 B
Python
from html.parser import HTMLParser
|
|
|
|
|
|
class HTMLConverter(HTMLParser):
    """Flatten an HTML document into plain text with numbered link references.

    After ``feed()``-ing markup into the parser:

    * ``text`` holds the character data in document order. Each ``img``,
      ``video`` or ``audio`` start tag is replaced by a placeholder of the
      form ``[TAG: title src - "alt"]``, and each closed ``a`` element is
      followed by a ``[n]`` marker.
    * ``links`` holds one ``"[n] href"`` entry per ``a`` start tag, where
      ``n`` is the entry's position in the list.
    """

    # Class-level defaults; every instance rebinds both in __init__, so the
    # list below is never shared between properly constructed instances.
    text = ""
    links = []

    # Tags rendered as an inline placeholder instead of being dropped.
    _MEDIA_TAGS = ("img", "video", "audio")

    def __init__(self):
        """Create a parser with empty text and link buffers."""
        super().__init__()
        self.text = ""
        self.links = []
        # Indices (into self.links) of anchors that are opened but not yet
        # closed, so each end tag is matched with its own start tag.
        self._open_links = []

    def handle_data(self, data):
        """Append a run of character data to the accumulated text."""
        self.text += data

    def handle_starttag(self, tag, attrs):
        """Emit a media placeholder or register a link for a start tag.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``. The value is ``None`` for an attribute written
        without a value (e.g. ``<img title>``); such values are treated as
        empty strings rather than raising or being rendered as ``"None"``.
        """
        attributes = dict(attrs)
        if tag in self._MEDIA_TAGS:
            title = ""
            if "title" in attributes:
                # The separator is kept whenever the attribute is present,
                # even if its value is empty.
                title = (attributes["title"] or "") + " "
            src = attributes.get("src") or ""
            alt = attributes.get("alt") or ""
            self.text += f'[{tag.upper()}: {title}{src} - "{alt}"]'
        elif tag == "a":
            href = attributes.get("href") or ""
            index = len(self.links)
            self._open_links.append(index)
            self.links.append(f"[{index}] {href}")

    def handle_endtag(self, tag):
        """Append the ``[n]`` marker of the anchor being closed.

        An ``</a>`` with no matching open anchor is ignored, so it cannot
        produce a bogus ``[-1]`` or repeat an earlier marker.
        """
        if tag == "a" and self._open_links:
            self.text += f"[{self._open_links.pop()}]"
|