remove implicit styling from the outermost span at the top, and handle the case where its style attribute is missing

This commit is contained in:
Damien Elmes 2013-05-23 12:00:28 +09:00
parent b1ae088042
commit 34b7eaf4c9

View File

@@ -287,6 +287,19 @@ document.onclick = function (evt) {
def _filterHTML(html):
doc = BeautifulSoup(html)
# remove implicit regular font style from outermost element
if doc.span:
try:
attrs = doc.span['style'].split(";")
except (KeyError, TypeError):
attrs = []
if attrs:
new = []
for attr in attrs:
sattr = attr.strip()
if sattr and sattr not in ("font-style: normal", "font-weight: normal"):
new.append(sattr)
doc.span['style'] = ";".join(new)
# filter out implicit formatting from webkit
for tag in doc("span", "Apple-style-span"):
preserve = ""
@@ -335,15 +348,6 @@ def _filterHTML(html):
for elem in "html", "head", "body", "meta":
for tag in doc(elem):
tag.replaceWithChildren()
# remove outer styling if implicit
if doc.span:
hadExtraAttr = False
for attr in doc.span['style'].split(";"):
attr = attr.strip()
if attr and attr not in ("font-style: normal", "font-weight: normal"):
hadExtraAttr = True
if hadExtraAttr:
doc.span.replaceWithChildren()
html = unicode(doc)
return html