Handle Internationalized Domain Names (IDN) in URIs

This fixes https://anki.tenderapp.com/discussions/ankidesktop/17132-anki-reports-an-error-when-pasting-an-image-from-non-ascii-url

The problem is that urllib2 cannot handle IDNs. However, it is perfectly valid
to have such URIs in HTML, and Firefox generates exactly this kind of HTML
when an image is copied.
This commit is contained in:
Roman Franchuk 2016-04-17 15:49:28 +03:00
parent 9f4fd8a9b6
commit a68af5f199

View File

@ -7,6 +7,8 @@ import urllib2
import ctypes
import urllib
from urlparse import urlparse, urlunparse
from anki.lang import _
from aqt.qt import *
from anki.utils import stripHTML, isWin, isMac, namedtmp, json, stripHTMLMedia
@ -837,6 +839,12 @@ to a cloze type first, via Edit>Change Note Type."""))
self.mw.progress.start(
immediate=True, parent=self.parentWindow)
try:
# urllib2 doesn't work properly with IRI
# The following code translates IRI to standard URI
scheme, netloc, path, params, query, fragment = urlparse(url)
idna_netloc = urllib2.unquote(netloc.encode("ascii")).decode("utf-8").encode("idna")
url = urlunparse([scheme, idna_netloc, path, params, query, fragment])
req = urllib2.Request(url, None, {
'User-Agent': 'Mozilla/5.0 (compatible; Anki)'})
filecontents = urllib2.urlopen(req).read()