From 020415efb9c0770337521e2a2f56fb044f564aac Mon Sep 17 00:00:00 2001 From: evandrocoan Date: Wed, 29 Apr 2020 07:38:35 -0300 Subject: [PATCH 1/7] Replaced the mediasrv.py SimpleHttp server by flask and waitress, fixing HTML5 media support. https://stackoverflow.com/questions/37044064/html-audio-cant-set-currenttime https://stackoverflow.com/questions/21956683/enable-access-control-on-simple-http-server https://stackoverflow.com/questions/5052635/what-is-relation-between-content-length-and-byte-ranges-in-http-1-1 https://stackoverflow.com/questions/16725907/google-app-engine-serving-mp3-for-audio-element-needs-content-range-header I was trying to use HTML5 audio tag to display audios like: ```html ``` ![image](https://user-images.githubusercontent.com/5332158/79063321-565b5500-7c77-11ea-9f8d-6e1df6f07892.png) But the progress bar seek was not working. After researching, I found the problem was the HTML server not properly responding to the HTML5 header requests. The HTML server should respond to quite complicated things as 206 partial, properly handle keep-alive, provide media ranges and other HTTP headers: https://stackoverflow.com/questions/37044064/html-audio-cant-set-currenttime To implement all these on the Simple HTTP server would be quite complicated. Then, instead, I imported the `flask` web server, which is quite simple and straight forward to use. Now, the back-end is using a secure complaint HTTP back-end: 1. https://palletsprojects.com/p/flask/ > Flask is a lightweight WSGI web application framework. It is designed to make getting started quick and easy, with the ability to scale up to complex applications. It began as a simple wrapper around Werkzeug and Jinja and has become one of the most popular Python web application frameworks. > > Flask offers suggestions, but doesn't enforce any dependencies or project layout. It is up to the developer to choose the tools and libraries they want to use. There are many extensions provided by the community that make adding new functionality easy. 1. https://docs.pylonsproject.org/projects/waitress/en/latest/ > Waitress is meant to be a production-quality pure-Python WSGI server with very acceptable performance. It has no dependencies except ones which live in the Python standard library. It runs on CPython on Unix and Windows under Python 2.7+ and Python 3.5+. It is also known to run on PyPy 1.6.0 on UNIX. It supports HTTP/1.0 and HTTP/1.1. Right now, anki does not support fields passing file names directly to HTML audio tags, but this can be easily done with (https://github.com/ankitects/anki/pull 540 - Added arguments to the sound tag) plus the commit https://github.com/evandroforks/anki/commit/826a97df61b99814041c41c0f2c84268280ed8ad, the HTML5 audio tag can be used like this: ```html // Audio = [sound:myfile.mp3|onlyfilename] ``` ![image](https://user-images.githubusercontent.com/5332158/79063736-c539ad80-7c79-11ea-8420-40b72185f4e7.png) # Conflicts: # qt/aqt/mediasrv.py --- qt/aqt/mediasrv.py | 276 ++++++++++++++++++--------------------------- qt/setup.py | 3 + 2 files changed, 113 insertions(+), 166 deletions(-) diff --git a/qt/aqt/mediasrv.py b/qt/aqt/mediasrv.py index a5bdb65db..cd8c0ab7b 100644 --- a/qt/aqt/mediasrv.py +++ b/qt/aqt/mediasrv.py @@ -1,20 +1,18 @@ # Copyright: Ankitects Pty Ltd and contributors # -*- coding: utf-8 -*- # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html - -from __future__ import annotations - -import http.server +import logging +import os import re -import socket -import socketserver +import sys import threading +import traceback from http import HTTPStatus -from typing import Optional -import aqt -from anki.collection import Collection -from anki.rsbackend import from_json_bytes +import flask +import flask_cors # type: ignore +from waitress.server import create_server # type: ignore + from anki.utils import devMode from aqt.qt import * from aqt.utils import aqt_data_folder @@ -33,181 +31,127 @@ def _getExportFolder(): _exportFolder = _getExportFolder() - -# webengine on windows sometimes opens a connection and fails to send a request, -# which will hang the server if unthreaded -class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer): - # allow for a flood of requests before we've started up properly - request_queue_size = 100 - - # work around python not being able to handle non-latin hostnames - def server_bind(self): - """Override server_bind to store the server name.""" - socketserver.TCPServer.server_bind(self) - host, port = self.server_address[:2] - try: - self.server_name = socket.getfqdn(host) - except: - self.server_name = "server" - self.server_port = port +app = flask.Flask(__name__) +flask_cors.CORS(app) class MediaServer(threading.Thread): - _port: Optional[int] = None _ready = threading.Event() daemon = True def __init__(self, mw, *args, **kwargs): super().__init__(*args, **kwargs) - self.mw = mw + self.is_shutdown = False + _redirectWebExports.mw = mw def run(self): - RequestHandler.mw = self.mw - desired_port = int(os.getenv("ANKI_API_PORT", "0")) - self.server = ThreadedHTTPServer(("127.0.0.1", desired_port), RequestHandler) - self._ready.set() - self.server.serve_forever() + try: + if devMode: + # idempotent if logging has already been set up + logging.basicConfig() + else: + logging.getLogger("waitress").setLevel(logging.ERROR) + + self.server = create_server(app, host="127.0.0.1", port=0) + if devMode: + print( + "Serving on http://%s:%s" + % (self.server.effective_host, self.server.effective_port) + ) + + self._ready.set() + self.server.run() + + except Exception: + if not self.is_shutdown: + raise + + def shutdown(self): + self.is_shutdown = True + sockets = list(self.server._map.values()) + for socket in sockets: + socket.handle_close() + # https://github.com/Pylons/webtest/blob/4b8a3ebf984185ff4fefb31b4d0cf82682e1fcf7/webtest/http.py#L93-L104 + self.server.task_dispatcher.shutdown() def getPort(self): self._ready.wait() - return self.server.server_port - - def shutdown(self): - self.server.shutdown() + return int(self.server.effective_port) -class RequestHandler(http.server.SimpleHTTPRequestHandler): - - timeout = 10 - mw: Optional[aqt.main.AnkiQt] = None - - def do_GET(self): - f = self.send_head() - if f: - try: - self.copyfile(f, self.wfile) - except Exception as e: - if devMode: - print("http server caught exception:", e) - else: - # swallow it - user likely surfed away from - # review screen before an image had finished - # downloading - pass - finally: - f.close() - - def send_head(self): - path = self.translate_path(self.path) - path = self._redirectWebExports(path) - try: - isdir = os.path.isdir(path) - except ValueError: - # path too long exception on Windows - self.send_error(HTTPStatus.NOT_FOUND, "File not found") - return None - - if isdir: - self.send_error(HTTPStatus.NOT_FOUND, "File not found") - return None - - ctype = self.guess_type(path) - try: - f = open(path, "rb") - except OSError: - self.send_error(HTTPStatus.NOT_FOUND, "File not found") - return None - try: - self.send_response(HTTPStatus.OK) - self.send_header("Content-type", ctype) - fs = os.fstat(f.fileno()) - self.send_header("Content-Length", str(fs[6])) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) - self.send_header("Access-Control-Allow-Origin", "*") - self.end_headers() - return f - except: - f.close() - raise - - def log_message(self, format, *args): - if not devMode: - return - print( - "%s - - [%s] %s" - % (self.address_string(), self.log_date_time_string(), format % args) +@app.route("/", defaults={"path": ""}) +@app.route("/") +def allroutes(path): + directory, path = _redirectWebExports(path) + try: + isdir = os.path.isdir(os.path.join(directory, path)) + except ValueError: + return flask.Response( + "Path for '%s - %s' is too long!" % (directory, path), + status=HTTPStatus.BAD_REQUEST, + mimetype="text/plain", ) - def _redirectWebExports(self, path): - # catch /_anki references and rewrite them to web export folder - targetPath = os.path.join(os.getcwd(), "_anki", "") - if path.startswith(targetPath): - newPath = os.path.join(_exportFolder, path[len(targetPath) :]) - return newPath + if isdir: + return flask.Response( + "Path for '%s - %s' is a directory (not supported)!" % (directory, path), + status=HTTPStatus.FORBIDDEN, + mimetype="text/plain", + ) - # catch /_addons references and rewrite them to addons folder - targetPath = os.path.join(os.getcwd(), "_addons", "") - if path.startswith(targetPath): - try: - addMgr = self.mw.addonManager - except AttributeError: - return path - - addonPath = path[len(targetPath) :] - - try: - addon, subPath = addonPath.split(os.path.sep, 1) - except ValueError: - return path - if not addon: - return path - - pattern = addMgr.getWebExports(addon) - if not pattern: - return path - - subPath2 = subPath.replace(os.sep, "/") - if re.fullmatch(pattern, subPath) or re.fullmatch(pattern, subPath2): - newPath = os.path.join(addMgr.addonsFolder(), addonPath) - return newPath - - return path - - def do_POST(self): - if not self.path.startswith("/_anki/"): - self.send_error(HTTPStatus.NOT_FOUND, "Method not found") - return - - cmd = self.path[len("/_anki/") :] - - if cmd == "graphData": - content_length = int(self.headers["Content-Length"]) - body = self.rfile.read(content_length) - data = graph_data(self.mw.col, **from_json_bytes(body)) - elif cmd == "i18nResources": - data = self.mw.col.backend.i18n_resources() - else: - self.send_error(HTTPStatus.NOT_FOUND, "Method not found") - return - - self.send_response(HTTPStatus.OK) - self.send_header("Content-Type", "application/binary") - self.send_header("Content-Length", str(len(data))) - self.send_header("Access-Control-Allow-Origin", "*") - self.end_headers() - - self.wfile.write(data) - - -def graph_data(col: Collection, search: str, days: int) -> bytes: try: - return col.backend.graphs(search=search, days=days) - except Exception as e: - # likely searching error - print(e) - return b"" + if devMode: + print("Sending file '%s - %s'" % (directory, path)) + return flask.send_from_directory(directory, path) + + except Exception as error: + if devMode: + print( + "Caught HTTP server exception,\n%s" + % "".join(traceback.format_exception(*sys.exc_info())), + ) + + # swallow it - user likely surfed away from + # review screen before an image had finished + # downloading + return flask.Response( + "For path '%s - %s' %s!" % (directory, path, error), + status=HTTPStatus.INTERNAL_SERVER_ERROR, + mimetype="text/plain", + ) -# work around Windows machines with incorrect mime type -RequestHandler.extensions_map[".css"] = "text/css" +def _redirectWebExports(path): + # catch /_anki references and rewrite them to web export folder + targetPath = "_anki/" + if path.startswith(targetPath): + return _exportFolder, path[len(targetPath) :] + + # catch /_addons references and rewrite them to addons folder + targetPath = "_addons/" + if path.startswith(targetPath): + addonPath = path[len(targetPath) :] + + try: + addMgr = _redirectWebExports.mw.addonManager + except AttributeError as error: + if devMode: + print("_redirectWebExports: %s" % error) + return _exportFolder, addonPath + + try: + addon, subPath = addonPath.split(os.path.sep, 1) + except ValueError: + return addMgr.addonsFolder(), path + if not addon: + return addMgr.addonsFolder(), path + + pattern = addMgr.getWebExports(addon) + if not pattern: + return addMgr.addonsFolder(), path + + if re.fullmatch(pattern, subPath): + return addMgr.addonsFolder(), addonPath + + return _redirectWebExports.mw.col.media.dir(), path diff --git a/qt/setup.py b/qt/setup.py index dbaa859ba..e370e2dd5 100644 --- a/qt/setup.py +++ b/qt/setup.py @@ -28,6 +28,9 @@ install_requires = [ "jsonschema", # "pyaudio", # https://anki.tenderapp.com/discussions/add-ons/44009-problems-with-code-completion # "pyqtwebengine", # https://github.com/ankitects/anki/pull/530 - Set to checks.yml install and import anki wheels + "flask", + "flask_cors", + "waitress", "pyqt5>=5.9", 'psutil; sys.platform == "win32"', 'pywin32; sys.platform == "win32"', From be10da58d93183d06194f2f80e9774da2e76f75e Mon Sep 17 00:00:00 2001 From: evandrocoan Date: Fri, 5 Jun 2020 19:44:54 -0300 Subject: [PATCH 2/7] Fixed aqt/mediasrv.py:14: error: unused 'type: ignore' comment from waitress.server import create_server # type: ignore https://github.com/evandroforks/anki/runs/743801391#step:25:1129 --- qt/aqt/mediasrv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qt/aqt/mediasrv.py b/qt/aqt/mediasrv.py index cd8c0ab7b..bcc286d35 100644 --- a/qt/aqt/mediasrv.py +++ b/qt/aqt/mediasrv.py @@ -11,7 +11,7 @@ from http import HTTPStatus import flask import flask_cors # type: ignore -from waitress.server import create_server # type: ignore +from waitress.server import create_server from anki.utils import devMode from aqt.qt import * From 1e6fa5f8eb1929e78535f048886bd9ec64b491a5 Mon Sep 17 00:00:00 2001 From: evandrocoan Date: Thu, 18 Jun 2020 19:58:39 -0300 Subject: [PATCH 3/7] Ensure protection against directory transversal https://security.openstack.org/guidelines/dg_using-file-paths.html --- qt/aqt/mediasrv.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/qt/aqt/mediasrv.py b/qt/aqt/mediasrv.py index bcc286d35..411322a9f 100644 --- a/qt/aqt/mediasrv.py +++ b/qt/aqt/mediasrv.py @@ -93,6 +93,18 @@ def allroutes(path): mimetype="text/plain", ) + directory = os.path.realpath(directory) + path = os.path.normpath(path) + fullpath = os.path.realpath(os.path.join(directory, path)) + + # protect against directory transversal: https://security.openstack.org/guidelines/dg_using-file-paths.html + if not fullpath.startswith(directory): + return flask.Response( + "Path for '%s - %s' is a security leak!" % (directory, path), + status=HTTPStatus.FORBIDDEN, + mimetype="text/plain", + ) + if isdir: return flask.Response( "Path for '%s - %s' is a directory (not supported)!" % (directory, path), @@ -103,6 +115,9 @@ def allroutes(path): try: if devMode: print("Sending file '%s - %s'" % (directory, path)) + + path = os.path.basename(fullpath) + directory = os.path.dirname(fullpath) return flask.send_from_directory(directory, path) except Exception as error: From 0142ac3ca22dd7beaa5292ac239616af415df894 Mon Sep 17 00:00:00 2001 From: evandrocoan Date: Fri, 19 Jun 2020 10:10:08 -0300 Subject: [PATCH 4/7] Replaced send_from_directory by send_file to simplify the code --- qt/aqt/mediasrv.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/qt/aqt/mediasrv.py b/qt/aqt/mediasrv.py index 411322a9f..fb13309e1 100644 --- a/qt/aqt/mediasrv.py +++ b/qt/aqt/mediasrv.py @@ -116,9 +116,7 @@ def allroutes(path): if devMode: print("Sending file '%s - %s'" % (directory, path)) - path = os.path.basename(fullpath) - directory = os.path.dirname(fullpath) - return flask.send_from_directory(directory, path) + return flask.send_file(fullpath, conditional=True) except Exception as error: if devMode: From 1218d109e83ed194b44569154d1b8b158831efed Mon Sep 17 00:00:00 2001 From: evandrocoan Date: Thu, 2 Jul 2020 14:30:43 -0300 Subject: [PATCH 5/7] Fix jest unit tests after merge # Conflicts: # qt/aqt/mediasrv.py # qt/ts/src/ankimedia.ts # qt/ts/src/reviewer-exceptions.test.ts # qt/ts/src/reviewer.test.ts # ts/package.json --- qt/aqt/mediasrv.py | 58 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/qt/aqt/mediasrv.py b/qt/aqt/mediasrv.py index fb13309e1..f9cb62b74 100644 --- a/qt/aqt/mediasrv.py +++ b/qt/aqt/mediasrv.py @@ -1,6 +1,9 @@ # Copyright: Ankitects Pty Ltd and contributors # -*- coding: utf-8 -*- # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +from __future__ import annotations + import logging import os import re @@ -11,8 +14,12 @@ from http import HTTPStatus import flask import flask_cors # type: ignore +from flask import request from waitress.server import create_server +import aqt +from anki.collection import Collection +from anki.rsbackend import from_json_bytes from anki.utils import devMode from aqt.qt import * from aqt.utils import aqt_data_folder @@ -40,10 +47,11 @@ class MediaServer(threading.Thread): _ready = threading.Event() daemon = True - def __init__(self, mw, *args, **kwargs): + def __init__(self, mw: aqt.main.AnkiQt, *args, **kwargs): super().__init__(*args, **kwargs) self.is_shutdown = False - _redirectWebExports.mw = mw + _redirectWebExports.mw = mw # type: ignore + allroutes.mw = mw # type: ignore def run(self): try: @@ -53,7 +61,8 @@ class MediaServer(threading.Thread): else: logging.getLogger("waitress").setLevel(logging.ERROR) - self.server = create_server(app, host="127.0.0.1", port=0) + desired_port = int(os.getenv("ANKI_API_PORT", "0")) + self.server = create_server(app, host="127.0.0.1", port=desired_port) if devMode: print( "Serving on http://%s:%s" @@ -81,9 +90,9 @@ class MediaServer(threading.Thread): @app.route("/", defaults={"path": ""}) -@app.route("/") -def allroutes(path): - directory, path = _redirectWebExports(path) +@app.route("/", methods=["GET", "POST"]) +def allroutes(pathin): + directory, path = _redirectWebExports(pathin) try: isdir = os.path.isdir(os.path.join(directory, path)) except ValueError: @@ -112,9 +121,33 @@ def allroutes(path): mimetype="text/plain", ) + if devMode: + print("Sending file '%s - %s'" % (directory, path)) + try: - if devMode: - print("Sending file '%s - %s'" % (directory, path)) + if flask.request.method == "POST": + if not pathin.startswith("_anki/"): + return flask.Response( + "Path for '%s - %s' is a security leak!" % (directory, path), + status=HTTPStatus.FORBIDDEN, + mimetype="text/plain", + ) + + if path == "graphData": + body = request.data + data = graph_data(allroutes.mw.col, **from_json_bytes(body)) + elif path == "i18nResources": + data = allroutes.mw.col.backend.i18n_resources() + else: + return flask.Response( + "Path for '%s - %s' is a security leak!" % (directory, path), + status=HTTPStatus.FORBIDDEN, + mimetype="text/plain", + ) + + response = flask.make_response(data) + response.headers["Content-Type"] = "application/binary" + return response return flask.send_file(fullpath, conditional=True) @@ -168,3 +201,12 @@ def _redirectWebExports(path): return addMgr.addonsFolder(), addonPath return _redirectWebExports.mw.col.media.dir(), path + + +def graph_data(col: Collection, search: str, days: int) -> bytes: + try: + return col.backend.graphs(search=search, days=days) + except Exception as e: + # likely searching error + print(e) + return b"" From a99e45541463078c29dc24708f25aa3af694e2ab Mon Sep 17 00:00:00 2001 From: evandrocoan Date: Thu, 2 Jul 2020 20:22:20 -0300 Subject: [PATCH 6/7] Removed duplicated mediasrv.py security check and fixed invalid command/path error message. --- qt/aqt/mediasrv.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/qt/aqt/mediasrv.py b/qt/aqt/mediasrv.py index f9cb62b74..4f42c5511 100644 --- a/qt/aqt/mediasrv.py +++ b/qt/aqt/mediasrv.py @@ -126,13 +126,6 @@ def allroutes(pathin): try: if flask.request.method == "POST": - if not pathin.startswith("_anki/"): - return flask.Response( - "Path for '%s - %s' is a security leak!" % (directory, path), - status=HTTPStatus.FORBIDDEN, - mimetype="text/plain", - ) - if path == "graphData": body = request.data data = graph_data(allroutes.mw.col, **from_json_bytes(body)) @@ -140,7 +133,7 @@ def allroutes(pathin): data = allroutes.mw.col.backend.i18n_resources() else: return flask.Response( - "Path for '%s - %s' is a security leak!" % (directory, path), + "Post request to '%s - %s' is a security leak!" % (directory, path), status=HTTPStatus.FORBIDDEN, mimetype="text/plain", ) From 476b8819879942b0d69137d60e7520fafc522441 Mon Sep 17 00:00:00 2001 From: evandrocoan Date: Thu, 2 Jul 2020 20:38:27 -0300 Subject: [PATCH 7/7] Replaced flask.Response by flask.make_response to simplify the implementation and because make_response is preferred over Response as it respects the server defined Response type. --- qt/aqt/mediasrv.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/qt/aqt/mediasrv.py b/qt/aqt/mediasrv.py index 4f42c5511..51356106a 100644 --- a/qt/aqt/mediasrv.py +++ b/qt/aqt/mediasrv.py @@ -96,10 +96,9 @@ def allroutes(pathin): try: isdir = os.path.isdir(os.path.join(directory, path)) except ValueError: - return flask.Response( + return flask.make_response( "Path for '%s - %s' is too long!" % (directory, path), - status=HTTPStatus.BAD_REQUEST, - mimetype="text/plain", + HTTPStatus.BAD_REQUEST, ) directory = os.path.realpath(directory) @@ -108,17 +107,15 @@ def allroutes(pathin): # protect against directory transversal: https://security.openstack.org/guidelines/dg_using-file-paths.html if not fullpath.startswith(directory): - return flask.Response( + return flask.make_response( "Path for '%s - %s' is a security leak!" % (directory, path), - status=HTTPStatus.FORBIDDEN, - mimetype="text/plain", + HTTPStatus.FORBIDDEN, ) if isdir: - return flask.Response( + return flask.make_response( "Path for '%s - %s' is a directory (not supported)!" % (directory, path), - status=HTTPStatus.FORBIDDEN, - mimetype="text/plain", + HTTPStatus.FORBIDDEN, ) if devMode: @@ -132,10 +129,9 @@ def allroutes(pathin): elif path == "i18nResources": data = allroutes.mw.col.backend.i18n_resources() else: - return flask.Response( + return flask.make_response( "Post request to '%s - %s' is a security leak!" % (directory, path), - status=HTTPStatus.FORBIDDEN, - mimetype="text/plain", + HTTPStatus.FORBIDDEN, ) response = flask.make_response(data) @@ -154,10 +150,9 @@ def allroutes(pathin): # swallow it - user likely surfed away from # review screen before an image had finished # downloading - return flask.Response( + return flask.make_response( "For path '%s - %s' %s!" % (directory, path, error), - status=HTTPStatus.INTERNAL_SERVER_ERROR, - mimetype="text/plain", + HTTPStatus.INTERNAL_SERVER_ERROR, )