anki/qt/aqt/mediasrv.py
RumovZ 42cbe42f06
Plaintext import/export (#1850)
* Add crate csv

* Add start of csv importing on backend

* Add Menomosyne serializer

* Add csv and json importing on backend

* Add plaintext importing on frontend

* Add csv metadata extraction on backend

* Add csv importing with GUI

* Fix missing dfa file in build

Added compile_data_attr, then re-ran cargo/update.py.

* Don't use doubly buffered reader in csv

* Escape HTML entities if CSV is not HTML

Also use name 'is_html' consistently.

* Use decimal number as foreign ease (like '2.5')

* ForeignCard.ivl → ForeignCard.interval

* Only allow fixed set of CSV delimiters

* Map timestamp of ForeignCard to native due time

* Don't trim CSV records

* Document use of empty strings for defaults

* Avoid creating CardGenContexts for every note

This requires CardGenContext to be generic, so it works both with an
owned and borrowed notetype.

* Show all accepted file types  in import file picker

* Add import_json_file()

* factor → ease_factor

* delimter_from_value → delimiter_from_value

* Map columns to fields, not the other way around

* Fallback to current config for csv metadata

* Add start of new import csv screen

* Temporary fix for compilation issue on Linux/Mac

* Disable jest bazel action for import-csv

Jest fails with an error code if no tests are available, but this would
not be noticable on Windows as Jest is not run there.

* Fix field mapping issue

* Revert "Temporary fix for compilation issue on Linux/Mac"

This reverts commit 21f8a261408cdae49ec031aa21a1b659c4f66d82.

* Add HtmlSwitch and move Switch to components

* Fix spacing and make selectors consistent

* Fix shortcut tooltip

* Place import button at the top with path

* Fix meta column indices

* Remove NotetypeForString

* Fix queue and type of foreign cards

* Support different dupe resolution strategies

* Allow dupe resolution selection when importing CSV

* Test import of unnormalized text

Close  #1863.

* Fix logging of foreign notes

* Implement CSV exports

* Use db_scalar() in notes_table_len()

* Rework CSV metadata

- Notetypes and decks are either defined by a global id or by a column.
- If a notetype id is provided, its field map must also be specified.
- If a notetype column is provided, fields are now mapped by index
instead of name at import time. So the first non-meta column is used for
the first field of every note, regardless of notetype. This makes
importing easier and should improve compatiblity with files without a
notetype column.
- Ensure first field can be mapped to a column.
- Meta columns must be defined as `#[meta name]:[column index]` instead
of in the `#columns` tag.
- Column labels contain the raw names defined by the file and must be
prettified by the frontend.

* Adjust frontend to new backend column mapping

* Add force flags for is_html and delimiter

* Detect if CSV is HTML by field content

* Update dupe resolution labels

* Simplify selectors

* Fix coalescence of oneofs in TS

* Disable meta columns from selection

Plus a lot of refactoring.

* Make import button stick to the bottom

* Write delimiter and html flag into csv

* Refetch field map after notetype change

* Fix log labels for csv import

* Log notes whose deck/notetype was missing

* Fix hiding of empty log queues

* Implement adding tags to all notes of a csv

* Fix dupe resolution not being set in log

* Implement adding tags to updated notes of a csv

* Check first note field is not empty

* Temporary fix for build on Linux/Mac

* Fix inverted html check (dae)

* Remove unused ftl string

* Delimiter → Separator

* Remove commented-out line

* Don't accept .json files

* Tweak tag ftl strings

* Remove redundant blur call

* Strip sound and add spaces in csv export

* Export HTML by default

* Fix unset deck in Mnemosyne import

Also accept both numbers and strings for notetypes and decks in JSON.

* Make DupeResolution::Update the default

* Fix missing dot in extension

* Make column indices 1-based

* Remove StickContainer from TagEditor

Fixes line breaking, border and z index on ImportCsvPage.

* Assign different key combos to tag editors

* Log all updated duplicates

Add a log field for the true number of found notes.

* Show identical notes as skipped

* Split tag-editor into separate ts module (dae)

* Add progress for CSV export

* Add progress for text import

* Tidy-ups after tag-editor split (dae)

- import-csv no longer depends on editor
- remove some commented lines
2022-06-01 20:26:16 +10:00

550 lines
16 KiB
Python

# Copyright: Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
from __future__ import annotations
import logging
import mimetypes
import os
import re
import sys
import threading
import time
import traceback
from dataclasses import dataclass
from http import HTTPStatus
from typing import Callable
import flask
import flask_cors # type: ignore
from flask import Response, request
from waitress.server import create_server
import aqt
import aqt.main
import aqt.operations
from anki import hooks
from anki._vendor import stringcase
from anki.collection import OpChanges
from anki.decks import DeckConfigsForUpdate, UpdateDeckConfigs
from anki.scheduler.v3 import NextStates
from anki.utils import dev_mode
from aqt.changenotetype import ChangeNotetypeDialog
from aqt.deckoptions import DeckOptionsDialog
from aqt.import_export.import_csv_dialog import ImportCsvDialog
from aqt.operations.deck import update_deck_configs as update_deck_configs_op
from aqt.qt import *
app = flask.Flask(__name__, root_path="/fake")
flask_cors.CORS(app)
@dataclass
class LocalFileRequest:
# base folder, eg media folder
root: str
# path to file relative to root folder
path: str
@dataclass
class BundledFileRequest:
# path relative to aqt data folder
path: str
@dataclass
class NotFound:
message: str
DynamicRequest = Callable[[], Response]
class MediaServer(threading.Thread):
_ready = threading.Event()
daemon = True
def __init__(self, mw: aqt.main.AnkiQt) -> None:
super().__init__()
self.is_shutdown = False
# map of webview ids to pages
self._page_html: dict[int, str] = {}
def run(self) -> None:
try:
if dev_mode:
# idempotent if logging has already been set up
logging.basicConfig()
logging.getLogger("waitress").setLevel(logging.ERROR)
desired_host = os.getenv("ANKI_API_HOST", "127.0.0.1")
desired_port = int(os.getenv("ANKI_API_PORT", "0"))
self.server = create_server(
app,
host=desired_host,
port=desired_port,
clear_untrusted_proxy_headers=True,
)
if dev_mode:
print(
"Serving on http://%s:%s"
% (self.server.effective_host, self.server.effective_port) # type: ignore
)
self._ready.set()
self.server.run()
except Exception:
if not self.is_shutdown:
raise
def shutdown(self) -> None:
self.is_shutdown = True
sockets = list(self.server._map.values()) # type: ignore
for socket in sockets:
socket.handle_close()
# https://github.com/Pylons/webtest/blob/4b8a3ebf984185ff4fefb31b4d0cf82682e1fcf7/webtest/http.py#L93-L104
self.server.task_dispatcher.shutdown()
def getPort(self) -> int:
self._ready.wait()
return int(self.server.effective_port) # type: ignore
def set_page_html(self, id: int, html: str) -> None:
self._page_html[id] = html
def get_page_html(self, id: int) -> str | None:
return self._page_html.get(id)
def clear_page_html(self, id: int) -> None:
try:
del self._page_html[id]
except KeyError:
pass
@app.route("/favicon.ico")
def favicon() -> Response:
request = BundledFileRequest(os.path.join("imgs", "favicon.ico"))
return _handle_builtin_file_request(request)
def _mime_for_path(path: str) -> str:
"Mime type for provided path/filename."
if path.endswith(".css"):
# some users may have invalid mime type in the Windows registry
return "text/css"
elif path.endswith(".js"):
return "application/javascript"
else:
# autodetect
mime, _encoding = mimetypes.guess_type(path)
return mime or "application/octet-stream"
def _handle_local_file_request(request: LocalFileRequest) -> Response:
directory = request.root
path = request.path
try:
isdir = os.path.isdir(os.path.join(directory, path))
except ValueError:
return flask.make_response(
f"Path for '{directory} - {path}' is too long!",
HTTPStatus.BAD_REQUEST,
)
directory = os.path.realpath(directory)
path = os.path.normpath(path)
fullpath = os.path.abspath(os.path.join(directory, path))
# protect against directory transversal: https://security.openstack.org/guidelines/dg_using-file-paths.html
if not fullpath.startswith(directory):
return flask.make_response(
f"Path for '{directory} - {path}' is a security leak!",
HTTPStatus.FORBIDDEN,
)
if isdir:
return flask.make_response(
f"Path for '{directory} - {path}' is a directory (not supported)!",
HTTPStatus.FORBIDDEN,
)
try:
mimetype = _mime_for_path(fullpath)
if os.path.exists(fullpath):
if fullpath.endswith(".css"):
# caching css files prevents flicker in the webview, but we want
# a short cache
max_age = 10
elif fullpath.endswith(".js"):
# don't cache js files
max_age = 0
else:
max_age = 60 * 60
return flask.send_file(
fullpath, mimetype=mimetype, conditional=True, max_age=max_age # type: ignore[call-arg]
)
else:
print(f"Not found: {path}")
return flask.make_response(
f"Invalid path: {path}",
HTTPStatus.NOT_FOUND,
)
except Exception as error:
if dev_mode:
print(
"Caught HTTP server exception,\n%s"
% "".join(traceback.format_exception(*sys.exc_info())),
)
# swallow it - user likely surfed away from
# review screen before an image had finished
# downloading
return flask.make_response(
str(error),
HTTPStatus.INTERNAL_SERVER_ERROR,
)
def _builtin_data(path: str) -> bytes:
"""Return data from file in aqt/data folder.
Path must use forward slash separators."""
# overriden location?
if data_folder := os.getenv("ANKI_DATA_FOLDER"):
full_path = os.path.join(data_folder, path)
with open(full_path, "rb") as f:
return f.read()
else:
if is_win and not getattr(sys, "frozen", False):
# default Python resource loader expects backslashes on Windows
path = path.replace("/", "\\")
reader = aqt.__loader__.get_resource_reader("aqt") # type: ignore
with reader.open_resource(path) as f:
return f.read()
def _handle_builtin_file_request(request: BundledFileRequest) -> Response:
path = request.path
mimetype = _mime_for_path(path)
data_path = f"data/web/{path}"
try:
data = _builtin_data(data_path)
return Response(data, mimetype=mimetype)
except FileNotFoundError:
return flask.make_response(
f"Invalid path: {path}",
HTTPStatus.NOT_FOUND,
)
except Exception as error:
if dev_mode:
print(
"Caught HTTP server exception,\n%s"
% "".join(traceback.format_exception(*sys.exc_info())),
)
# swallow it - user likely surfed away from
# review screen before an image had finished
# downloading
return flask.make_response(
str(error),
HTTPStatus.INTERNAL_SERVER_ERROR,
)
@app.route("/<path:pathin>", methods=["GET", "POST"])
def handle_request(pathin: str) -> Response:
request = _extract_request(pathin)
if dev_mode:
print(f"{time.time():.3f} {flask.request.method} /{pathin}")
if isinstance(request, NotFound):
print(request.message)
return flask.make_response(
f"Invalid path: {pathin}",
HTTPStatus.NOT_FOUND,
)
elif callable(request):
return _handle_dynamic_request(request)
elif isinstance(request, BundledFileRequest):
return _handle_builtin_file_request(request)
elif isinstance(request, LocalFileRequest):
return _handle_local_file_request(request)
else:
return flask.make_response(
f"unexpected request: {pathin}",
HTTPStatus.FORBIDDEN,
)
def _extract_internal_request(
path: str,
) -> BundledFileRequest | DynamicRequest | NotFound | None:
"Catch /_anki references and rewrite them to web export folder."
prefix = "_anki/"
if not path.startswith(prefix):
return None
dirname = os.path.dirname(path)
filename = os.path.basename(path)
additional_prefix = None
if dirname == "_anki":
if flask.request.method == "POST":
return _extract_collection_post_request(filename)
elif get_handler := _extract_dynamic_get_request(filename):
return get_handler
# remap legacy top-level references
base, ext = os.path.splitext(filename)
if ext == ".css":
additional_prefix = "css/"
elif ext == ".js":
if base in ("browsersel", "jquery-ui", "jquery", "plot"):
additional_prefix = "js/vendor/"
else:
additional_prefix = "js/"
# handle requests for vendored libraries
elif dirname == "_anki/js/vendor":
base, ext = os.path.splitext(filename)
if base == "jquery":
base = "jquery.min"
additional_prefix = "js/vendor/"
elif base == "jquery-ui":
base = "jquery-ui.min"
additional_prefix = "js/vendor/"
elif base == "browsersel":
base = "css_browser_selector.min"
additional_prefix = "js/vendor/"
if additional_prefix:
oldpath = path
path = f"{prefix}{additional_prefix}{base}{ext}"
print(f"legacy {oldpath} remapped to {path}")
return BundledFileRequest(path=path[len(prefix) :])
def _extract_addon_request(path: str) -> LocalFileRequest | NotFound | None:
"Catch /_addons references and rewrite them to addons folder."
prefix = "_addons/"
if not path.startswith(prefix):
return None
addon_path = path[len(prefix) :]
try:
manager = aqt.mw.addonManager
except AttributeError as error:
if dev_mode:
print(f"_redirectWebExports: {error}")
return None
try:
addon, sub_path = addon_path.split("/", 1)
except ValueError:
return None
if not addon:
return None
pattern = manager.getWebExports(addon)
if not pattern:
return None
if re.fullmatch(pattern, sub_path):
return LocalFileRequest(root=manager.addonsFolder(), path=addon_path)
return NotFound(message=f"couldn't locate item in add-on folder {path}")
def _extract_request(
path: str,
) -> LocalFileRequest | BundledFileRequest | DynamicRequest | NotFound:
if internal := _extract_internal_request(path):
return internal
elif addon := _extract_addon_request(path):
return addon
if not aqt.mw.col:
return NotFound(message=f"collection not open, ignore request for {path}")
path = hooks.media_file_filter(path)
return LocalFileRequest(root=aqt.mw.col.media.dir(), path=path)
def congrats_info() -> bytes:
if not aqt.mw.col.sched._is_finished():
aqt.mw.taskman.run_on_main(lambda: aqt.mw.moveToState("review"))
return raw_backend_request("congrats_info")()
def get_deck_configs_for_update() -> bytes:
config_bytes = aqt.mw.col._backend.get_deck_configs_for_update_raw(request.data)
configs = DeckConfigsForUpdate.FromString(config_bytes)
configs.have_addons = aqt.mw.addonManager.dirty
return configs.SerializeToString()
def update_deck_configs() -> bytes:
# the regular change tracking machinery expects to be started on the main
# thread and uses a callback on success, so we need to run this op on
# main, and return immediately from the web request
input = UpdateDeckConfigs()
input.ParseFromString(request.data)
def on_success(changes: OpChanges) -> None:
if isinstance(window := aqt.mw.app.activeWindow(), DeckOptionsDialog):
window.reject()
def handle_on_main() -> None:
update_deck_configs_op(parent=aqt.mw, input=input).success(
on_success
).run_in_background()
aqt.mw.taskman.run_on_main(handle_on_main)
return b""
def next_card_states() -> bytes:
if states := aqt.mw.reviewer.get_next_states():
return states.SerializeToString()
else:
return b""
def set_next_card_states() -> bytes:
key = request.headers.get("key", "")
input = NextStates()
input.ParseFromString(request.data)
aqt.mw.reviewer.set_next_states(key, input)
return b""
def change_notetype() -> bytes:
data = request.data
def handle_on_main() -> None:
window = aqt.mw.app.activeWindow()
if isinstance(window, ChangeNotetypeDialog):
window.save(data)
aqt.mw.taskman.run_on_main(handle_on_main)
return b""
def import_csv() -> bytes:
data = request.data
def handle_on_main() -> None:
window = aqt.mw.app.activeWindow()
if isinstance(window, ImportCsvDialog):
window.do_import(data)
aqt.mw.taskman.run_on_main(handle_on_main)
return b""
post_handler_list = [
congrats_info,
get_deck_configs_for_update,
update_deck_configs,
next_card_states,
set_next_card_states,
change_notetype,
import_csv,
]
exposed_backend_list = [
# DeckService
"get_deck_names",
# I18nService
"i18n_resources",
# ImportExportService
"get_csv_metadata",
# NotesService
"get_field_names",
"get_note",
# NotetypesService
"get_notetype_names",
"get_change_notetype_info",
# StatsService
"card_stats",
"graphs",
"get_graph_preferences",
"set_graph_preferences",
# TagsService
"complete_tag",
]
def raw_backend_request(endpoint: str) -> Callable[[], bytes]:
# check for key at startup
from anki._backend import RustBackend
assert hasattr(RustBackend, f"{endpoint}_raw")
return lambda: getattr(aqt.mw.col._backend, f"{endpoint}_raw")(request.data)
# all methods in here require a collection
post_handlers = {
stringcase.camelcase(handler.__name__): handler for handler in post_handler_list
} | {
stringcase.camelcase(handler): raw_backend_request(handler)
for handler in exposed_backend_list
}
def _extract_collection_post_request(path: str) -> DynamicRequest | NotFound:
if not aqt.mw.col:
return NotFound(message=f"collection not open, ignore request for {path}")
if handler := post_handlers.get(path):
# convert bytes/None into response
def wrapped() -> Response:
try:
if data := handler():
response = flask.make_response(data)
response.headers["Content-Type"] = "application/binary"
else:
response = flask.make_response("", HTTPStatus.NO_CONTENT)
except:
print(traceback.format_exc())
response = flask.make_response("", HTTPStatus.INTERNAL_SERVER_ERROR)
return response
return wrapped
else:
return NotFound(message=f"{path} not found")
def _handle_dynamic_request(request: DynamicRequest) -> Response:
try:
return request()
except Exception as e:
return flask.make_response(str(e), HTTPStatus.INTERNAL_SERVER_ERROR)
def legacy_page_data() -> Response:
id = int(request.args["id"])
if html := aqt.mw.mediaServer.get_page_html(id):
return Response(html, mimetype="text/html")
else:
return flask.make_response("page not found", HTTPStatus.NOT_FOUND)
# this currently only handles a single method; in the future, idempotent
# requests like i18nResources should probably be moved here
def _extract_dynamic_get_request(path: str) -> DynamicRequest | None:
if path == "legacyPageData":
return legacy_page_data
else:
return None