From b876d97770f7c1794bbc24c3c627425d55e4d532 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Tue, 3 Mar 2020 15:36:05 +1000 Subject: [PATCH] use (or)json for DB bridge Some initial testing with orjson indicates performance varies from slightly better than pysqlite to about 2x slower depending on the type of query. Performance could be improved by building the Python list in rspy instead of sending back json that needs to be decoded, but it may make more sense to rewrite the hotspots in Rust instead. More testing is required in any case. --- pylib/anki/collection.py | 3 +- pylib/anki/dbproxy.py | 44 ++++++++--------- pylib/anki/rsbackend.py | 6 +++ pylib/setup.py | 1 + rslib/src/backend/dbproxy.rs | 94 +++++++++++++++++++++++++++++------- rslib/src/backend/mod.rs | 5 ++ rslib/src/storage/sqlite.rs | 1 + rspy/src/lib.rs | 7 +++ 8 files changed, 118 insertions(+), 43 deletions(-) diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index 30132eafd..409ab5fde 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -266,8 +266,9 @@ crt=?, mod=?, scm=?, dty=?, usn=?, ls=?, conf=?""", def reopen(self) -> None: "Reconnect to DB (after changing threads, etc)." + raise Exception("fixme") if not self.db: - self.db = DBProxy(self.path) + #self.db = DBProxy(self.path) self.media.connect() self._openLog() diff --git a/pylib/anki/dbproxy.py b/pylib/anki/dbproxy.py index be73a7410..84c3c45d7 100644 --- a/pylib/anki/dbproxy.py +++ b/pylib/anki/dbproxy.py @@ -1,12 +1,15 @@ # Copyright: Ankitects Pty Ltd and contributors # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -# fixme: lossy utf8 handling -# fixme: progress +from __future__ import annotations -from sqlite3 import dbapi2 as sqlite from typing import Any, Iterable, List, Optional, Sequence, Union +import anki + +# fixme: remember to null on close to avoid circular ref +# fixme: progress + # DBValue is actually Union[str, int, float, None], but if defined # that way, every call site needs to do a type check prior to using # the return values. @@ -20,28 +23,29 @@ class DBProxy: # Lifecycle ############### - def __init__(self, path: str) -> None: - self._db = sqlite.connect(path, timeout=0) + def __init__(self, backend: anki.rsbackend.RustBackend, path: str) -> None: + self._backend = backend self._path = path self.mod = False def close(self) -> None: - self._db.close() + # fixme + pass # Transactions ############### def commit(self) -> None: - self._db.commit() + # fixme + pass def rollback(self) -> None: - self._db.rollback() + # fixme + pass def setAutocommit(self, autocommit: bool) -> None: - if autocommit: - self._db.isolation_level = None - else: - self._db.isolation_level = "" + # fixme + pass # Querying ################ @@ -55,16 +59,8 @@ class DBProxy: if s.startswith(stmt): self.mod = True # fetch rows - curs = self._db.execute(sql, args) - if first_row_only: - row = curs.fetchone() - curs.close() - if row is not None: - return [row] - else: - return [] - else: - return curs.fetchall() + # fixme: first_row_only + return self._backend.db_query_json(sql, args) # Query shortcuts ################### @@ -98,8 +94,8 @@ class DBProxy: def executemany(self, sql: str, args: Iterable[Iterable[ValueForDB]]) -> None: self.mod = True - self._db.executemany(sql, args) + raise Exception("fixme") def executescript(self, sql: str) -> None: self.mod = True - self._db.executescript(sql) + raise Exception("fixme") diff --git a/pylib/anki/rsbackend.py b/pylib/anki/rsbackend.py index 18b405031..761a721c3 100644 --- a/pylib/anki/rsbackend.py +++ b/pylib/anki/rsbackend.py @@ -18,6 +18,7 @@ from typing import ( Any) import ankirspy # pytype: disable=import-error +import orjson import anki.backend_pb2 as pb import anki.buildinfo @@ -421,6 +422,11 @@ class RustBackend: return map(sqlrow_to_tuple, output.rows) + def db_query_json(self, sql: str, args: Iterable[ValueForDB]) -> List[DBRow]: + input = orjson.dumps(dict(sql=sql, args=args)) + output = self._backend.db_query(input) + return orjson.loads(output) + def translate_string_in( key: TR, **kwargs: Union[str, int, float] ) -> pb.TranslateStringIn: diff --git a/pylib/setup.py b/pylib/setup.py index ffb93affd..b23d783d7 100644 --- a/pylib/setup.py +++ b/pylib/setup.py @@ -21,6 +21,7 @@ setuptools.setup( "requests", "decorator", "protobuf", + "orjson", 'psutil; sys_platform == "win32"', 'distro; sys_platform != "darwin" and sys_platform != "win32"', ], diff --git a/rslib/src/backend/dbproxy.rs b/rslib/src/backend/dbproxy.rs index 3d8cca005..9e6bbe611 100644 --- a/rslib/src/backend/dbproxy.rs +++ b/rslib/src/backend/dbproxy.rs @@ -6,27 +6,85 @@ use crate::err::Result; use crate::storage::SqliteStorage; use rusqlite::types::{FromSql, FromSqlError, ToSql, ToSqlOutput, ValueRef}; use serde_derive::{Deserialize, Serialize}; -// -// #[derive(Deserialize)] -// struct DBRequest { -// sql: String, -// args: Vec, -// } -// + +// json implementation + +#[derive(Deserialize)] +pub(super) struct DBRequest { + sql: String, + args: Vec, +} + // #[derive(Serialize)] -// struct DBResult { +// pub(super) struct DBResult { // rows: Vec>, // } -// -// #[derive(Serialize, Deserialize, Debug)] -// #[serde(untagged)] -// enum SqlValue { -// Null, -// String(String), -// Int(i64), -// Float(f64), -// Blob(Vec), -// } +type DBResult = Vec>; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(untagged)] +pub(super) enum SqlValue { + Null, + String(String), + Int(i64), + Double(f64), + Blob(Vec), +} + +impl ToSql for SqlValue { + fn to_sql(&self) -> std::result::Result, rusqlite::Error> { + let val = match self { + SqlValue::Null => ValueRef::Null, + SqlValue::String(v) => ValueRef::Text(v.as_bytes()), + SqlValue::Int(v) => ValueRef::Integer(*v), + SqlValue::Double(v) => ValueRef::Real(*v), + SqlValue::Blob(v) => ValueRef::Blob(&v), + }; + Ok(ToSqlOutput::Borrowed(val)) + } +} + +impl FromSql for SqlValue { + fn column_result(value: ValueRef<'_>) -> std::result::Result { + let val = match value { + ValueRef::Null => SqlValue::Null, + ValueRef::Integer(i) => SqlValue::Int(i), + ValueRef::Real(v) => SqlValue::Double(v), + ValueRef::Text(v) => SqlValue::String(String::from_utf8_lossy(v).to_string()), + ValueRef::Blob(v) => SqlValue::Blob(v.to_vec()), + }; + Ok(val) + } +} + +pub(super) fn db_query_json_str(db: &SqliteStorage, input: &[u8]) -> Result { + let req: DBRequest = serde_json::from_slice(input)?; + let resp = db_query_json(db, req)?; + Ok(serde_json::to_string(&resp)?) +} + +pub(super) fn db_query_json(db: &SqliteStorage, input: DBRequest) -> Result { + let mut stmt = db.db.prepare_cached(&input.sql)?; + + let columns = stmt.column_count(); + + let mut rows = stmt.query(&input.args)?; + + let mut output_rows = vec![]; + + while let Some(row) = rows.next()? { + let mut orow = Vec::with_capacity(columns); + for i in 0..columns { + let v: SqlValue = row.get(i)?; + orow.push(v); + } + + output_rows.push(orow); + } + + Ok(output_rows) +} + // protobuf implementation impl ToSql for pb::SqlValue { diff --git a/rslib/src/backend/mod.rs b/rslib/src/backend/mod.rs index 6e88ed2a1..1a2cc1eaa 100644 --- a/rslib/src/backend/mod.rs +++ b/rslib/src/backend/mod.rs @@ -1,6 +1,7 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +use crate::backend::dbproxy::db_query_json_str; use crate::backend::dbproxy::db_query_proto; use crate::backend_proto::backend_input::Value; use crate::backend_proto::{Empty, RenderedTemplateReplacement, SyncMediaIn}; @@ -497,6 +498,10 @@ impl Backend { fn db_query(&self, input: pb::DbQueryIn) -> Result { db_query_proto(&self.col, input) } + + pub fn db_query_json(&self, input: &[u8]) -> Result { + db_query_json_str(&self.col, input) + } } fn translate_arg_to_fluent_val(arg: &pb::TranslateArgValue) -> FluentValue { diff --git a/rslib/src/storage/sqlite.rs b/rslib/src/storage/sqlite.rs index 853a22759..4f6a13bd1 100644 --- a/rslib/src/storage/sqlite.rs +++ b/rslib/src/storage/sqlite.rs @@ -26,6 +26,7 @@ macro_rules! cached_sql { }}; } +// currently public for dbproxy #[derive(Debug)] pub struct SqliteStorage { // currently crate-visible for dbproxy diff --git a/rspy/src/lib.rs b/rspy/src/lib.rs index f24fccc76..80e8c762f 100644 --- a/rspy/src/lib.rs +++ b/rspy/src/lib.rs @@ -70,6 +70,13 @@ impl Backend { self.backend.set_progress_callback(Some(Box::new(func))); } } + + fn db_query(&mut self, py: Python, input: &PyBytes) -> PyObject { + let in_bytes = input.as_bytes(); + let out_string = self.backend.db_query_json(in_bytes).unwrap(); + let out_obj = PyBytes::new(py, out_string.as_bytes()); + out_obj.into() + } } // I18n backend