use (or)json for DB bridge

Some initial testing with orjson indicates performance varies from
slightly better than pysqlite to about 2x slower depending on the type
of query.

Performance could be improved by building the Python list in rspy
instead of sending back json that needs to be decoded, but it may make
more sense to rewrite the hotspots in Rust instead. More testing is
required in any case.
This commit is contained in:
Damien Elmes 2020-03-03 15:36:05 +10:00
parent 04ca8ec038
commit b876d97770
8 changed files with 118 additions and 43 deletions

View File

@ -266,8 +266,9 @@ crt=?, mod=?, scm=?, dty=?, usn=?, ls=?, conf=?""",
def reopen(self) -> None: def reopen(self) -> None:
"Reconnect to DB (after changing threads, etc)." "Reconnect to DB (after changing threads, etc)."
raise Exception("fixme")
if not self.db: if not self.db:
self.db = DBProxy(self.path) #self.db = DBProxy(self.path)
self.media.connect() self.media.connect()
self._openLog() self._openLog()

View File

@ -1,12 +1,15 @@
# Copyright: Ankitects Pty Ltd and contributors # Copyright: Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
# fixme: lossy utf8 handling from __future__ import annotations
# fixme: progress
from sqlite3 import dbapi2 as sqlite
from typing import Any, Iterable, List, Optional, Sequence, Union from typing import Any, Iterable, List, Optional, Sequence, Union
import anki
# fixme: remember to null on close to avoid circular ref
# fixme: progress
# DBValue is actually Union[str, int, float, None], but if defined # DBValue is actually Union[str, int, float, None], but if defined
# that way, every call site needs to do a type check prior to using # that way, every call site needs to do a type check prior to using
# the return values. # the return values.
@ -20,28 +23,29 @@ class DBProxy:
# Lifecycle # Lifecycle
############### ###############
def __init__(self, path: str) -> None: def __init__(self, backend: anki.rsbackend.RustBackend, path: str) -> None:
self._db = sqlite.connect(path, timeout=0) self._backend = backend
self._path = path self._path = path
self.mod = False self.mod = False
def close(self) -> None: def close(self) -> None:
self._db.close() # fixme
pass
# Transactions # Transactions
############### ###############
def commit(self) -> None: def commit(self) -> None:
self._db.commit() # fixme
pass
def rollback(self) -> None: def rollback(self) -> None:
self._db.rollback() # fixme
pass
def setAutocommit(self, autocommit: bool) -> None: def setAutocommit(self, autocommit: bool) -> None:
if autocommit: # fixme
self._db.isolation_level = None pass
else:
self._db.isolation_level = ""
# Querying # Querying
################ ################
@ -55,16 +59,8 @@ class DBProxy:
if s.startswith(stmt): if s.startswith(stmt):
self.mod = True self.mod = True
# fetch rows # fetch rows
curs = self._db.execute(sql, args) # fixme: first_row_only
if first_row_only: return self._backend.db_query_json(sql, args)
row = curs.fetchone()
curs.close()
if row is not None:
return [row]
else:
return []
else:
return curs.fetchall()
# Query shortcuts # Query shortcuts
################### ###################
@ -98,8 +94,8 @@ class DBProxy:
def executemany(self, sql: str, args: Iterable[Iterable[ValueForDB]]) -> None: def executemany(self, sql: str, args: Iterable[Iterable[ValueForDB]]) -> None:
self.mod = True self.mod = True
self._db.executemany(sql, args) raise Exception("fixme")
def executescript(self, sql: str) -> None: def executescript(self, sql: str) -> None:
self.mod = True self.mod = True
self._db.executescript(sql) raise Exception("fixme")

View File

@ -18,6 +18,7 @@ from typing import (
Any) Any)
import ankirspy # pytype: disable=import-error import ankirspy # pytype: disable=import-error
import orjson
import anki.backend_pb2 as pb import anki.backend_pb2 as pb
import anki.buildinfo import anki.buildinfo
@ -421,6 +422,11 @@ class RustBackend:
return map(sqlrow_to_tuple, output.rows) return map(sqlrow_to_tuple, output.rows)
def db_query_json(self, sql: str, args: Iterable[ValueForDB]) -> List[DBRow]:
    """Run a query through the Rust backend's JSON bridge and return rows.

    The sql string and its arguments are serialized with orjson, passed to
    the backend as bytes, and the backend's JSON reply is decoded back into
    a list of rows.
    """
    # orjson serializes concrete containers (list/tuple/dict), not arbitrary
    # iterables, so materialize args first; this also lets callers pass
    # generators. Renamed from `input` to avoid shadowing the builtin.
    request = orjson.dumps(dict(sql=sql, args=list(args)))
    output = self._backend.db_query(request)
    return orjson.loads(output)
def translate_string_in( def translate_string_in(
key: TR, **kwargs: Union[str, int, float] key: TR, **kwargs: Union[str, int, float]
) -> pb.TranslateStringIn: ) -> pb.TranslateStringIn:

View File

@ -21,6 +21,7 @@ setuptools.setup(
"requests", "requests",
"decorator", "decorator",
"protobuf", "protobuf",
"orjson",
'psutil; sys_platform == "win32"', 'psutil; sys_platform == "win32"',
'distro; sys_platform != "darwin" and sys_platform != "win32"', 'distro; sys_platform != "darwin" and sys_platform != "win32"',
], ],

View File

@ -6,27 +6,85 @@ use crate::err::Result;
use crate::storage::SqliteStorage; use crate::storage::SqliteStorage;
use rusqlite::types::{FromSql, FromSqlError, ToSql, ToSqlOutput, ValueRef}; use rusqlite::types::{FromSql, FromSqlError, ToSql, ToSqlOutput, ValueRef};
use serde_derive::{Deserialize, Serialize}; use serde_derive::{Deserialize, Serialize};
//
// #[derive(Deserialize)] // json implementation
// struct DBRequest {
// sql: String, #[derive(Deserialize)]
// args: Vec<SqlValue>, pub(super) struct DBRequest {
// } sql: String,
// args: Vec<SqlValue>,
}
// #[derive(Serialize)] // #[derive(Serialize)]
// struct DBResult { // pub(super) struct DBResult {
// rows: Vec<Vec<SqlValue>>, // rows: Vec<Vec<SqlValue>>,
// } // }
// type DBResult = Vec<Vec<SqlValue>>;
// #[derive(Serialize, Deserialize, Debug)]
// #[serde(untagged)] #[derive(Serialize, Deserialize, Debug)]
// enum SqlValue { #[serde(untagged)]
// Null, pub(super) enum SqlValue {
// String(String), Null,
// Int(i64), String(String),
// Float(f64), Int(i64),
// Blob(Vec<u8>), Double(f64),
// } Blob(Vec<u8>),
}
impl ToSql for SqlValue {
    /// Borrow this value as a rusqlite `ValueRef` so it can be bound as a
    /// statement argument without copying the underlying data.
    fn to_sql(&self) -> std::result::Result<ToSqlOutput<'_>, rusqlite::Error> {
        Ok(ToSqlOutput::Borrowed(match self {
            SqlValue::Null => ValueRef::Null,
            SqlValue::String(text) => ValueRef::Text(text.as_bytes()),
            SqlValue::Int(int) => ValueRef::Integer(*int),
            SqlValue::Double(dbl) => ValueRef::Real(*dbl),
            SqlValue::Blob(bytes) => ValueRef::Blob(bytes.as_slice()),
        }))
    }
}
impl FromSql for SqlValue {
    /// Convert a column value coming back from rusqlite into our
    /// JSON-serializable enum.
    fn column_result(value: ValueRef<'_>) -> std::result::Result<Self, FromSqlError> {
        Ok(match value {
            ValueRef::Null => SqlValue::Null,
            ValueRef::Integer(int) => SqlValue::Int(int),
            ValueRef::Real(dbl) => SqlValue::Double(dbl),
            // Deliberately lossy: invalid UTF-8 is replaced instead of
            // failing the whole query.
            ValueRef::Text(bytes) => SqlValue::String(String::from_utf8_lossy(bytes).into_owned()),
            ValueRef::Blob(bytes) => SqlValue::Blob(bytes.to_vec()),
        })
    }
}
/// JSON bridge entry point: decode the request bytes, run the query,
/// and re-encode the resulting rows as a JSON string for the caller.
pub(super) fn db_query_json_str(db: &SqliteStorage, input: &[u8]) -> Result<String> {
    let request: DBRequest = serde_json::from_slice(input)?;
    let rows = db_query_json(db, request)?;
    let encoded = serde_json::to_string(&rows)?;
    Ok(encoded)
}
/// Execute the request against the collection database, materializing
/// every row as a `Vec<SqlValue>`.
pub(super) fn db_query_json(db: &SqliteStorage, input: DBRequest) -> Result<DBResult> {
    let mut stmt = db.db.prepare_cached(&input.sql)?;
    let column_count = stmt.column_count();
    let mut rows = stmt.query(&input.args)?;
    let mut collected: DBResult = vec![];
    while let Some(row) = rows.next()? {
        // Gather one value per column, short-circuiting on the first error.
        let vals: Vec<SqlValue> = (0..column_count)
            .map(|idx| row.get(idx))
            .collect::<std::result::Result<_, _>>()?;
        collected.push(vals);
    }
    Ok(collected)
}
// protobuf implementation // protobuf implementation
impl ToSql for pb::SqlValue { impl ToSql for pb::SqlValue {

View File

@ -1,6 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use crate::backend::dbproxy::db_query_json_str;
use crate::backend::dbproxy::db_query_proto; use crate::backend::dbproxy::db_query_proto;
use crate::backend_proto::backend_input::Value; use crate::backend_proto::backend_input::Value;
use crate::backend_proto::{Empty, RenderedTemplateReplacement, SyncMediaIn}; use crate::backend_proto::{Empty, RenderedTemplateReplacement, SyncMediaIn};
@ -497,6 +498,10 @@ impl Backend {
fn db_query(&self, input: pb::DbQueryIn) -> Result<pb::DbQueryOut> { fn db_query(&self, input: pb::DbQueryIn) -> Result<pb::DbQueryOut> {
db_query_proto(&self.col, input) db_query_proto(&self.col, input)
} }
pub fn db_query_json(&self, input: &[u8]) -> Result<String> {
db_query_json_str(&self.col, input)
}
} }
fn translate_arg_to_fluent_val(arg: &pb::TranslateArgValue) -> FluentValue { fn translate_arg_to_fluent_val(arg: &pb::TranslateArgValue) -> FluentValue {

View File

@ -26,6 +26,7 @@ macro_rules! cached_sql {
}}; }};
} }
// currently public for dbproxy
#[derive(Debug)] #[derive(Debug)]
pub struct SqliteStorage { pub struct SqliteStorage {
// currently crate-visible for dbproxy // currently crate-visible for dbproxy

View File

@ -70,6 +70,13 @@ impl Backend {
self.backend.set_progress_callback(Some(Box::new(func))); self.backend.set_progress_callback(Some(Box::new(func)));
} }
} }
/// Python-facing half of the JSON DB bridge: takes a JSON request as a
/// Python bytes object, forwards it to the Rust backend, and hands the
/// backend's JSON reply back to Python as bytes.
fn db_query(&mut self, py: Python, input: &PyBytes) -> PyObject {
let in_bytes = input.as_bytes();
// NOTE(review): unwrap() will panic on any backend/SQL error, which
// aborts across the FFI boundary instead of raising a Python
// exception - should be mapped to a PyErr like other methods.
let out_string = self.backend.db_query_json(in_bytes).unwrap();
let out_obj = PyBytes::new(py, out_string.as_bytes());
out_obj.into()
}
} }
// I18n backend // I18n backend