more TTS and sound work
- use the provided language and voices when playing on Mac
- fix a hang when waiting for termination
- allow players to return a rank for a given tag, so that the best-matching player can be chosen depending on the context (e.g., prioritize one player for videos, one TTS player for certain voices, etc.)
This commit is contained in:
parent
d4d16d35a8
commit
66e277e44b
112
qt/aqt/sound.py
112
qt/aqt/sound.py
@ -10,7 +10,7 @@ import time
|
|||||||
import wave
|
import wave
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from concurrent.futures import Future
|
from concurrent.futures import Future
|
||||||
from typing import Any, Callable, Dict, List, Optional, Tuple, cast
|
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import pyaudio
|
import pyaudio
|
||||||
|
|
||||||
@ -33,12 +33,18 @@ OnDoneCallback = Callable[[], None]
|
|||||||
|
|
||||||
class Player(ABC):
|
class Player(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def can_play(self, tag: AVTag) -> bool:
|
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
|
def rank_for_tag(self, tag: AVTag) -> Optional[int]:
|
||||||
pass
|
"""How suited this player is to playing tag.
|
||||||
|
|
||||||
|
AVPlayer will choose the player that returns the highest rank
|
||||||
|
for a given tag.
|
||||||
|
|
||||||
|
If None, this player can not play the tag.
|
||||||
|
"""
|
||||||
|
|
||||||
def stop(self) -> None:
|
def stop(self) -> None:
|
||||||
"""Optional.
|
"""Optional.
|
||||||
@ -56,8 +62,13 @@ class Player(ABC):
|
|||||||
|
|
||||||
|
|
||||||
class SoundOrVideoPlayer(Player): # pylint: disable=abstract-method
|
class SoundOrVideoPlayer(Player): # pylint: disable=abstract-method
|
||||||
def can_play(self, tag: AVTag) -> bool:
|
default_rank = 0
|
||||||
return isinstance(tag, SoundOrVideoTag)
|
|
||||||
|
def rank_for_tag(self, tag: AVTag) -> Optional[int]:
|
||||||
|
if isinstance(tag, SoundOrVideoTag):
|
||||||
|
return self.default_rank
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
# Main playing interface
|
# Main playing interface
|
||||||
@ -138,14 +149,29 @@ class AVPlayer:
|
|||||||
self._play(next)
|
self._play(next)
|
||||||
|
|
||||||
def _play(self, tag: AVTag) -> None:
|
def _play(self, tag: AVTag) -> None:
|
||||||
for player in self.players:
|
best_player = self._best_player_for_tag(tag)
|
||||||
if player.can_play(tag):
|
if best_player:
|
||||||
self.current_player = player
|
self.current_player = best_player
|
||||||
gui_hooks.av_player_will_play(tag)
|
gui_hooks.av_player_will_play(tag)
|
||||||
player.play(tag, self._on_play_finished)
|
self.current_player.play(tag, self._on_play_finished)
|
||||||
return
|
else:
|
||||||
print("no players found for", tag)
|
print("no players found for", tag)
|
||||||
|
|
||||||
|
def _best_player_for_tag(self, tag: AVTag) -> Optional[Player]:
    """Return the player best suited to playing tag, or None.

    Every player in self.players is asked for its rank via
    rank_for_tag(); players that return None cannot play the tag and
    are skipped. The player with the highest rank is returned. When
    several players share the highest rank, the one that appears last
    in self.players is chosen.

    Returns None when no player can play the tag.
    """
    best_rank: Optional[int] = None
    best_player: Optional[Player] = None

    for player in self.players:
        rank = player.rank_for_tag(tag)
        if rank is None:
            continue
        # Compare the ranks only. Sorting (rank, player) tuples falls back
        # to comparing the player objects themselves when ranks are equal,
        # and players define no ordering, so that raises TypeError.
        if best_rank is None or rank >= best_rank:
            best_rank = rank
            best_player = player

    return best_player
|
||||||
|
|
||||||
|
|
||||||
av_player = AVPlayer()
|
av_player = AVPlayer()
|
||||||
|
|
||||||
@ -181,43 +207,50 @@ class PlayerInterrupted(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class SimpleProcessPlayer(SoundOrVideoPlayer):
|
class SimpleProcessPlayer(Player): # pylint: disable=abstract-method
|
||||||
"A player that invokes a new process for each file to play."
|
"A player that invokes a new process for each tag to play."
|
||||||
|
|
||||||
args: List[str] = []
|
args: List[str] = []
|
||||||
env: Optional[Dict[str, str]] = None
|
env: Optional[Dict[str, str]] = None
|
||||||
|
|
||||||
def __init__(self, taskman: TaskManager):
|
def __init__(self, taskman: TaskManager):
|
||||||
self._taskman = taskman
|
self._taskman = taskman
|
||||||
_terminate_flag = False
|
self._terminate_flag = False
|
||||||
|
self._process: Optional[subprocess.Popen] = None
|
||||||
|
|
||||||
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
|
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
|
||||||
stag = cast(SoundOrVideoTag, tag)
|
|
||||||
self._terminate_flag = False
|
|
||||||
self._taskman.run(
|
self._taskman.run(
|
||||||
lambda: self._play(stag.filename), lambda res: self._on_done(res, on_done)
|
lambda: self._play(tag), lambda res: self._on_done(res, on_done)
|
||||||
)
|
)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
self._terminate_flag = True
|
self._terminate_flag = True
|
||||||
# block until stopped
|
# block until stopped
|
||||||
while self._terminate_flag:
|
t = time.time()
|
||||||
|
while self._terminate_flag and time.time() - t < 10:
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
def _play(self, filename: str) -> None:
|
def _play(self, tag: AVTag) -> None:
|
||||||
process = subprocess.Popen(self.args + [filename], env=self.env)
|
assert isinstance(tag, SoundOrVideoTag)
|
||||||
|
self._process = subprocess.Popen(self.args + [tag.filename], env=self.env)
|
||||||
|
self._wait_for_termination()
|
||||||
|
|
||||||
|
def _wait_for_termination(self):
|
||||||
|
try:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
process.wait(0.1)
|
self._process.wait(0.1)
|
||||||
if process.returncode != 0:
|
if self._process.returncode != 0:
|
||||||
print(f"player got return code: {process.returncode}")
|
print(f"player got return code: {self._process.returncode}")
|
||||||
return
|
return
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
pass
|
pass
|
||||||
if self._terminate_flag:
|
if self._terminate_flag:
|
||||||
process.terminate()
|
self._process.terminate()
|
||||||
self._terminate_flag = False
|
|
||||||
raise PlayerInterrupted()
|
raise PlayerInterrupted()
|
||||||
|
finally:
|
||||||
|
self._process = None
|
||||||
|
self._terminate_flag = False
|
||||||
|
|
||||||
def _on_done(self, ret: Future, cb: OnDoneCallback) -> None:
|
def _on_done(self, ret: Future, cb: OnDoneCallback) -> None:
|
||||||
try:
|
try:
|
||||||
@ -228,7 +261,7 @@ class SimpleProcessPlayer(SoundOrVideoPlayer):
|
|||||||
cb()
|
cb()
|
||||||
|
|
||||||
|
|
||||||
class SimpleMpvPlayer(SimpleProcessPlayer):
|
class SimpleMpvPlayer(SimpleProcessPlayer, SoundOrVideoPlayer):
|
||||||
args, env = _packagedCmd(
|
args, env = _packagedCmd(
|
||||||
[
|
[
|
||||||
"mpv",
|
"mpv",
|
||||||
@ -248,7 +281,7 @@ class SimpleMpvPlayer(SimpleProcessPlayer):
|
|||||||
self.args += ["--no-config", "--include=" + conf_path]
|
self.args += ["--no-config", "--include=" + conf_path]
|
||||||
|
|
||||||
|
|
||||||
class SimpleMplayerPlayer(SimpleProcessPlayer):
|
class SimpleMplayerPlayer(SimpleProcessPlayer, SoundOrVideoPlayer):
|
||||||
args, env = _packagedCmd(["mplayer", "-really-quiet", "-noautosub"])
|
args, env = _packagedCmd(["mplayer", "-really-quiet", "-noautosub"])
|
||||||
if isWin:
|
if isWin:
|
||||||
args += ["-ao", "win32"]
|
args += ["-ao", "win32"]
|
||||||
@ -302,9 +335,9 @@ class MpvManager(MPV, SoundOrVideoPlayer):
|
|||||||
self.default_argv += ["--no-config", "--include=" + conf_path]
|
self.default_argv += ["--no-config", "--include=" + conf_path]
|
||||||
|
|
||||||
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
|
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
|
||||||
stag = cast(SoundOrVideoTag, tag)
|
assert isinstance(tag, SoundOrVideoTag)
|
||||||
self._on_done = on_done
|
self._on_done = on_done
|
||||||
path = os.path.join(os.getcwd(), stag.filename)
|
path = os.path.join(os.getcwd(), tag.filename)
|
||||||
self.command("loadfile", path, "append-play")
|
self.command("loadfile", path, "append-play")
|
||||||
|
|
||||||
def stop(self) -> None:
|
def stop(self) -> None:
|
||||||
@ -343,27 +376,14 @@ class MpvManager(MPV, SoundOrVideoPlayer):
|
|||||||
class SimpleMplayerSlaveModePlayer(SimpleMplayerPlayer):
|
class SimpleMplayerSlaveModePlayer(SimpleMplayerPlayer):
|
||||||
def __init__(self, taskman: TaskManager):
|
def __init__(self, taskman: TaskManager):
|
||||||
super().__init__(taskman)
|
super().__init__(taskman)
|
||||||
|
|
||||||
self._process: Optional[subprocess.Popen] = None
|
|
||||||
|
|
||||||
self.args.append("-slave")
|
self.args.append("-slave")
|
||||||
|
|
||||||
def _play(self, filename: str) -> None:
|
def _play(self, tag: AVTag) -> None:
|
||||||
|
assert isinstance(tag, SoundOrVideoTag)
|
||||||
self._process = subprocess.Popen(
|
self._process = subprocess.Popen(
|
||||||
self.args + [filename], env=self.env, stdin=subprocess.PIPE
|
self.args + [tag.filename], env=self.env, stdin=subprocess.PIPE
|
||||||
)
|
)
|
||||||
while True:
|
self._wait_for_termination()
|
||||||
try:
|
|
||||||
self._process.wait(0.1)
|
|
||||||
if self._process.returncode != 0:
|
|
||||||
print(f"player got return code: {self._process.returncode}")
|
|
||||||
return
|
|
||||||
except subprocess.TimeoutExpired:
|
|
||||||
pass
|
|
||||||
if self._terminate_flag:
|
|
||||||
self._process.terminate()
|
|
||||||
self._terminate_flag = False
|
|
||||||
raise PlayerInterrupted()
|
|
||||||
|
|
||||||
def command(self, text: str) -> None:
|
def command(self, text: str) -> None:
|
||||||
"""Send a command over the slave interface.
|
"""Send a command over the slave interface.
|
||||||
|
157
qt/aqt/tts.py
157
qt/aqt/tts.py
@ -1,67 +1,134 @@
|
|||||||
"""
|
"""
|
||||||
todo
|
Basic text to speech support.
|
||||||
|
|
||||||
|
Users can use the following in their card template:
|
||||||
|
|
||||||
|
{{tts en_US:Field}}
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
{{tts ja_JP voices=Kyoko,Otoya,Another_name:Field}}
|
||||||
|
|
||||||
|
The first argument must be a language code. If provided,
|
||||||
|
voices is a comma-separated list of one or more voices that
|
||||||
|
the user would prefer. Spaces must not be included.
|
||||||
|
Underscores will be converted to spaces.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
from dataclasses import dataclass
|
||||||
from concurrent.futures import Future
|
from typing import List, Optional, cast
|
||||||
from typing import cast
|
|
||||||
|
|
||||||
from anki.sound import AVTag, TTSTag
|
from anki.sound import AVTag, TTSTag
|
||||||
from aqt.sound import OnDoneCallback, Player, PlayerInterrupted
|
from aqt.sound import SimpleProcessPlayer
|
||||||
from aqt.taskman import TaskManager
|
from aqt.taskman import TaskManager
|
||||||
|
|
||||||
|
|
||||||
class TTSPlayer(Player): # pylint: disable=abstract-method
|
@dataclass
|
||||||
def can_play(self, tag: AVTag) -> bool:
|
class TTSArgs:
|
||||||
return isinstance(tag, TTSTag)
|
# requested language
|
||||||
|
lang: str
|
||||||
|
# preferred voices, will use first available if possible
|
||||||
|
voices: List[str]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_string(cls, args: List[str]) -> TTSArgs:
|
||||||
|
voices: Optional[List[str]] = None
|
||||||
|
|
||||||
|
lang = args[0]
|
||||||
|
|
||||||
|
for arg in args[1:]:
|
||||||
|
try:
|
||||||
|
key, val = arg.split("=")
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
key = key.strip()
|
||||||
|
val = val.strip().replace("_", " ")
|
||||||
|
|
||||||
|
if key == "voices":
|
||||||
|
voices = val.split(",")
|
||||||
|
|
||||||
|
return TTSArgs(voices=voices or [], lang=lang)
|
||||||
|
|
||||||
|
|
||||||
class MacTTSPlayer(TTSPlayer):
|
# Mac support
|
||||||
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MacVoice:
    """A single voice entry parsed from the output of `say -v ?`."""

    # voice name; this is the value handed to `say -v`
    name: str
    # language code listed next to the voice name
    lang: str


# A voice line looks like "<name>  <lang>  # <sample sentence>". The name may
# itself contain spaces (e.g. "Bad News"), so it is matched lazily up to the
# last whitespace-delimited token before the "#" that starts the sample text.
VOICE_HELP_LINE_RE = re.compile(r"^(.+?)\s+(\S+)\s+#.*$")


def parse_voice_line(line: str) -> Optional[MacVoice]:
    """Parse one line of `say -v ?` output.

    Returns a MacVoice holding the voice name and language code, or None
    if the line does not have the "<name> <lang> # <sample>" layout.
    """
    m = VOICE_HELP_LINE_RE.match(line)
    if m is None:
        return None
    return MacVoice(name=m.group(1), lang=m.group(2))
|
||||||
|
|
||||||
|
|
||||||
|
class MacTTSPlayer(SimpleProcessPlayer):
|
||||||
def __init__(self, taskman: TaskManager):
|
def __init__(self, taskman: TaskManager):
|
||||||
self._taskman = taskman
|
super().__init__(taskman)
|
||||||
self._terminate_flag = False
|
self._available_voices: Optional[List[MacVoice]] = None
|
||||||
|
|
||||||
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
|
def _play(self, tag: AVTag) -> None:
|
||||||
ttag = cast(TTSTag, tag)
|
ttag = cast(TTSTag, tag)
|
||||||
self._taskman.run(
|
voice = self.voice_for_tag(ttag)
|
||||||
lambda: self._play(ttag), lambda ret: self._on_done(ret, on_done)
|
|
||||||
)
|
|
||||||
|
|
||||||
def _play(self, tag: TTSTag) -> None:
|
self._process = subprocess.Popen(
|
||||||
process = subprocess.Popen(
|
["say", "-v", voice.name, "-f", "-"],
|
||||||
["say", "-v", "Alex", "-f", "-"],
|
|
||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
stderr=subprocess.DEVNULL,
|
stderr=subprocess.DEVNULL,
|
||||||
)
|
)
|
||||||
# write the input text to stdin
|
# write the input text to stdin
|
||||||
process.stdin.write(tag.text.encode("utf8"))
|
self._process.stdin.write(ttag.text.encode("utf8"))
|
||||||
process.stdin.close()
|
self._process.stdin.close()
|
||||||
# and wait for termination
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
process.wait(0.1)
|
|
||||||
if process.returncode != 0:
|
|
||||||
print(f"player got return code: {process.returncode}")
|
|
||||||
return
|
|
||||||
except subprocess.TimeoutExpired:
|
|
||||||
pass
|
|
||||||
if self._terminate_flag:
|
|
||||||
process.terminate()
|
|
||||||
self._terminate_flag = False
|
|
||||||
raise PlayerInterrupted()
|
|
||||||
|
|
||||||
def _on_done(self, ret: Future, cb: OnDoneCallback) -> None:
|
self._wait_for_termination()
|
||||||
try:
|
|
||||||
ret.result()
|
|
||||||
except PlayerInterrupted:
|
|
||||||
# don't fire done callback when interrupted
|
|
||||||
return
|
|
||||||
cb()
|
|
||||||
|
|
||||||
def stop(self):
|
def rank_for_tag(self, tag: AVTag) -> Optional[int]:
|
||||||
self._terminate_flag = True
|
if not isinstance(tag, TTSTag):
|
||||||
# block until stopped
|
return None
|
||||||
while self._terminate_flag:
|
|
||||||
time.sleep(0.1)
|
# todo
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def voices(self) -> List[MacVoice]:
    """Return the voices listed by `say -v ?`, querying the command once.

    The first call runs `say -v ?`, parses each output line with
    parse_voice_line() and stores the result on the instance; later calls
    return the stored list.

    Raises subprocess.CalledProcessError if `say` exits with a non-zero
    status (the command is run with check=True).
    """
    # Test against None rather than truthiness: an empty result is still a
    # valid cached answer and must not trigger the subprocess again.
    if self._available_voices is None:
        cmd = subprocess.run(
            ["say", "-v", "?"], capture_output=True, check=True, encoding="utf8"
        )
        parsed = (parse_voice_line(line) for line in cmd.stdout.splitlines())
        # lines that do not describe a voice parse to None and are dropped
        self._available_voices = [voice for voice in parsed if voice is not None]

    return self._available_voices
|
||||||
|
|
||||||
|
def voice_for_tag(self, tag: TTSTag) -> MacVoice:
    """Choose the voice to use for tag.

    The tag's arguments are parsed with TTSArgs.from_string(). Preference
    order: the first requested voice whose name matches an available
    voice, then the first available voice whose language equals the
    requested language, then the first available voice.
    """
    requested = TTSArgs.from_string(tag.args)
    available = self.voices()

    # requested voices are tried in the order the user listed them
    for wanted_name in requested.voices:
        for candidate in available:
            if candidate.name == wanted_name:
                return candidate

    # no requested voice is available; try the requested language
    for candidate in available:
        if candidate.lang == requested.lang:
            return candidate

    # nothing matched; use whatever voice is listed first
    return available[0]
|
||||||
|
@ -381,8 +381,8 @@ field</a>
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_tts() {
|
fn test_tts() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tts_filter("tts lang=en_US", "foo"),
|
tts_filter("tts en_US voices=Bob,Jane", "foo"),
|
||||||
"[anki:tts][lang=en_US]foo[/anki:tts]"
|
"[anki:tts][en_US voices=Bob,Jane]foo[/anki:tts]"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user