more TTS and sound work

- use provided language and voices when playing on Mac
- fix hang in waiting for termination
- allow players to return a rank for a given tag,
which allows the best-matching player to be chosen
depending on the context (e.g., prioritize one player for videos,
one TTS player for certain voices, etc.)
This commit is contained in:
Damien Elmes 2020-01-21 11:34:16 +10:00
parent d4d16d35a8
commit 66e277e44b
3 changed files with 189 additions and 102 deletions

View File

@ -10,7 +10,7 @@ import time
import wave import wave
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from concurrent.futures import Future from concurrent.futures import Future
from typing import Any, Callable, Dict, List, Optional, Tuple, cast from typing import Any, Callable, Dict, List, Optional, Tuple
import pyaudio import pyaudio
@ -33,12 +33,18 @@ OnDoneCallback = Callable[[], None]
class Player(ABC): class Player(ABC):
@abstractmethod @abstractmethod
def can_play(self, tag: AVTag) -> bool: def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
pass pass
@abstractmethod @abstractmethod
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None: def rank_for_tag(self, tag: AVTag) -> Optional[int]:
pass """How suited this player is to playing tag.
AVPlayer will choose the player that returns the highest rank
for a given tag.
If None, this player can not play the tag.
"""
def stop(self) -> None: def stop(self) -> None:
"""Optional. """Optional.
@ -56,8 +62,13 @@ class Player(ABC):
class SoundOrVideoPlayer(Player): # pylint: disable=abstract-method class SoundOrVideoPlayer(Player): # pylint: disable=abstract-method
def can_play(self, tag: AVTag) -> bool: default_rank = 0
return isinstance(tag, SoundOrVideoTag)
def rank_for_tag(self, tag: AVTag) -> Optional[int]:
if isinstance(tag, SoundOrVideoTag):
return self.default_rank
else:
return None
# Main playing interface # Main playing interface
@ -138,14 +149,29 @@ class AVPlayer:
self._play(next) self._play(next)
def _play(self, tag: AVTag) -> None: def _play(self, tag: AVTag) -> None:
for player in self.players: best_player = self._best_player_for_tag(tag)
if player.can_play(tag): if best_player:
self.current_player = player self.current_player = best_player
gui_hooks.av_player_will_play(tag) gui_hooks.av_player_will_play(tag)
player.play(tag, self._on_play_finished) self.current_player.play(tag, self._on_play_finished)
return else:
print("no players found for", tag) print("no players found for", tag)
def _best_player_for_tag(self, tag: AVTag) -> Optional[Player]:
ranked = []
for p in self.players:
rank = p.rank_for_tag(tag)
if rank is not None:
ranked.append((rank, p))
ranked.sort()
print(ranked)
if ranked:
return ranked[-1][1]
else:
return None
av_player = AVPlayer() av_player = AVPlayer()
@ -181,43 +207,50 @@ class PlayerInterrupted(Exception):
pass pass
class SimpleProcessPlayer(SoundOrVideoPlayer): class SimpleProcessPlayer(Player): # pylint: disable=abstract-method
"A player that invokes a new process for each file to play." "A player that invokes a new process for each tag to play."
args: List[str] = [] args: List[str] = []
env: Optional[Dict[str, str]] = None env: Optional[Dict[str, str]] = None
def __init__(self, taskman: TaskManager): def __init__(self, taskman: TaskManager):
self._taskman = taskman self._taskman = taskman
_terminate_flag = False self._terminate_flag = False
self._process: Optional[subprocess.Popen] = None
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None: def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
stag = cast(SoundOrVideoTag, tag)
self._terminate_flag = False
self._taskman.run( self._taskman.run(
lambda: self._play(stag.filename), lambda res: self._on_done(res, on_done) lambda: self._play(tag), lambda res: self._on_done(res, on_done)
) )
def stop(self): def stop(self):
self._terminate_flag = True self._terminate_flag = True
# block until stopped # block until stopped
while self._terminate_flag: t = time.time()
while self._terminate_flag and time.time() - t < 10:
time.sleep(0.1) time.sleep(0.1)
def _play(self, filename: str) -> None: def _play(self, tag: AVTag) -> None:
process = subprocess.Popen(self.args + [filename], env=self.env) assert isinstance(tag, SoundOrVideoTag)
self._process = subprocess.Popen(self.args + [tag.filename], env=self.env)
self._wait_for_termination()
def _wait_for_termination(self):
try:
while True: while True:
try: try:
process.wait(0.1) self._process.wait(0.1)
if process.returncode != 0: if self._process.returncode != 0:
print(f"player got return code: {process.returncode}") print(f"player got return code: {self._process.returncode}")
return return
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
pass pass
if self._terminate_flag: if self._terminate_flag:
process.terminate() self._process.terminate()
self._terminate_flag = False
raise PlayerInterrupted() raise PlayerInterrupted()
finally:
self._process = None
self._terminate_flag = False
def _on_done(self, ret: Future, cb: OnDoneCallback) -> None: def _on_done(self, ret: Future, cb: OnDoneCallback) -> None:
try: try:
@ -228,7 +261,7 @@ class SimpleProcessPlayer(SoundOrVideoPlayer):
cb() cb()
class SimpleMpvPlayer(SimpleProcessPlayer): class SimpleMpvPlayer(SimpleProcessPlayer, SoundOrVideoPlayer):
args, env = _packagedCmd( args, env = _packagedCmd(
[ [
"mpv", "mpv",
@ -248,7 +281,7 @@ class SimpleMpvPlayer(SimpleProcessPlayer):
self.args += ["--no-config", "--include=" + conf_path] self.args += ["--no-config", "--include=" + conf_path]
class SimpleMplayerPlayer(SimpleProcessPlayer): class SimpleMplayerPlayer(SimpleProcessPlayer, SoundOrVideoPlayer):
args, env = _packagedCmd(["mplayer", "-really-quiet", "-noautosub"]) args, env = _packagedCmd(["mplayer", "-really-quiet", "-noautosub"])
if isWin: if isWin:
args += ["-ao", "win32"] args += ["-ao", "win32"]
@ -302,9 +335,9 @@ class MpvManager(MPV, SoundOrVideoPlayer):
self.default_argv += ["--no-config", "--include=" + conf_path] self.default_argv += ["--no-config", "--include=" + conf_path]
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None: def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
stag = cast(SoundOrVideoTag, tag) assert isinstance(tag, SoundOrVideoTag)
self._on_done = on_done self._on_done = on_done
path = os.path.join(os.getcwd(), stag.filename) path = os.path.join(os.getcwd(), tag.filename)
self.command("loadfile", path, "append-play") self.command("loadfile", path, "append-play")
def stop(self) -> None: def stop(self) -> None:
@ -343,27 +376,14 @@ class MpvManager(MPV, SoundOrVideoPlayer):
class SimpleMplayerSlaveModePlayer(SimpleMplayerPlayer): class SimpleMplayerSlaveModePlayer(SimpleMplayerPlayer):
def __init__(self, taskman: TaskManager): def __init__(self, taskman: TaskManager):
super().__init__(taskman) super().__init__(taskman)
self._process: Optional[subprocess.Popen] = None
self.args.append("-slave") self.args.append("-slave")
def _play(self, filename: str) -> None: def _play(self, tag: AVTag) -> None:
assert isinstance(tag, SoundOrVideoTag)
self._process = subprocess.Popen( self._process = subprocess.Popen(
self.args + [filename], env=self.env, stdin=subprocess.PIPE self.args + [tag.filename], env=self.env, stdin=subprocess.PIPE
) )
while True: self._wait_for_termination()
try:
self._process.wait(0.1)
if self._process.returncode != 0:
print(f"player got return code: {self._process.returncode}")
return
except subprocess.TimeoutExpired:
pass
if self._terminate_flag:
self._process.terminate()
self._terminate_flag = False
raise PlayerInterrupted()
def command(self, text: str) -> None: def command(self, text: str) -> None:
"""Send a command over the slave interface. """Send a command over the slave interface.

View File

@ -1,67 +1,134 @@
""" """
todo Basic text to speech support.
Users can use the following in their card template:
{{tts en_US:Field}}
or
{{tts ja_JP voices=Kyoko,Otoya,Another_name:Field}}
The first argument must be a language code. If provided,
voices is a comma-separated list of one or more voices that
the user would prefer. Spaces must not be included.
Underscores will be converted to spaces.
""" """
from __future__ import annotations
import re
import subprocess import subprocess
import time from dataclasses import dataclass
from concurrent.futures import Future from typing import List, Optional, cast
from typing import cast
from anki.sound import AVTag, TTSTag from anki.sound import AVTag, TTSTag
from aqt.sound import OnDoneCallback, Player, PlayerInterrupted from aqt.sound import SimpleProcessPlayer
from aqt.taskman import TaskManager from aqt.taskman import TaskManager
class TTSPlayer(Player): # pylint: disable=abstract-method @dataclass
def can_play(self, tag: AVTag) -> bool: class TTSArgs:
return isinstance(tag, TTSTag) # requested language
lang: str
# preferred voices, will use first available if possible
voices: List[str]
@classmethod
def from_string(cls, args: List[str]) -> TTSArgs:
voices: Optional[List[str]] = None
lang = args[0]
for arg in args[1:]:
try:
key, val = arg.split("=")
except ValueError:
continue
key = key.strip()
val = val.strip().replace("_", " ")
if key == "voices":
voices = val.split(",")
return TTSArgs(voices=voices or [], lang=lang)
class MacTTSPlayer(TTSPlayer): # Mac support
##########################################################################
@dataclass
class MacVoice:
    """One voice parsed from a line of voice-listing output."""

    # voice name; may contain spaces (e.g. "Bad News")
    name: str
    # language column of the listing, e.g. "en_US"
    lang: str


# A listing line is expected to look like
#   "<name>   <lang>   # <sample sentence>"
# (NOTE(review): format taken from `say -v ?` output - confirm on target
# macOS versions). The name is matched lazily up to the last token before
# the "#", so names containing spaces are kept whole.
VOICE_HELP_LINE_RE = re.compile(r"^\s*(.+?)\s+(\S+)\s+#.*$")

# The looser two-token pattern used previously; still applied to lines
# that carry no "#" so nothing that used to parse is now rejected.
_VOICE_HELP_LINE_FALLBACK_RE = re.compile(r"^(\S+)\s+(\S+)\s+.*$")


def parse_voice_line(line: str) -> Optional[MacVoice]:
    """Parse one listing line into a MacVoice.

    Returns None when the line matches neither pattern.
    """
    m = VOICE_HELP_LINE_RE.match(line) or _VOICE_HELP_LINE_FALLBACK_RE.match(line)
    if not m:
        return None
    return MacVoice(name=m.group(1), lang=m.group(2))
class MacTTSPlayer(SimpleProcessPlayer):
def __init__(self, taskman: TaskManager): def __init__(self, taskman: TaskManager):
self._taskman = taskman super().__init__(taskman)
self._terminate_flag = False self._available_voices: Optional[List[MacVoice]] = None
def play(self, tag: AVTag, on_done: OnDoneCallback) -> None: def _play(self, tag: AVTag) -> None:
ttag = cast(TTSTag, tag) ttag = cast(TTSTag, tag)
self._taskman.run( voice = self.voice_for_tag(ttag)
lambda: self._play(ttag), lambda ret: self._on_done(ret, on_done)
)
def _play(self, tag: TTSTag) -> None: self._process = subprocess.Popen(
process = subprocess.Popen( ["say", "-v", voice.name, "-f", "-"],
["say", "-v", "Alex", "-f", "-"],
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
) )
# write the input text to stdin # write the input text to stdin
process.stdin.write(tag.text.encode("utf8")) self._process.stdin.write(ttag.text.encode("utf8"))
process.stdin.close() self._process.stdin.close()
# and wait for termination
while True:
try:
process.wait(0.1)
if process.returncode != 0:
print(f"player got return code: {process.returncode}")
return
except subprocess.TimeoutExpired:
pass
if self._terminate_flag:
process.terminate()
self._terminate_flag = False
raise PlayerInterrupted()
def _on_done(self, ret: Future, cb: OnDoneCallback) -> None: self._wait_for_termination()
try:
ret.result()
except PlayerInterrupted:
# don't fire done callback when interrupted
return
cb()
def stop(self): def rank_for_tag(self, tag: AVTag) -> Optional[int]:
self._terminate_flag = True if not isinstance(tag, TTSTag):
# block until stopped return None
while self._terminate_flag:
time.sleep(0.1) # todo
return 0
def voices(self) -> List[MacVoice]:
    """Return the voices listed by `say -v ?`, querying only once.

    The parsed list is cached on the instance in _available_voices.
    Lines that parse_voice_line() can not parse are skipped.

    Raises subprocess.CalledProcessError if `say` exits with a
    non-zero status (check=True).
    """
    # Compare against None rather than truthiness: an empty list is a
    # valid cached result and must not trigger a new subprocess each call.
    if self._available_voices is None:
        cmd = subprocess.run(
            ["say", "-v", "?"], capture_output=True, check=True, encoding="utf8"
        )
        parsed = (parse_voice_line(line) for line in cmd.stdout.splitlines())
        self._available_voices = [voice for voice in parsed if voice is not None]

    return self._available_voices
def voice_for_tag(self, tag: TTSTag) -> MacVoice:
    """Choose the voice to use for tag.

    Preference order: the first of the tag's requested voices that is
    available, then the first available voice whose language equals the
    requested language, then the first available voice.
    """
    wanted = TTSArgs.from_string(tag.args)
    available = self.voices()

    # requested voices, in the order the user listed them
    for name in wanted.voices:
        for candidate in available:
            if candidate.name == name:
                return candidate

    # otherwise the first voice in the requested language
    for candidate in available:
        if candidate.lang == wanted.lang:
            return candidate

    # nothing matched; take whatever is listed first
    return available[0]

View File

@ -381,8 +381,8 @@ field</a>
#[test] #[test]
fn test_tts() { fn test_tts() {
assert_eq!( assert_eq!(
tts_filter("tts lang=en_US", "foo"), tts_filter("tts en_US voices=Bob,Jane", "foo"),
"[anki:tts][lang=en_US]foo[/anki:tts]" "[anki:tts][en_US voices=Bob,Jane]foo[/anki:tts]"
); );
} }
} }