more TTS and sound work

- use provided language and voices when playing on Mac
- fix hang when waiting for termination
- allow players to return a rank for a given tag, which will allow for the best matching player to be chosen depending on the context (e.g., prioritize one player for videos, one TTS player for certain voices, etc.)
2020-01-21 11:34:16 +10:00 · 2020-01-21 11:34:16 +10:00 · 66e277e44b
commit 66e277e44b
parent d4d16d35a8
3 changed files with 189 additions and 102 deletions
--- a/qt/aqt/sound.py
+++ b/qt/aqt/sound.py
@ -10,7 +10,7 @@ import time
 import wave
 from abc import ABC, abstractmethod
 from concurrent.futures import Future
-from typing import Any, Callable, Dict, List, Optional, Tuple, cast
+from typing import Any, Callable, Dict, List, Optional, Tuple

 import pyaudio

@ -33,12 +33,18 @@ OnDoneCallback = Callable[[], None]

 class Player(ABC):
    @abstractmethod
-    def can_play(self, tag: AVTag) -> bool:
+    def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
        pass

    @abstractmethod
-    def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
-        pass
+    def rank_for_tag(self, tag: AVTag) -> Optional[int]:
+        """How suited this player is to playing tag.
+
+        AVPlayer will choose the player that returns the highest rank
+        for a given tag.
+
+        If None, this player can not play the tag.
+        """

    def stop(self) -> None:
        """Optional.
@ -56,8 +62,13 @@ class Player(ABC):


 class SoundOrVideoPlayer(Player):  # pylint: disable=abstract-method
-    def can_play(self, tag: AVTag) -> bool:
-        return isinstance(tag, SoundOrVideoTag)
+    default_rank = 0
+
+    def rank_for_tag(self, tag: AVTag) -> Optional[int]:
+        if isinstance(tag, SoundOrVideoTag):
+            return self.default_rank
+        else:
+            return None


 # Main playing interface
@ -138,14 +149,29 @@ class AVPlayer:
            self._play(next)

    def _play(self, tag: AVTag) -> None:
-        for player in self.players:
-            if player.can_play(tag):
-                self.current_player = player
+        best_player = self._best_player_for_tag(tag)
+        if best_player:
+            self.current_player = best_player
            gui_hooks.av_player_will_play(tag)
-                player.play(tag, self._on_play_finished)
-                return
+            self.current_player.play(tag, self._on_play_finished)
+        else:
            print("no players found for", tag)

+    def _best_player_for_tag(self, tag: AVTag) -> Optional[Player]:
+        ranked = []
+        for p in self.players:
+            rank = p.rank_for_tag(tag)
+            if rank is not None:
+                ranked.append((rank, p))
+
+        ranked.sort()
+
+        print(ranked)
+        if ranked:
+            return ranked[-1][1]
+        else:
+            return None
+

 av_player = AVPlayer()

@ -181,43 +207,50 @@ class PlayerInterrupted(Exception):
    pass


-class SimpleProcessPlayer(SoundOrVideoPlayer):
-    "A player that invokes a new process for each file to play."
+class SimpleProcessPlayer(Player):  # pylint: disable=abstract-method
+    "A player that invokes a new process for each tag to play."

    args: List[str] = []
    env: Optional[Dict[str, str]] = None

    def __init__(self, taskman: TaskManager):
        self._taskman = taskman
-        _terminate_flag = False
+        self._terminate_flag = False
+        self._process: Optional[subprocess.Popen] = None

    def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
-        stag = cast(SoundOrVideoTag, tag)
-        self._terminate_flag = False
        self._taskman.run(
-            lambda: self._play(stag.filename), lambda res: self._on_done(res, on_done)
+            lambda: self._play(tag), lambda res: self._on_done(res, on_done)
        )

    def stop(self):
        self._terminate_flag = True
        # block until stopped
-        while self._terminate_flag:
+        t = time.time()
+        while self._terminate_flag and time.time() - t < 10:
            time.sleep(0.1)

-    def _play(self, filename: str) -> None:
-        process = subprocess.Popen(self.args + [filename], env=self.env)
+    def _play(self, tag: AVTag) -> None:
+        assert isinstance(tag, SoundOrVideoTag)
+        self._process = subprocess.Popen(self.args + [tag.filename], env=self.env)
+        self._wait_for_termination()
+
+    def _wait_for_termination(self):
+        try:
            while True:
                try:
-                process.wait(0.1)
-                if process.returncode != 0:
-                    print(f"player got return code: {process.returncode}")
+                    self._process.wait(0.1)
+                    if self._process.returncode != 0:
+                        print(f"player got return code: {self._process.returncode}")
                    return
                except subprocess.TimeoutExpired:
                    pass
                if self._terminate_flag:
-                process.terminate()
-                self._terminate_flag = False
+                    self._process.terminate()
                    raise PlayerInterrupted()
+        finally:
+            self._process = None
+            self._terminate_flag = False

    def _on_done(self, ret: Future, cb: OnDoneCallback) -> None:
        try:
@ -228,7 +261,7 @@ class SimpleProcessPlayer(SoundOrVideoPlayer):
        cb()


-class SimpleMpvPlayer(SimpleProcessPlayer):
+class SimpleMpvPlayer(SimpleProcessPlayer, SoundOrVideoPlayer):
    args, env = _packagedCmd(
        [
            "mpv",
@ -248,7 +281,7 @@ class SimpleMpvPlayer(SimpleProcessPlayer):
        self.args += ["--no-config", "--include=" + conf_path]


-class SimpleMplayerPlayer(SimpleProcessPlayer):
+class SimpleMplayerPlayer(SimpleProcessPlayer, SoundOrVideoPlayer):
    args, env = _packagedCmd(["mplayer", "-really-quiet", "-noautosub"])
    if isWin:
        args += ["-ao", "win32"]
@ -302,9 +335,9 @@ class MpvManager(MPV, SoundOrVideoPlayer):
        self.default_argv += ["--no-config", "--include=" + conf_path]

    def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
-        stag = cast(SoundOrVideoTag, tag)
+        assert isinstance(tag, SoundOrVideoTag)
        self._on_done = on_done
-        path = os.path.join(os.getcwd(), stag.filename)
+        path = os.path.join(os.getcwd(), tag.filename)
        self.command("loadfile", path, "append-play")

    def stop(self) -> None:
@ -343,27 +376,14 @@ class MpvManager(MPV, SoundOrVideoPlayer):
 class SimpleMplayerSlaveModePlayer(SimpleMplayerPlayer):
    def __init__(self, taskman: TaskManager):
        super().__init__(taskman)
-
-        self._process: Optional[subprocess.Popen] = None
-
        self.args.append("-slave")

-    def _play(self, filename: str) -> None:
+    def _play(self, tag: AVTag) -> None:
+        assert isinstance(tag, SoundOrVideoTag)
        self._process = subprocess.Popen(
-            self.args + [filename], env=self.env, stdin=subprocess.PIPE
+            self.args + [tag.filename], env=self.env, stdin=subprocess.PIPE
        )
-        while True:
-            try:
-                self._process.wait(0.1)
-                if self._process.returncode != 0:
-                    print(f"player got return code: {self._process.returncode}")
-                return
-            except subprocess.TimeoutExpired:
-                pass
-            if self._terminate_flag:
-                self._process.terminate()
-                self._terminate_flag = False
-                raise PlayerInterrupted()
+        self._wait_for_termination()

    def command(self, text: str) -> None:
        """Send a command over the slave interface.
--- a/qt/aqt/tts.py
+++ b/qt/aqt/tts.py
@ -1,67 +1,134 @@
 """
-todo
+Basic text to speech support.
+
+Users can use the following in their card template:
+
+{{tts en_US:Field}}
+
+or
+
+{{tts ja_JP voices=Kyoko,Otoya,Another_name:Field}}
+
+The first argument must be a language code. If provided,
+voices is a comma-separated list of one or more voices that
+the user would prefer. Spaces must not be included.
+Underscores will be converted to spaces.
 """

+from __future__ import annotations
+
+import re
 import subprocess
-import time
-from concurrent.futures import Future
-from typing import cast
+from dataclasses import dataclass
+from typing import List, Optional, cast

 from anki.sound import AVTag, TTSTag
-from aqt.sound import OnDoneCallback, Player, PlayerInterrupted
+from aqt.sound import SimpleProcessPlayer
 from aqt.taskman import TaskManager


-class TTSPlayer(Player):  # pylint: disable=abstract-method
-    def can_play(self, tag: AVTag) -> bool:
-        return isinstance(tag, TTSTag)
+@dataclass
+class TTSArgs:
+    # requested language
+    lang: str
+    # preferred voices, will use first available if possible
+    voices: List[str]
+
+    @classmethod
+    def from_string(cls, args: List[str]) -> TTSArgs:
+        voices: Optional[List[str]] = None
+
+        lang = args[0]
+
+        for arg in args[1:]:
+            try:
+                key, val = arg.split("=")
+            except ValueError:
+                continue
+            key = key.strip()
+            val = val.strip().replace("_", " ")
+
+            if key == "voices":
+                voices = val.split(",")
+
+        return TTSArgs(voices=voices or [], lang=lang)


-class MacTTSPlayer(TTSPlayer):
+# Mac support
+##########################################################################
+
+
+@dataclass
+class MacVoice:
+    name: str
+    lang: str
+
+
+VOICE_HELP_LINE_RE = re.compile(r"^(\S+)\s+(\S+)\s+.*$")
+
+
+def parse_voice_line(line: str) -> Optional[MacVoice]:
+    m = VOICE_HELP_LINE_RE.match(line)
+    if not m:
+        return None
+    return MacVoice(name=m.group(1), lang=m.group(2))
+
+
+class MacTTSPlayer(SimpleProcessPlayer):
    def __init__(self, taskman: TaskManager):
-        self._taskman = taskman
-        self._terminate_flag = False
+        super().__init__(taskman)
+        self._available_voices: Optional[List[MacVoice]] = None

-    def play(self, tag: AVTag, on_done: OnDoneCallback) -> None:
+    def _play(self, tag: AVTag) -> None:
        ttag = cast(TTSTag, tag)
-        self._taskman.run(
-            lambda: self._play(ttag), lambda ret: self._on_done(ret, on_done)
-        )
+        voice = self.voice_for_tag(ttag)

-    def _play(self, tag: TTSTag) -> None:
-        process = subprocess.Popen(
-            ["say", "-v", "Alex", "-f", "-"],
+        self._process = subprocess.Popen(
+            ["say", "-v", voice.name, "-f", "-"],
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        # write the input text to stdin
-        process.stdin.write(tag.text.encode("utf8"))
-        process.stdin.close()
-        # and wait for termination
-        while True:
-            try:
-                process.wait(0.1)
-                if process.returncode != 0:
-                    print(f"player got return code: {process.returncode}")
-                return
-            except subprocess.TimeoutExpired:
-                pass
-            if self._terminate_flag:
-                process.terminate()
-                self._terminate_flag = False
-                raise PlayerInterrupted()
+        self._process.stdin.write(ttag.text.encode("utf8"))
+        self._process.stdin.close()

-    def _on_done(self, ret: Future, cb: OnDoneCallback) -> None:
-        try:
-            ret.result()
-        except PlayerInterrupted:
-            # don't fire done callback when interrupted
-            return
-        cb()
+        self._wait_for_termination()

-    def stop(self):
-        self._terminate_flag = True
-        # block until stopped
-        while self._terminate_flag:
-            time.sleep(0.1)
+    def rank_for_tag(self, tag: AVTag) -> Optional[int]:
+        if not isinstance(tag, TTSTag):
+            return None
+
+        # todo
+        return 0
+
+    def voices(self) -> List[MacVoice]:
+        if not self._available_voices:
+            cmd = subprocess.run(
+                ["say", "-v", "?"], capture_output=True, check=True, encoding="utf8"
+            )
+            self._available_voices = []
+            for line in cmd.stdout.splitlines():
+                voice = parse_voice_line(line)
+                if voice:
+                    self._available_voices.append(voice)
+
+        return self._available_voices
+
+    def voice_for_tag(self, tag: TTSTag) -> MacVoice:
+        args = TTSArgs.from_string(tag.args)
+        voices = self.voices()
+
+        # any requested voices match?
+        for requested_voice in args.voices:
+            avail_voice = next((x for x in voices if x.name == requested_voice), None)
+            if avail_voice:
+                return avail_voice
+
+        # requested language match?
+        avail_voice = next((x for x in voices if x.lang == args.lang), None)
+        if avail_voice:
+            return avail_voice
+
+        # fall back on first voice
+        return voices[0]
--- a/rslib/src/template_filters.rs
+++ b/rslib/src/template_filters.rs
@ -381,8 +381,8 @@ field</a>
    #[test]
    fn test_tts() {
        assert_eq!(
-            tts_filter("tts lang=en_US", "foo"),
-            "[anki:tts][lang=en_US]foo[/anki:tts]"
+            tts_filter("tts en_US voices=Bob,Jane", "foo"),
+            "[anki:tts][en_US voices=Bob,Jane]foo[/anki:tts]"
        );
    }
 }