From 56703e5f3a7c05ac6aef7d24fcafdf709d0811bb Mon Sep 17 00:00:00 2001 From: Ryan Aird Date: Mon, 14 Dec 2020 21:49:57 -0600 Subject: [PATCH 1/4] WinRT TTS API support --- qt/aqt/sound.py | 3 ++- qt/aqt/tts.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/qt/aqt/sound.py b/qt/aqt/sound.py index 4f9c324a7..e936e499b 100644 --- a/qt/aqt/sound.py +++ b/qt/aqt/sound.py @@ -916,9 +916,10 @@ def setup_audio(taskman: TaskManager, base_folder: str) -> None: av_player.players.append(MacTTSPlayer(taskman)) elif isWin: - from aqt.tts import WindowsTTSPlayer + from aqt.tts import WindowsTTSPlayer, WindowsRTTTSFilePlayer av_player.players.append(WindowsTTSPlayer(taskman)) + av_player.players.append(WindowsRTTTSFilePlayer(taskman)) # cleanup at shutdown atexit.register(av_player.shutdown) diff --git a/qt/aqt/tts.py b/qt/aqt/tts.py index 50c84761d..b3ee56fda 100644 --- a/qt/aqt/tts.py +++ b/qt/aqt/tts.py @@ -28,6 +28,8 @@ from __future__ import annotations import os import re import subprocess +import threading +import asyncio from concurrent.futures import Future from dataclasses import dataclass from operator import attrgetter @@ -516,3 +518,63 @@ if isWin: "eg. 
1.5 -> 15, 0.5 -> -5" speed = (speed * 10) - 10 return int(max(-10, min(10, speed))) + + @dataclass + class WindowsRTVoice(TTSVoice): + id: Any + + class WindowsRTTTSFilePlayer(TTSProcessPlayer): + voice_list = None + tmppath = os.path.join(tmpdir(), "tts.wav") + + def import_voices(self) -> None: + import winrt.windows.media.speechsynthesis as speechsynthesis + self.voice_list = speechsynthesis.SpeechSynthesizer.get_all_voices() + def get_available_voices(self) -> List[TTSVoice]: + t = threading.Thread(target=self.import_voices) + t.start() + t.join() + return list(map(self._voice_to_object, self.voice_list)) + def _voice_to_object(self, voice: Any) -> TTSVoice: + return WindowsRTVoice(id=voice.id, name=voice.display_name.replace(" ", "_"), lang=voice.language.replace("-", "_")) + def _play(self, tag: AVTag) -> None: + assert isinstance(tag, TTSTag) + match = self.voice_for_tag(tag) + assert match + voice = cast(WindowsRTVoice, match.voice) + + self._taskman.run_on_main( + lambda: gui_hooks.av_player_did_begin_playing(self, tag) + ) + asyncio.run(self.speakText(tag, voice.id)) + def _on_done(self, ret: Future, cb: OnDoneCallback): + ret.result() + + # inject file into the top of the audio queue + from aqt.sound import av_player + + av_player.insert_file(self.tmppath) + + # then tell player to advance, which will cause the file to be played + cb() + async def speakText(self, tag: TTSTag, voice_id): + import winrt.windows.media.speechsynthesis as speechsynthesis + import winrt.windows.storage.streams as streams + synthesizer = speechsynthesis.SpeechSynthesizer() + + voices = speechsynthesis.SpeechSynthesizer.get_all_voices() + voice_match = next(filter(lambda v: v.id == voice_id, voices)) + + assert voice_match + + synthesizer.voice = voice_match + synthesizer.options.speaking_rate = tag.speed + + stream = await synthesizer.synthesize_text_to_stream_async(tag.field_text) + inputStream = stream.get_input_stream_at(0) + dataReader = streams.DataReader(inputStream) + 
dataReader.load_async(stream.size) + f = open(self.tmppath, 'wb') + for x in range(stream.size): + f.write(bytes([dataReader.read_byte()])) + f.close() From 9a0b112a0fca8b2924885069297b862167e210bb Mon Sep 17 00:00:00 2001 From: Ryan Aird Date: Sat, 19 Dec 2020 20:10:23 -0600 Subject: [PATCH 2/4] Lower default rank for non-RT voices and restrict to Windows 10 October 2018 or greater --- qt/aqt/sound.py | 9 +++++++-- qt/aqt/tts.py | 18 +++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/qt/aqt/sound.py b/qt/aqt/sound.py index e936e499b..1b44adebc 100644 --- a/qt/aqt/sound.py +++ b/qt/aqt/sound.py @@ -5,6 +5,7 @@ from __future__ import annotations import atexit import os +import platform import re import subprocess import sys @@ -916,10 +917,14 @@ def setup_audio(taskman: TaskManager, base_folder: str) -> None: av_player.players.append(MacTTSPlayer(taskman)) elif isWin: - from aqt.tts import WindowsTTSPlayer, WindowsRTTTSFilePlayer + from aqt.tts import WindowsRTTTSFilePlayer, WindowsTTSPlayer av_player.players.append(WindowsTTSPlayer(taskman)) - av_player.players.append(WindowsRTTTSFilePlayer(taskman)) + + if platform.release() == "10": + # If Windows 10, ensure it's October 2018 update or later + if int(platform.version().split(".")[-1]) >= 17763: + av_player.players.append(WindowsRTTTSFilePlayer(taskman)) # cleanup at shutdown atexit.register(av_player.shutdown) diff --git a/qt/aqt/tts.py b/qt/aqt/tts.py index b3ee56fda..2536a8d56 100644 --- a/qt/aqt/tts.py +++ b/qt/aqt/tts.py @@ -25,11 +25,11 @@ expose the name of the engine, which would mean the user could write from __future__ import annotations +import asyncio import os import re import subprocess import threading -import asyncio from concurrent.futures import Future from dataclasses import dataclass from operator import attrgetter @@ -470,6 +470,7 @@ if isWin: return LCIDS.get(dec_str, "unknown") class WindowsTTSPlayer(TTSProcessPlayer): + default_rank = -1 try: speaker = 
win32com.client.Dispatch("SAPI.SpVoice") except: @@ -529,14 +530,22 @@ if isWin: def import_voices(self) -> None: import winrt.windows.media.speechsynthesis as speechsynthesis + self.voice_list = speechsynthesis.SpeechSynthesizer.get_all_voices() + def get_available_voices(self) -> List[TTSVoice]: t = threading.Thread(target=self.import_voices) t.start() t.join() return list(map(self._voice_to_object, self.voice_list)) + def _voice_to_object(self, voice: Any) -> TTSVoice: - return WindowsRTVoice(id=voice.id, name=voice.display_name.replace(" ", "_"), lang=voice.language.replace("-", "_")) + return WindowsRTVoice( + id=voice.id, + name=voice.display_name.replace(" ", "_"), + lang=voice.language.replace("-", "_"), + ) + def _play(self, tag: AVTag) -> None: assert isinstance(tag, TTSTag) match = self.voice_for_tag(tag) @@ -547,6 +556,7 @@ if isWin: lambda: gui_hooks.av_player_did_begin_playing(self, tag) ) asyncio.run(self.speakText(tag, voice.id)) + def _on_done(self, ret: Future, cb: OnDoneCallback): ret.result() @@ -557,9 +567,11 @@ if isWin: # then tell player to advance, which will cause the file to be played cb() + async def speakText(self, tag: TTSTag, voice_id): import winrt.windows.media.speechsynthesis as speechsynthesis import winrt.windows.storage.streams as streams + synthesizer = speechsynthesis.SpeechSynthesizer() voices = speechsynthesis.SpeechSynthesizer.get_all_voices() @@ -574,7 +586,7 @@ if isWin: inputStream = stream.get_input_stream_at(0) dataReader = streams.DataReader(inputStream) dataReader.load_async(stream.size) - f = open(self.tmppath, 'wb') + f = open(self.tmppath, "wb") for x in range(stream.size): f.write(bytes([dataReader.read_byte()])) f.close() From ac78698a1ebbe317ee66496acd91a343ad3187f0 Mon Sep 17 00:00:00 2001 From: Ryan Aird Date: Sat, 19 Dec 2020 20:15:12 -0600 Subject: [PATCH 3/4] Add rye761 to contributors list --- CONTRIBUTORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS 
index a1ebd645f..20813b6b5 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -66,9 +66,10 @@ David Allison Tsung-Han Yu Piotr Kubowicz RumovZ -Cecini +Cecini Krish Shah ianki +rye761 ******************** From a30064d5e9df07b0458d26a3c8e8112c5e64e58b Mon Sep 17 00:00:00 2001 From: Ryan Aird Date: Sun, 20 Dec 2020 19:43:09 -0600 Subject: [PATCH 4/4] Make RT import statement conditional, fix formatting, ignore WinRT for type checking --- qt/aqt/sound.py | 4 +++- qt/aqt/tts.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/qt/aqt/sound.py b/qt/aqt/sound.py index 1b44adebc..64fdc6304 100644 --- a/qt/aqt/sound.py +++ b/qt/aqt/sound.py @@ -917,11 +917,13 @@ def setup_audio(taskman: TaskManager, base_folder: str) -> None: av_player.players.append(MacTTSPlayer(taskman)) elif isWin: - from aqt.tts import WindowsRTTTSFilePlayer, WindowsTTSPlayer + from aqt.tts import WindowsTTSPlayer av_player.players.append(WindowsTTSPlayer(taskman)) if platform.release() == "10": + from aqt.tts import WindowsRTTTSFilePlayer + # If Windows 10, ensure it's October 2018 update or later if int(platform.version().split(".")[-1]) >= 17763: av_player.players.append(WindowsRTTTSFilePlayer(taskman)) diff --git a/qt/aqt/tts.py b/qt/aqt/tts.py index 2536a8d56..467b6ee07 100644 --- a/qt/aqt/tts.py +++ b/qt/aqt/tts.py @@ -529,7 +529,7 @@ if isWin: tmppath = os.path.join(tmpdir(), "tts.wav") def import_voices(self) -> None: - import winrt.windows.media.speechsynthesis as speechsynthesis + import winrt.windows.media.speechsynthesis as speechsynthesis # type: ignore self.voice_list = speechsynthesis.SpeechSynthesizer.get_all_voices() @@ -569,8 +569,8 @@ if isWin: cb() async def speakText(self, tag: TTSTag, voice_id): - import winrt.windows.media.speechsynthesis as speechsynthesis - import winrt.windows.storage.streams as streams + import winrt.windows.media.speechsynthesis as speechsynthesis # type: ignore + import winrt.windows.storage.streams as streams # type: ignore synthesizer 
= speechsynthesis.SpeechSynthesizer()