Merge pull request #855 from rye761/winrttts

WinRT TTS API support
This commit is contained in:
Damien Elmes 2020-12-21 12:12:03 +10:00 committed by GitHub
commit b1293d6758
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 84 additions and 1 deletions

View File

@ -66,9 +66,10 @@ David Allison <davidallisongithub@gmail.com>
Tsung-Han Yu <johan456789@gmail.com> Tsung-Han Yu <johan456789@gmail.com>
Piotr Kubowicz <piotr.kubowicz@gmail.com> Piotr Kubowicz <piotr.kubowicz@gmail.com>
RumovZ <gp5glkw78@relay.firefox.com> RumovZ <gp5glkw78@relay.firefox.com>
Cecini <github.com/cecini> Cecini <github.com/cecini>
Krish Shah <github.com/k12ish> Krish Shah <github.com/k12ish>
ianki <iankigit@gmail.com> ianki <iankigit@gmail.com>
rye761 <ryebread761@gmail.com>
******************** ********************

View File

@ -5,6 +5,7 @@ from __future__ import annotations
import atexit import atexit
import os import os
import platform
import re import re
import subprocess import subprocess
import sys import sys
@ -920,5 +921,12 @@ def setup_audio(taskman: TaskManager, base_folder: str) -> None:
av_player.players.append(WindowsTTSPlayer(taskman)) av_player.players.append(WindowsTTSPlayer(taskman))
if platform.release() == "10":
from aqt.tts import WindowsRTTTSFilePlayer
# If Windows 10, ensure it's October 2018 update or later
if int(platform.version().split(".")[-1]) >= 17763:
av_player.players.append(WindowsRTTTSFilePlayer(taskman))
# cleanup at shutdown # cleanup at shutdown
atexit.register(av_player.shutdown) atexit.register(av_player.shutdown)

View File

@ -25,9 +25,11 @@ expose the name of the engine, which would mean the user could write
from __future__ import annotations from __future__ import annotations
import asyncio
import os import os
import re import re
import subprocess import subprocess
import threading
from concurrent.futures import Future from concurrent.futures import Future
from dataclasses import dataclass from dataclasses import dataclass
from operator import attrgetter from operator import attrgetter
@ -468,6 +470,7 @@ if isWin:
return LCIDS.get(dec_str, "unknown") return LCIDS.get(dec_str, "unknown")
class WindowsTTSPlayer(TTSProcessPlayer): class WindowsTTSPlayer(TTSProcessPlayer):
default_rank = -1
try: try:
speaker = win32com.client.Dispatch("SAPI.SpVoice") speaker = win32com.client.Dispatch("SAPI.SpVoice")
except: except:
@ -516,3 +519,74 @@ if isWin:
"eg. 1.5 -> 15, 0.5 -> -5" "eg. 1.5 -> 15, 0.5 -> -5"
speed = (speed * 10) - 10 speed = (speed * 10) - 10
return int(max(-10, min(10, speed))) return int(max(-10, min(10, speed)))
@dataclass
class WindowsRTVoice(TTSVoice):
    """A TTSVoice that also carries the identifier of its WinRT voice.

    WindowsRTTTSFilePlayer fills `id` from the WinRT voice object's `id`
    attribute and later uses it to find the same voice again when speaking.
    """

    # Identifier copied from the WinRT voice; its concrete type is whatever
    # the WinRT bindings return, hence Any.
    id: Any
class WindowsRTTTSFilePlayer(TTSProcessPlayer):
    """TTS player backed by the WinRT SpeechSynthesizer.

    Speech is synthesized into a wav file at `tmppath`; once synthesis has
    finished, that file is inserted into the audio player's queue so the
    regular player performs the actual playback.
    """

    # Voices returned by WinRT; stays None until import_voices() succeeds.
    voice_list = None
    # Every utterance is written to this single file, so each synthesis
    # overwrites the previous one.
    tmppath = os.path.join(tmpdir(), "tts.wav")

    def import_voices(self) -> None:
        """Query WinRT for all installed voices and store them in voice_list."""
        import winrt.windows.media.speechsynthesis as speechsynthesis  # type: ignore

        self.voice_list = speechsynthesis.SpeechSynthesizer.get_all_voices()

    def get_available_voices(self) -> List[TTSVoice]:
        """Return the installed WinRT voices wrapped as WindowsRTVoice objects.

        Returns an empty list if the voices could not be loaded.
        """
        # The lookup is run on a short-lived worker thread and joined
        # immediately.
        # NOTE(review): presumably WinRT cannot be initialised on the calling
        # thread - confirm before simplifying this to a direct call.
        worker = threading.Thread(target=self.import_voices)
        worker.start()
        worker.join()
        if self.voice_list is None:
            # The worker raised (e.g. winrt is unavailable); its traceback has
            # already been reported by the threading machinery, so expose no
            # voices instead of failing with a TypeError on None.
            return []
        return [self._voice_to_object(voice) for voice in self.voice_list]

    def _voice_to_object(self, voice: Any) -> TTSVoice:
        """Convert a WinRT voice into a WindowsRTVoice.

        Spaces in the display name and dashes in the language code are
        replaced with underscores.
        """
        return WindowsRTVoice(
            id=voice.id,
            name=voice.display_name.replace(" ", "_"),
            lang=voice.language.replace("-", "_"),
        )

    def _play(self, tag: AVTag) -> None:
        """Synthesize the text of `tag` into the wav file at tmppath."""
        assert isinstance(tag, TTSTag)
        match = self.voice_for_tag(tag)
        assert match
        voice = cast(WindowsRTVoice, match.voice)

        self._taskman.run_on_main(
            lambda: gui_hooks.av_player_did_begin_playing(self, tag)
        )
        # Blocks until the audio has been fully written to tmppath.
        asyncio.run(self.speakText(tag, voice.id))

    def _on_done(self, ret: Future, cb: OnDoneCallback) -> None:
        """Queue the synthesized file for playback, then invoke `cb`.

        Any exception raised during synthesis is re-raised by ret.result().
        """
        ret.result()

        # inject file into the top of the audio queue
        from aqt.sound import av_player

        av_player.insert_file(self.tmppath)

        # then tell player to advance, which will cause the file to be played
        cb()

    async def speakText(self, tag: TTSTag, voice_id: Any) -> None:
        """Synthesize `tag.field_text` with the given voice into tmppath.

        Raises:
            RuntimeError: if no installed voice has the id `voice_id`.
        """
        import winrt.windows.media.speechsynthesis as speechsynthesis  # type: ignore
        import winrt.windows.storage.streams as streams  # type: ignore

        synthesizer = speechsynthesis.SpeechSynthesizer()

        voices = speechsynthesis.SpeechSynthesizer.get_all_voices()
        # Use a default so a missing voice is reported explicitly rather than
        # as a StopIteration escaping from the coroutine.
        voice_match = next((v for v in voices if v.id == voice_id), None)
        if voice_match is None:
            raise RuntimeError(f"WinRT TTS voice not found: {voice_id}")

        synthesizer.voice = voice_match
        synthesizer.options.speaking_rate = tag.speed

        stream = await synthesizer.synthesize_text_to_stream_async(tag.field_text)
        size = stream.size
        input_stream = stream.get_input_stream_at(0)
        data_reader = streams.DataReader(input_stream)
        # load_async only starts the load; wait for it to finish so the
        # reader's buffer is filled before bytes are read from it.
        await data_reader.load_async(size)
        audio = bytes(data_reader.read_byte() for _ in range(size))

        # Write everything in one call; the context manager closes the file
        # even if the write fails.
        with open(self.tmppath, "wb") as wav_file:
            wav_file.write(audio)