import * as A from "fp-ts/lib/Array";
import * as O from "fp-ts/lib/Option";
import { pipe } from "fp-ts/lib/function";
import { toLower } from "fp-ts-extras/lib/String";
import * as SpeechSDK from "microsoft-cognitiveservices-speech-sdk";
import { UUID } from "io-ts-types";

// Handle to the browser's Web Speech API synthesizer, read once when this
// module is evaluated and reused by the functions in this file.
const synth = window.speechSynthesis;

/**
 * Searches the voices currently reported by `synth.getVoices()` for one whose
 * name starts with `name`, comparing both sides after `toLower`.
 *
 * The voice list is re-read on every call. If more than one voice matches,
 * the match that sits last in the list wins.
 *
 * @param name - Prefix to look for at the start of each voice's `name`.
 * @returns `O.some(voice)` for the last matching voice, otherwise `O.none`.
 */
const findVoiceByName = (name: string): O.Option<SpeechSynthesisVoice> => {
  const installedVoices = synth.getVoices();

  // Prefix test applied to each candidate voice.
  const nameHasPrefix = (candidate: SpeechSynthesisVoice): boolean =>
    toLower(candidate.name).startsWith(toLower(name));

  return A.findLast(nameHasPrefix)(installedVoices);
};

/**
 * Escapes the five XML special characters so `value` can be embedded in SSML
 * element content or inside a quoted attribute value without breaking the
 * document. `&` is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 */
const escapeXml = (value: string): string =>
  value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");

/**
 * Builds the SSML document sent to the Speech SDK: a `<voice>` element for
 * `voiceName`, with the text wrapped in `<lang xml:lang="...">` when a locale
 * is given. Every interpolated value is XML-escaped.
 */
const buildSsml = (
  text: string,
  voiceName: string,
  locale: string | null,
): string => {
  const spokenText = escapeXml(text);
  const content = locale
    ? `<lang xml:lang="${escapeXml(locale)}">${spokenText}</lang>`
    : spokenText;

  return (
    "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
    "xmlns:mstts='http://www.w3.org/2001/mstts' " +
    "xmlns:emo='http://www.w3.org/2009/10/emotionml' xml:lang='en-US'>" +
    `<voice name='${escapeXml(voiceName)}'>${content}</voice></speak>`
  );
};

/**
 * Speaks `text` aloud and records a `speak` analytics event via `window.gtag`.
 *
 * If `findVoiceByName(voideId)` finds a browser voice, the text is spoken
 * through the Web Speech API with that voice. Otherwise the text is sent as
 * SSML to the Microsoft Cognitive Services Speech SDK, played on the default
 * speaker output, and the synthesizer is closed once the request settles.
 *
 * @param text - Plain text to speak; it is XML-escaped before being put in SSML.
 * @param voideId - Voice identifier: used as the browser-voice name prefix and,
 *   on the SDK path, as the SSML `<voice name>`.
 * @param locale - Optional language for the SSML `<lang>` wrapper (SDK path
 *   only); `null` or an empty string omits the wrapper.
 * @param authorizationToken - Token passed to
 *   `SpeechConfig.fromAuthorizationToken` on the SDK path.
 * @param projectId - Project identifier attached to the analytics event.
 */
const speak = (
  text: string,
  voideId: string,
  locale: string | null,
  authorizationToken: string,
  projectId: UUID,
): void => {
  window.gtag("event", "speak", {
    event_category: "speech",
    event_label: voideId,
    project_id: projectId,
  });

  pipe(
    voideId,
    findVoiceByName,
    O.fold(
      () => {
        const audioConfig = SpeechSDK.AudioConfig.fromDefaultSpeakerOutput();
        // Region comes from the build-time env var; "eastus" is used when it
        // is unset or empty.
        const speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(
          authorizationToken,
          process.env.REACT_APP_TTS_REGION || "eastus",
        );

        // Voice and language are selected inside the SSML document rather
        // than on speechConfig.
        const synthesizer = new SpeechSDK.SpeechSynthesizer(
          speechConfig,
          audioConfig,
        );

        synthesizer.speakSsmlAsync(
          buildSsml(text, voideId, locale),
          (result) => {
            // A request can complete with a canceled result; surface the
            // details instead of dropping them.
            if (result.reason === SpeechSDK.ResultReason.Canceled) {
              console.error("speakSsmlAsync canceled: ", result.errorDetails);
            }
            synthesizer.close();
          },
          (error) => {
            console.error("speakSsmlAsync error: ", error);
            synthesizer.close();
          },
        );
      },
      (voice) => {
        const utterance = new SpeechSynthesisUtterance(text);
        utterance.voice = voice;
        synth.speak(utterance);
      },
    ),
  );
};

export default speak;
