1
0

ttsDiagnosis.ts 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. import { TTSBackend } from "@/types/backend";
  2. import { EvaluationResult } from "./diagnosisScript";
  3. const additionalUrls = {
  4. elevenlabs: "?optimize_streaming_latency=0&output_format=mp3_44100_128",
  5. openai_tts: "/v1/audio/speech",
  6. localXTTSTTS: "/api/tts-generate",
  7. piper: "/api/v1/generate",
  8. coquiLocal: "/api/tts",
  9. };
  10. const message = "Hello World";
  11. const TIME_OUT = 8000;
  12. const MIN_DURATION = 2000;
  13. // Utility to safely call fetch
  14. async function safeFetch(
  15. fullUrl: string,
  16. options?: RequestInit,
  17. timeoutMs = TIME_OUT
  18. ): Promise<EvaluationResult> {
  19. const controller = new AbortController();
  20. const id = setTimeout(() => controller.abort(), timeoutMs);
  21. const start = performance.now();
  22. try {
  23. if (!options) {
  24. const res = await fetch(fullUrl, {
  25. signal: controller.signal,
  26. });
  27. const end = performance.now();
  28. clearTimeout(id);
  29. const duration = end - start;
  30. const status = res.ok ? "pass" : "fail";
  31. const score = calculateScore({ status, duration });
  32. return { status, score };
  33. } else {
  34. const res = await fetch(fullUrl, {
  35. ...options,
  36. signal: controller.signal,
  37. });
  38. const end = performance.now();
  39. clearTimeout(id);
  40. const duration = end - start;
  41. const status = res.ok ? "pass" : "fail";
  42. const score = calculateScore({ status, duration });
  43. return { status, score };
  44. }
  45. } catch (err:any) {
  46. const end = performance.now();
  47. clearTimeout(id);
  48. const duration = end - start;
  49. const isAbort = err.name === "AbortError";
  50. return { status: "fail", score: calculateScore({ status: "fail", duration, timeout: isAbort }) };
  51. }
  52. }
  53. // Score calculation logic
  54. function calculateScore({
  55. status,
  56. duration,
  57. timeout = false,
  58. }: {
  59. status: "pass" | "fail";
  60. duration: number;
  61. timeout?: boolean;
  62. }): number {
  63. if (timeout) return 0;
  64. let score = 0;
  65. if (status === "pass") score += 50;
  66. if (duration < MIN_DURATION) score += 50 * ((MIN_DURATION - duration) / MIN_DURATION);
  67. return Math.round(score);
  68. }
  69. // Individual backend handlers
  70. const backendHandlers: Record<
  71. string,
  72. (params: TTSBackend) => Promise<EvaluationResult>
  73. > = {
  74. elevenlabs: async (params) => {
  75. const { elevenlabs_apikey, elevenlabs_voiceid, elevenlabs_model } = params.elevenlabs || {};
  76. if (!elevenlabs_apikey || !elevenlabs_voiceid || !elevenlabs_model) return {status: "fail", score: 0};
  77. const url = `https://api.elevenlabs.io/v1/text-to-speech/${elevenlabs_voiceid}`;
  78. return await safeFetch(`${url}${additionalUrls.elevenlabs}`, {
  79. method: "POST",
  80. headers: {
  81. "Content-Type": "application/json",
  82. Accept: "audio/mpeg",
  83. "xi-api-key": elevenlabs_apikey,
  84. },
  85. body: JSON.stringify({
  86. text: message,
  87. model_id: elevenlabs_model,
  88. voice_settings: {
  89. stability: 0,
  90. similarity_boost: 0,
  91. style: 0,
  92. use_speaker_boost: true,
  93. },
  94. }),
  95. });
  96. },
  97. openai_tts: async (params) => {
  98. const { openai_tts_apikey, openai_tts_url, openai_tts_model, openai_tts_voice } = params.openai_tts || {};
  99. if (!openai_tts_model || !openai_tts_apikey || !openai_tts_url || !openai_tts_voice) return {status: "fail", score: 0};
  100. return await safeFetch(`${openai_tts_url}${additionalUrls.openai_tts}`, {
  101. method: "POST",
  102. headers: {
  103. "Content-Type": "application/json",
  104. Authorization: `Bearer ${openai_tts_apikey}`,
  105. },
  106. body: JSON.stringify({
  107. model: openai_tts_model,
  108. input: message,
  109. voice: openai_tts_voice,
  110. }),
  111. });
  112. },
  113. localXTTSTTS: async (params) => {
  114. const { localXTTS_url, alltalk_version, alltalk_rvc_voice, alltalk_language, alltalk_rvc_pitch, alltalk_voice } =
  115. params.localXTTS || {};
  116. const baseUrl = localXTTS_url?.replace(/\/+$/, "").replace("/api/tts-generate", "");
  117. const formData = new URLSearchParams({
  118. text_input: message,
  119. text_filtering: "standard",
  120. character_voice_gen: alltalk_voice || "female_01.wav",
  121. narrator_enabled: "false",
  122. narrator_voice_gen: alltalk_voice || "female_01.wav",
  123. text_not_inside: "character",
  124. language: alltalk_language || "en",
  125. output_file_name: "amica_output",
  126. output_file_timestamp: "true",
  127. autoplay: "false",
  128. autoplay_volume: "0.8",
  129. });
  130. if (alltalk_version === "v2") {
  131. if (alltalk_rvc_voice && alltalk_rvc_voice !== "Disabled") {
  132. formData.append("rvccharacter_voice_gen", alltalk_rvc_voice);
  133. formData.append("rvccharacter_pitch", alltalk_rvc_pitch || "0");
  134. }
  135. }
  136. return await safeFetch(`${baseUrl}${additionalUrls.localXTTSTTS}`, {
  137. method: "POST",
  138. body: formData,
  139. });
  140. },
  141. piper: async (params) => {
  142. const { piper_url } = params.piper || {};
  143. if (!piper_url) return {status: "fail", score: 0};
  144. const newUrl = new URL(piper_url);
  145. newUrl.searchParams.append('text', message);
  146. return await safeFetch(newUrl.toString());
  147. },
  148. coquiLocal: async (params) => {
  149. const { coquiLocal_url, coquiLocal_voiceid } = params.coquiLocal || {};
  150. if (!coquiLocal_url || !coquiLocal_voiceid) return {status: "fail", score: 0};
  151. return await safeFetch(`${coquiLocal_url}${additionalUrls.coquiLocal}`, {
  152. method: "POST",
  153. headers: {
  154. 'text': message,
  155. 'speaker-id': coquiLocal_voiceid,
  156. }
  157. });
  158. },
  159. speecht5: async () => { return {status: "pass", score: 100}; },
  160. };
  161. // Dispatcher function
  162. export async function ttsDiagnosis(
  163. backend: string,
  164. params: TTSBackend,
  165. ): Promise<EvaluationResult> {
  166. const handler = backendHandlers[backend];
  167. if (!handler) return {status: "fail", score: 0};
  168. return await handler(params);
  169. }