useTranscriber.ts 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. import { useCallback, useMemo, useState } from "react";
  2. import { useWorker } from "./useWorker";
  3. import { updateFileProgress } from "@/utils/progress";
  /**
   * Record describing the progress of a single named file.
   *
   * NOTE(review): nothing in this file references this interface. It presumably
   * mirrors the per-file model download messages ("progress" / "done") that the
   * worker sends — confirm against the worker before relying on it.
   */
  interface ProgressItem {
    // Identification of the entry.
    name: string;
    file: string;
    status: string;

    // Counters; presumably `loaded` / `total` are byte counts and `progress`
    // is a percentage — TODO confirm units against the producer.
    loaded: number;
    total: number;
    progress: number;
  }
  /**
   * One transcribed segment: its text and a two-element timestamp tuple whose
   * second entry may be `null`.
   *
   * NOTE(review): the tuple is presumably `[start, end]` in seconds, with
   * `end === null` while the segment is still open — confirm against the worker.
   */
  interface TranscriptChunk {
    text: string;
    timestamp: [number, number | null];
  }

  /**
   * Worker message carrying a partial transcript (status "update").
   *
   * `data[0]` is the text transcribed so far and `data[1].chunks` the segments
   * produced so far. The top-level `text` field is declared but not read in
   * this file.
   */
  interface TranscriberUpdateData {
    data: [string, { chunks: TranscriptChunk[] }];
    text: string;
  }

  /** Worker message carrying the final transcript (status "complete"). */
  interface TranscriberCompleteData {
    data: {
      text: string;
      chunks: TranscriptChunk[];
    };
  }

  /**
   * Transcript exposed to consumers of the hook.
   *
   * `isBusy` is `true` while the transcript comes from partial updates and
   * `false` once the complete result has been received.
   */
  export interface TranscriberData {
    isBusy: boolean;
    text: string;
    chunks: TranscriptChunk[];
  }
  /** Public handle returned by `useTranscriber`. */
  export interface Transcriber {
    /**
     * Sends the given audio to the worker for transcription. Does nothing when
     * `audioData` is `undefined`.
     */
    start: (audioData: AudioBuffer | undefined) => void;

    /** Discards the current transcript; call when the input audio changes. */
    onInputChange: () => void;

    /**
     * `true` from the moment a transcription is started until the worker
     * reports "complete" or "error".
     */
    isBusy: boolean;

    /** `true` after the worker reports "initiate", `false` again on "ready". */
    isModelLoading: boolean;

    /** Latest transcript, or `undefined` when none is available. */
    output?: TranscriberData;
  }
  /**
   * Reduces an `AudioBuffer` to the single channel of samples posted to the
   * worker.
   *
   * Buffers with exactly two channels are mixed sample by sample as
   * `sqrt(2) * (left + right) / 2`. Any other channel count uses channel 0 as-is
   * (the returned array is then the buffer's own channel data, not a copy).
   */
  function toMonoSamples(audioData: AudioBuffer): Float32Array {
    if (audioData.numberOfChannels !== 2) {
      // Not stereo: the first channel is used unchanged.
      return audioData.getChannelData(0);
    }

    const SCALING_FACTOR = Math.sqrt(2);
    const left = audioData.getChannelData(0);
    const right = audioData.getChannelData(1);
    const mono = new Float32Array(left.length);
    for (let i = 0; i < audioData.length; ++i) {
      mono[i] = (SCALING_FACTOR * (left[i] + right[i])) / 2;
    }
    return mono;
  }

  /**
   * React hook that drives the "whisper" worker and exposes its state.
   *
   * Worker messages are dispatched on `message.status`:
   * - "progress" / "done": forwarded to `updateFileProgress`
   * - "initiate" / "ready": toggle `isModelLoading`
   * - "update" / "complete": replace the transcript (partial / final)
   * - "error": clear the busy flags and show an alert
   *
   * @returns A memoized {@link Transcriber} whose identity changes only when one
   *   of its fields changes.
   */
  export function useTranscriber(): Transcriber {
    const [transcript, setTranscript] = useState<TranscriberData | undefined>(
      undefined,
    );
    const [isBusy, setIsBusy] = useState(false);
    const [isModelLoading, setIsModelLoading] = useState(false);

    const webWorker = useWorker("whisper", (event) => {
      const message = event.data;
      // Each case that declares locals gets its own block so the declarations
      // do not leak into the other cases of the switch.
      switch (message.status) {
        case "progress":
          // Model file progress: report the new value for that file.
          updateFileProgress(message.file, message.progress);
          break;
        case "update": {
          // Partial transcript: more updates are expected, so stay busy.
          const updateMessage = message as TranscriberUpdateData;
          setTranscript({
            isBusy: true,
            text: updateMessage.data[0],
            chunks: updateMessage.data[1].chunks,
          });
          break;
        }
        case "complete": {
          // Final transcript.
          console.log("useTranscriber complete", message);
          const completeMessage = message as TranscriberCompleteData;
          setTranscript({
            isBusy: false,
            text: completeMessage.data.text,
            chunks: completeMessage.data.chunks,
          });
          setIsBusy(false);
          break;
        }
        case "initiate":
          // A model file started loading.
          setIsModelLoading(true);
          break;
        case "ready":
          setIsModelLoading(false);
          break;
        case "error":
          setIsBusy(false);
          // A transcript built from partial updates still says `isBusy: true`;
          // clear it so `output.isBusy` agrees with the hook-level flag.
          setTranscript((previous) =>
            previous?.isBusy ? { ...previous, isBusy: false } : previous,
          );
          alert(
            `${message.data.message} This is most likely because you are using Safari on an M1/M2 Mac. Please try again from Chrome, Firefox, or Edge.\n\nIf this is not the case, please file a bug report.`,
          );
          break;
        case "done":
          // Model file finished loading: report it as 100%.
          updateFileProgress(message.file, 100);
          break;
        default:
          // Any other status is ignored.
          break;
      }
    });

    const onInputChange = useCallback(() => {
      setTranscript(undefined);
    }, []);

    const postRequest = useCallback(
      async (audioData: AudioBuffer | undefined) => {
        if (!audioData) {
          return;
        }
        // Drop any previous result before starting a new run.
        setTranscript(undefined);
        setIsBusy(true);
        webWorker.postMessage({
          audio: toMonoSamples(audioData),
        });
      },
      [webWorker],
    );

    return useMemo(
      () => ({
        onInputChange,
        isBusy,
        isModelLoading,
        start: postRequest,
        output: transcript,
      }),
      [onInputChange, isBusy, isModelLoading, postRequest, transcript],
    );
  }