| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847 |
- import { Queue } from "@/utils/queue"
- import { Message, Role, Screenplay, Talk, textsToScreenplay } from "./messages";
- import { Viewer } from "@/features/vrmViewer/viewer";
- import { getEchoChatResponseStream } from "./echoChat";
- import {
- getOpenAiChatResponseStream,
- getOpenAiVisionChatResponse,
- } from "./openAiChat";
- import {
- getLlamaCppChatResponseStream,
- getLlavaCppChatResponse,
- } from "./llamaCppChat";
- import { getWindowAiChatResponseStream } from "./windowAiChat";
- import {
- getOllamaChatResponseStream,
- getOllamaVisionChatResponse,
- } from "./ollamaChat";
- import { getKoboldAiChatResponseStream } from "./koboldAiChat";
- import { rvc } from "@/features/rvc/rvc";
- import { coquiLocal } from "@/features/coquiLocal/coquiLocal";
- import { piper } from "@/features/piper/piper";
- import { elevenlabs } from "@/features/elevenlabs/elevenlabs";
- import { speecht5 } from "@/features/speecht5/speecht5";
- import { openaiTTS } from "@/features/openaiTTS/openaiTTS";
- import { localXTTSTTS } from "@/features/localXTTS/localXTTS";
- import { AmicaLife } from "@/features/amicaLife/amicaLife";
- import { cleanTalk } from "@/utils/cleanTalk";
- import { processResponse } from "@/utils/processResponse";
- import { wait } from "@/utils/wait";
- import {
- isCharacterIdle,
- characterIdleTime,
- resetIdleTimer,
- } from "@/utils/isIdle";
- import { getOpenRouterChatResponseStream } from "./openRouterChat";
- import {
- AmicaLifeParams,
- ChatbotBackend,
- RVC,
- STTBackend,
- TTSBackend,
- VisionBackend,
- } from "@/types/backend";
- import { loadAudioAsFloat32Array } from "../diagnosed/sttDiagnosis";
- import { WaveFile } from "wavefile";
- import { openaiWhisper } from "../openaiWhisper/openaiWhisper";
- import { Transcriber } from "@/hooks/useTranscriber";
- import { whispercpp } from "../whispercpp/whispercpp";
- type Speak = {
- audioBuffer: ArrayBuffer | null;
- screenplay: Screenplay;
- streamIdx: number;
- };
- type TTSJob = {
- screenplay: Screenplay;
- streamIdx: number;
- };
- export interface ChatConfig {
- name: string;
- tts_backend:
- | "none"
- | "piper"
- | "coqui"
- | "elevenlabs"
- | "speecht5"
- | "openai_tts"
- | "localXTTS"
- | "coquiLocal";
- chatbot_backend:
- | "openai"
- | "llamacpp"
- | "ollama"
- | "koboldai"
- | "windowai"
- | "openrouter";
- stt_backend: "whisper_browser" | "whisper_openai" | "whispercpp";
- vision_backend: "vision_llamacpp" | "vision_ollama" | "vision_openai";
- system_prompt: string;
- vision_system_prompt: string;
- chatbot_params: ChatbotBackend;
- tts_params: TTSBackend;
- stt_params: STTBackend;
- vision_params: VisionBackend;
- amica_life_params: AmicaLifeParams;
- rvc_params: RVC;
- // Add more as needed
- }
- export class Chat {
- public initialized: boolean;
- private shouldStopProcessing = false;
- public transcriber?: Transcriber;
- public amicaLife?: AmicaLife;
- public viewer?: Viewer;
- public setChatLog?: (messageLog: Message[]) => void;
- public setUserMessage?: (message: string) => void;
- public setAssistantMessage?: (message: string) => void;
- public setShownMessage?: (role: Role) => void;
- public setChatProcessing?: (processing: boolean) => void;
- public setChatSpeaking?: (speaking: boolean) => void;
- public setWhisperCppOutput?: (output: any) => void;
- public setWhisperOpenAIOutput?: (output: any) => void;
- // the message from the user that is currently being processed
- // it can be reset
- public stream: ReadableStream<Uint8Array> | null;
- public streams: ReadableStream<Uint8Array>[];
- public reader: ReadableStreamDefaultReader<Uint8Array> | null;
- public readers: ReadableStreamDefaultReader<Uint8Array>[];
- // process these immediately as they come in and add to audioToPlay
- public ttsJobs: Queue<TTSJob>;
- // this should be read as soon as they exist
- // and then deleted from the queue
- public speakJobs: Queue<Speak>;
- private currentAssistantMessage: string;
- private currentUserMessage: string;
- private lastAwake: number;
- public messageList: Message[];
- public currentStreamIdx: number;
- public config?: ChatConfig;
- private onChatCompleteResolver?: () => void;
- public onChatComplete?: Promise<void>;
- constructor() {
- this.initialized = false;
- this.stream = null;
- this.reader = null;
- this.streams = [];
- this.readers = [];
- this.ttsJobs = new Queue<TTSJob>();
- this.speakJobs = new Queue<Speak>();
- this.currentAssistantMessage = "";
- this.currentUserMessage = "";
- this.messageList = [];
- this.currentStreamIdx = 0;
- this.lastAwake = 0;
- }
- public initialize(
- transcriber: Transcriber,
- amicaLife: AmicaLife,
- viewer: Viewer,
- setUserMessage: (message: string) => void,
- setAssistantMessage: (message: string) => void,
- setShownMessage: (role: Role) => void,
- setChatProcessing: (processing: boolean) => void,
- setChatSpeaking: (speaking: boolean) => void,
- setWhisperCppOutput: (output: any) => void,
- setWhisperOpenAIOutput: (output: any) => void,
- config: ChatConfig,
- ) {
- this.transcriber = transcriber;
- this.amicaLife = amicaLife;
- this.viewer = viewer;
- this.setUserMessage = setUserMessage;
- this.setAssistantMessage = setAssistantMessage;
- this.setShownMessage = setShownMessage;
- this.setChatProcessing = setChatProcessing;
- this.setChatSpeaking = setChatSpeaking;
- this.setWhisperCppOutput = setWhisperCppOutput;
- this.setWhisperOpenAIOutput = setWhisperOpenAIOutput;
- this.config = config;
- console.log("Config ", config)
- this.shouldStopProcessing = false;
- // these will run forever
- this.processTtsJobs();
- this.processSpeakJobs();
- this.updateAwake();
- this.initialized = true;
- }
- public setMessageList(messages: Message[]) {
- this.messageList = messages;
- this.currentAssistantMessage = "";
- this.currentUserMessage = "";
- // this.setChatLog!(this.messageList!);
- this.setAssistantMessage!(this.currentAssistantMessage);
- this.setUserMessage!(this.currentAssistantMessage);
- this.currentStreamIdx++;
- }
- public async handleRvc(audio: any) {
- const rvcParams = this.config?.rvc_params;
- const rvcUrl = rvcParams?.rvc_url!;
- const rvcModelName = rvcParams?.rvc_model_name!;
- const rvcIndexPath = rvcParams?.rvc_index_path!;
- const rvcF0upKey = parseInt(rvcParams?.rvc_f0_upkey!)!;
- const rvcF0Method = rvcParams?.rvc_f0_method!;
- const rvcIndexRate = rvcParams?.rvc_index_rate!;
- const rvcFilterRadius = parseInt(rvcParams?.rvc_filter_radius!);
- const rvcResampleSr = parseInt(rvcParams?.rvc_resample_sr!);
- const rvcRmsMixRate = parseInt(rvcParams?.rvc_rms_mix_rate!);
- const rvcProtect = parseInt(rvcParams?.rvc_protect!);
- const voice = await rvc(
- audio,
- rvcUrl,
- rvcModelName,
- rvcIndexPath,
- rvcF0upKey,
- rvcF0Method,
- rvcIndexRate,
- rvcFilterRadius,
- rvcResampleSr,
- rvcRmsMixRate,
- rvcProtect,
- );
- return voice.audio;
- }
- public idleTime(): number {
- return characterIdleTime(this.config?.amica_life_params.time_to_sleep_sec!,this.lastAwake);
- }
- public isAwake() {
- return !isCharacterIdle(this.config?.amica_life_params.time_to_sleep_sec!,this.lastAwake);
- }
- public updateAwake() {
- this.lastAwake = new Date().getTime();
- resetIdleTimer();
- }
- public async processTtsJobs() {
- while (!this.shouldStopProcessing) {
- do {
- if (this.shouldStopProcessing) return;
- const ttsJob = this.ttsJobs.dequeue();
- if (!ttsJob) break;
- if (ttsJob.streamIdx !== this.currentStreamIdx) {
- continue;
- }
- const audioBuffer = await this.fetchAudio(ttsJob.screenplay.talk);
- this.speakJobs.enqueue({
- audioBuffer,
- screenplay: ttsJob.screenplay,
- streamIdx: ttsJob.streamIdx,
- });
- } while (this.ttsJobs.size() > 0);
- await wait(50);
- }
- }
- public async processSpeakJobs() {
- while (!this.shouldStopProcessing) {
- do {
- if (this.shouldStopProcessing) return;
- const speak = this.speakJobs.dequeue();
- if (!speak) break;
- if (speak.streamIdx !== this.currentStreamIdx) continue;
- this.bubbleMessage("assistant", speak.screenplay.text);
- if (speak.audioBuffer) {
- this.setChatSpeaking!(true);
- await this.viewer!.model?.speak(speak.audioBuffer, speak.screenplay);
- this.setChatSpeaking!(false);
- if (this.isAwake()) this.updateAwake();
- }
- // Resolve full chat complete
- if (this.speakJobs.size() === 0 && this.ttsJobs.size() === 0) {
- this.onChatCompleteResolver?.();
- }
- } while (this.speakJobs.size() > 0);
- await wait(50);
- }
- }
- public async runFullInteraction(message: string, vision: boolean) {
- vision ? await this.getVisionResponse(message) : await this.receiveMessageFromUser(message,false);
- await wait(3000);
- await this.onChatComplete;
- }
- public bubbleMessage(role: Role, text: string) {
- // TODO: currentUser & Assistant message should be contain the message with emotion in it
- if (role === "user") {
- // add space if there is already a partial message
- if (this.currentUserMessage !== "") {
- this.currentUserMessage += " ";
- }
- this.currentUserMessage += text;
- this.setUserMessage!(this.currentUserMessage);
- this.setAssistantMessage!("");
- if (this.currentAssistantMessage !== "") {
- this.messageList!.push({
- role: "assistant",
- content: this.currentAssistantMessage,
- });
- this.currentAssistantMessage = "";
- }
- // this.setChatLog!([
- // ...this.messageList!,
- // { role: "user", content: this.currentUserMessage },
- // ]);
- }
- if (role === "assistant") {
- if (
- this.currentAssistantMessage != "" &&
- !this.isAwake() &&
- this.config?.amica_life_params.amica_life_enabled === "true"
- ) {
- this.messageList!.push({
- role: "assistant",
- content: this.currentAssistantMessage,
- });
- this.currentAssistantMessage = text;
- this.setAssistantMessage!(this.currentAssistantMessage);
- } else {
- this.currentAssistantMessage += text;
- this.setUserMessage!("");
- this.setAssistantMessage!(this.currentAssistantMessage);
- }
- if (this.currentUserMessage !== "") {
- this.messageList!.push({
- role: "user",
- content: this.currentUserMessage,
- });
- this.currentUserMessage = "";
- }
- // this.setChatLog!([
- // ...this.messageList!,
- // { role: "assistant", content: this.currentAssistantMessage },
- // ]);
- }
- this.setShownMessage!(role);
- console.debug("bubbler", this.messageList);
- }
- public async interrupt() {
- this.currentStreamIdx++;
- try {
- if (this.reader) {
- console.debug("cancelling");
- if (!this.reader?.closed) {
- await this.reader?.cancel();
- }
- // this.reader = null;
- // this.stream = null;
- console.debug("finished cancelling");
- }
- } catch (e: any) {
- console.error(e.toString());
- }
- // TODO if llm type is llama.cpp, we can send /stop message here
- this.ttsJobs.clear();
- this.speakJobs.clear();
- // TODO stop viewer from speaking
- }
- // this happens either from text or from voice / whisper completion
- public async receiveMessageFromUser(message: string, amicaLife: boolean) {
- if (message === null || message === "") {
- return;
- }
- this.onChatComplete = new Promise<void>((resolve) => {
- this.onChatCompleteResolver = resolve;
- });
- console.time("performance_interrupting");
- console.debug("interrupting...");
- await this.interrupt();
- console.timeEnd("performance_interrupting");
- await wait(0);
- console.debug("wait complete");
- if (!amicaLife) {
- console.log("receiveMessageFromUser", message);
- this.amicaLife?.receiveMessageFromUser(message);
- if (!/\[.*?\]/.test(message)) {
- message = `[neutral] ${message}`;
- }
- this.updateAwake();
- this.bubbleMessage("user", message);
- }
- // make new stream
- const messages: Message[] = [
- { role: "system", content: this.config?.system_prompt! },
- ...this.messageList!,
- { role: "user", content: amicaLife ? message : this.currentUserMessage },
- ];
- // console.debug('messages', messages);
- await this.makeAndHandleStream(messages);
- }
- public async makeAndHandleStream(messages: Message[]) {
- try {
- this.streams.push(await this.getChatResponseStream(messages));
- } catch (e: any) {
- const errMsg = e.toString();
- console.error("Failed to get chat response", errMsg);
- return errMsg;
- }
- if (this.streams[this.streams.length - 1] == null) {
- const errMsg = "Error: Null stream encountered.";
- console.error("Null stream encountered", errMsg);
- return errMsg;
- }
- return await this.handleChatResponseStream();
- }
- public async handleChatResponseStream() {
- if (this.streams.length === 0) {
- console.log("no stream!");
- return;
- }
- this.currentStreamIdx++;
- const streamIdx = this.currentStreamIdx;
- this.setChatProcessing!(true);
- console.time("chat stream processing");
- let reader = this.streams[this.streams.length - 1].getReader();
- this.readers.push(reader);
- let sentences = new Array<string>();
- let aiTextLog = "";
- let tag = "";
- let isThinking = false;
- let rolePlay = "";
- let receivedMessage = "";
- let firstTokenEncountered = false;
- let firstSentenceEncountered = false;
- console.time("performance_time_to_first_token");
- console.time("performance_time_to_first_sentence");
- try {
- while (true) {
- if (this.currentStreamIdx !== streamIdx) {
- console.log("wrong stream idx");
- break;
- }
- const { done, value } = await reader.read();
- if (!firstTokenEncountered) {
- console.timeEnd("performance_time_to_first_token");
- firstTokenEncountered = true;
- }
- if (done) break;
- receivedMessage += value;
- receivedMessage = receivedMessage.trimStart();
- const proc = processResponse({
- sentences,
- aiTextLog,
- receivedMessage,
- tag,
- isThinking,
- rolePlay,
- callback: (aiTalks: Screenplay[]): boolean => {
- // Generate & play audio for each sentence, display responses
- console.debug("enqueue tts", aiTalks);
- console.debug(
- "streamIdx",
- streamIdx,
- "currentStreamIdx",
- this.currentStreamIdx,
- );
- if (streamIdx !== this.currentStreamIdx) {
- console.log("wrong stream idx");
- return true; // should break
- }
- this.ttsJobs.enqueue({
- screenplay: aiTalks[0],
- streamIdx: streamIdx,
- });
- if (!firstSentenceEncountered) {
- console.timeEnd("performance_time_to_first_sentence");
- firstSentenceEncountered = true;
- }
- return false; // normal processing
- },
- });
- sentences = proc.sentences;
- aiTextLog = proc.aiTextLog;
- receivedMessage = proc.receivedMessage;
- tag = proc.tag;
- rolePlay = proc.rolePlay;
- if (proc.shouldBreak) {
- break;
- }
- }
- } catch (e: any) {
- const errMsg = e.toString();
- this.bubbleMessage!("assistant", errMsg);
- console.error(errMsg);
- } finally {
- if (!reader.closed) {
- reader.releaseLock();
- }
- console.timeEnd("chat stream processing");
- if (streamIdx === this.currentStreamIdx) {
- this.setChatProcessing!(false);
- }
- }
- return aiTextLog;
- }
- // TTS
- public async fetchAudio(talk: Talk): Promise<ArrayBuffer | null> {
- // TODO we should remove non-speakable characters
- // since this depends on the tts backend, we should do it
- // in their respective functions
- // this is just a simple solution for now
- talk = cleanTalk(talk);
- if (talk.message.trim() === "") {
- return null;
- }
- const params = this.config?.tts_params;
- const rvcParams = this.config?.rvc_params;
- const rvcEnabled = rvcParams?.rvc_enabled === "true";
- try {
- switch (this.config?.tts_backend) {
- case "none": {
- return null;
- }
- case "elevenlabs": {
- const p = params as TTSBackend["elevenlabs"]
- const voiceId = p?.elevenlabs_voiceid!;
- const voice = await elevenlabs(
- p,
- talk.message,
- voiceId,
- );
- if (rvcEnabled) {
- return await this.handleRvc(voice.audio);
- }
- return voice.audio;
- }
- case "speecht5": {
- const p = params as TTSBackend["speecht5"]
- const speakerEmbeddingUrl = p?.speecht5_speaker_embedding_url!;
- const voice = await speecht5(talk.message, speakerEmbeddingUrl);
- if (rvcEnabled) {
- return await this.handleRvc(voice.audio);
- }
- return voice.audio;
- }
- case "openai_tts": {
- const voice = await openaiTTS(params as TTSBackend["openai_tts"], talk.message);
- if (rvcEnabled) {
- return await this.handleRvc(voice.audio);
- }
- return voice.audio;
- }
- case "localXTTS": {
- const voice = await localXTTSTTS(params as TTSBackend["localXTTS"], talk.message);
- if (rvcEnabled) {
- return await this.handleRvc(voice.audio);
- }
- return voice.audio;
- }
- case "piper": {
- const voice = await piper(params as TTSBackend["piper"], talk.message);
- if (rvcEnabled) {
- return await this.handleRvc(voice.audio);
- }
- return voice.audio;
- }
- case "coquiLocal": {
- const voice = await coquiLocal(params as TTSBackend["coquiLocal"], talk.message);
- if (rvcEnabled) {
- return await this.handleRvc(voice.audio);
- }
- return voice.audio;
- }
- }
- } catch (e: any) {
- console.error("Failed to get TTS response", e.toString());
- }
- return null;
- }
- // Chatbot
- public async getChatResponseStream(messages: Message[]) {
- console.debug("getChatResponseStream", messages);
- const chatbotBackend = this.config?.chatbot_backend;
- const name = this.config?.name!;
- const system_prompt = this.config?.system_prompt!;
- const params = this.config?.chatbot_params;
- switch (chatbotBackend) {
- case "openai":
- return getOpenAiChatResponseStream(
- params as ChatbotBackend["openai"],
- messages,
- );
- case "llamacpp":
- return getLlamaCppChatResponseStream(
- params as ChatbotBackend["llamacpp"],
- name,
- system_prompt,
- messages,
- );
- case "windowai":
- return getWindowAiChatResponseStream(name, messages);
- case "ollama":
- return getOllamaChatResponseStream(
- params as ChatbotBackend["ollama"],
- messages,
- );
- case "koboldai":
- return getKoboldAiChatResponseStream(
- name,
- system_prompt,
- params as ChatbotBackend["koboldai"],
- messages,
- );
- case "openrouter":
- return getOpenRouterChatResponseStream(
- params as ChatbotBackend["openrouter"],
- messages,
- );
- }
- return getEchoChatResponseStream(messages);
- }
- // STT
- public async getSTTResponse() {
- let audio = await loadAudioAsFloat32Array("/sample-voice.wav");
- try {
- switch (this.config?.stt_backend) {
- case "whisper_browser": {
- console.debug("whisper_browser attempt");
- // since VAD sample rate is same as whisper we do nothing here
- // both are 16000
- const audioCtx = new AudioContext();
- const buffer = audioCtx.createBuffer(1, audio.length, 16000);
- buffer.copyToChannel(new Float32Array(audio), 0, 0);
- this.transcriber?.start(buffer);
- break;
- }
- case "whisper_openai": {
- console.debug("whisper_openai attempt");
- const wav = new WaveFile();
- wav.fromScratch(1, 16000, "32f", audio);
- const file = new File([new Uint8Array(wav.toBuffer())], "input.wav", { type: "audio/wav" });
- let prompt;
- // TODO load prompt if it exists
- (async () => {
- try {
- const transcript = await openaiWhisper(this.config?.stt_params.whisper_openai,file, prompt);
- this.setWhisperOpenAIOutput!(transcript);
- } catch (e: any) {
- console.error("whisper_openai error", e);
- }
- })();
- break;
- }
- case "whispercpp": {
- console.debug("whispercpp attempt");
- const wav = new WaveFile();
- wav.fromScratch(1, 16000, "32f", audio);
- wav.toBitDepth("16");
- const file = new File([new Uint8Array(wav.toBuffer())], "input.wav", { type: "audio/wav" });
- let prompt;
- // TODO load prompt if it exists
- (async () => {
- try {
- const transcript = await whispercpp(this.config?.stt_params.whispercpp,file, prompt);
- this.setWhisperCppOutput!(transcript);
- } catch (e: any) {
- console.error("whispercpp error", e);
- }
- })();
- break;
- }
- }
- } catch (e: any) {
- console.error("stt_backend error", e);
- }
- }
- // Vision
- public async getVisionResponse(imageData: string, onlyVisionResponse?: boolean) {
- try {
- const visionBackend = this.config?.vision_backend;
- const name = this.config?.name!;
- const vision_system_prompt = this.config?.vision_system_prompt!;
- const params = this.config?.vision_params;
- console.debug("vision_backend", visionBackend);
- this.onChatComplete = new Promise<void>((resolve) => {
- this.onChatCompleteResolver = resolve;
- });
- let res = "";
- if (visionBackend === "vision_llamacpp") {
- const messages: Message[] = [
- { role: "system", content: vision_system_prompt },
- ...this.messageList!,
- {
- role: "user",
- content: "Describe the image as accurately as possible",
- },
- ];
- res = await getLlavaCppChatResponse(
- name,
- vision_system_prompt,
- params as VisionBackend["vision_llamacpp"],
- messages,
- imageData,
- );
- } else if (visionBackend === "vision_ollama") {
- const messages: Message[] = [
- { role: "system", content: vision_system_prompt },
- ...this.messageList!,
- {
- role: "user",
- content: "Describe the image as accurately as possible",
- },
- ];
- res = await getOllamaVisionChatResponse(
- params as VisionBackend["vision_ollama"],
- messages,
- imageData,
- );
- } else if (visionBackend === "vision_openai") {
- const messages: Message[] = [
- { role: "user", content: vision_system_prompt },
- ...(this.messageList! as any[]),
- {
- role: "user",
- // @ts-ignore normally this is a string
- content: [
- {
- type: "text",
- text: "Describe the image as accurately as possible",
- },
- {
- type: "image_url",
- image_url: {
- url: `data:image/jpeg;base64,${imageData}`,
- },
- },
- ],
- },
- ];
- res = await getOpenAiVisionChatResponse(
- params as VisionBackend["vision_openai"],
- messages,
- );
- } else {
- console.warn("vision_backend not supported", visionBackend);
- return;
- }
- if (onlyVisionResponse) {
- return res;
- }
- await this.makeAndHandleStream([
- { role: "system", content: this.config?.system_prompt! },
- ...this.messageList!,
- {
- role: "user",
- content: `This is a picture I just took from my webcam (described between [[ and ]] ): [[${res}]] Please respond accordingly and as if it were just sent and as though you can see it.`,
- },
- ]);
- } catch (e: any) {
- console.error("Failed to get vision response ", e.toString());
- }
- }
- public clean() {
- console.log("Stopping all chat processes...");
- this.shouldStopProcessing = true;
- // Cancel any readers or streams if needed
- this.interrupt();
- // You could also optionally clear queues
- this.ttsJobs.clear();
- this.speakJobs.clear();
- this.initialized = false;
- }
- }
|