1
0

llamaCppChat.ts 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. import { ChatbotBackend, VisionBackend } from "@/types/backend";
  2. import { Message } from "./messages";
  3. import { buildPrompt, buildVisionPrompt } from "@/utils/buildPrompt";
  4. export async function getLlamaCppChatResponseStream(config: ChatbotBackend["llamacpp"], name: string, system_prompt: string, messages: Message[]) {
  5. const headers: Record<string, string> = {
  6. "Content-Type": "application/json",
  7. "Connection": "keep-alive",
  8. "Accept": "text/event-stream",
  9. };
  10. const prompt = buildPrompt({ name: name, system_prompt: system_prompt },messages);
  11. const stop: string[] = [`${name}:`, ...`${config?.llamacpp_stop_sequence}`.split("||")];
  12. const res = await fetch(`${config?.llamacpp_url}/completion`, {
  13. headers: headers,
  14. method: "POST",
  15. body: JSON.stringify({
  16. stream: true,
  17. n_predict: 400,
  18. temperature: 0.7,
  19. cache_prompt: true,
  20. stop,
  21. prompt,
  22. }),
  23. });
  24. const reader = res.body?.getReader();
  25. if (res.status !== 200 || ! reader) {
  26. throw new Error(`LlamaCpp chat error (${res.status})`);
  27. }
  28. const stream = new ReadableStream({
  29. async start(controller: ReadableStreamDefaultController) {
  30. const decoder = new TextDecoder("utf-8");
  31. try {
  32. // sometimes the response is chunked, so we need to combine the chunks
  33. let combined = "";
  34. let cont = true;
  35. while (true) {
  36. const { done, value } = await reader.read();
  37. if (done || ! cont) break;
  38. const data = decoder.decode(value);
  39. const chunks = data
  40. .split("data:")
  41. .filter((val) => !!val && val.trim() !== "[DONE]");
  42. for (const chunk of chunks) {
  43. // skip comments
  44. if (chunk.length > 0 && chunk[0] === ":") {
  45. continue;
  46. }
  47. combined += chunk;
  48. try {
  49. const json = JSON.parse(combined);
  50. if (json.stop) {
  51. cont = false;
  52. }
  53. const messagePiece = json.content;
  54. combined = "";
  55. if (!!messagePiece) {
  56. controller.enqueue(messagePiece);
  57. }
  58. } catch (error) {
  59. console.error(error);
  60. }
  61. }
  62. }
  63. } catch (error) {
  64. console.error(error);
  65. controller.error(error);
  66. } finally {
  67. reader.releaseLock();
  68. controller.close();
  69. }
  70. },
  71. async cancel() {
  72. await reader?.cancel();
  73. reader.releaseLock();
  74. }
  75. });
  76. return stream;
  77. }
  78. export async function getLlavaCppChatResponse(name: string, vision_system_prompt: string, config: VisionBackend["vision_llamacpp"],messages: Message[], imageData: string) {
  79. const headers: Record<string, string> = {
  80. "Content-Type": "application/json",
  81. "Connection": "keep-alive",
  82. "Accept": "text/event-stream",
  83. };
  84. const prompt = buildVisionPrompt({name: name, vision_system_prompt: vision_system_prompt},messages);
  85. const res = await fetch(`${config?.vision_llamacpp_url}/completion`, {
  86. headers: headers,
  87. method: "POST",
  88. body: JSON.stringify({
  89. stream: true,
  90. n_predict: 400,
  91. temperature: 0.7,
  92. cache_prompt: true,
  93. stop: [
  94. "</s>",
  95. `${name}:`,
  96. "User:"
  97. ],
  98. image_data: [{
  99. data: imageData,
  100. id: 10,
  101. }],
  102. prompt,
  103. }),
  104. });
  105. if (! res.ok) {
  106. throw new Error(`LlamaCpp llava chat error (${res.status})`);
  107. }
  108. console.log('body', res.body);
  109. const reader = res.body?.getReader();
  110. if (res.status !== 200 || ! reader) {
  111. throw new Error(`LlamaCpp vision error (${res.status})`);
  112. }
  113. // Fetch the original image
  114. const stream = new ReadableStream({
  115. async start(controller: ReadableStreamDefaultController) {
  116. const decoder = new TextDecoder("utf-8");
  117. try {
  118. // sometimes the response is chunked, so we need to combine the chunks
  119. let combined = "";
  120. let cont = true;
  121. while (true) {
  122. const { done, value } = await reader.read();
  123. if (done || ! cont) break;
  124. const data = decoder.decode(value);
  125. const chunks = data
  126. .split("data:")
  127. .filter((val) => !!val && val.trim() !== "[DONE]");
  128. for (const chunk of chunks) {
  129. // skip comments
  130. if (chunk.length > 0 && chunk[0] === ":") {
  131. continue;
  132. }
  133. combined += chunk;
  134. try {
  135. const json = JSON.parse(combined);
  136. if (json.stop) {
  137. cont = false;
  138. }
  139. const messagePiece = json.content;
  140. combined = "";
  141. if (!!messagePiece) {
  142. controller.enqueue(messagePiece);
  143. }
  144. } catch (error) {
  145. console.error(error);
  146. }
  147. }
  148. }
  149. } catch (error) {
  150. console.error(error);
  151. controller.error(error);
  152. } finally {
  153. reader.releaseLock();
  154. controller.close();
  155. }
  156. },
  157. async cancel() {
  158. await reader?.cancel();
  159. reader.releaseLock();
  160. }
  161. });
  162. const sreader = await stream.getReader();
  163. let combined = "";
  164. while (true) {
  165. const { done, value } = await sreader.read();
  166. if (done) break;
  167. combined += value;
  168. }
  169. return combined;
  170. }