# 07-07: Ollama 本地模型部署

使用 Ollama 在本地运行大语言模型。

In [None]:
// Ollama exposes an OpenAI-compatible API alongside its native one.
// Base URL of the Ollama server: http://localhost:11434
const OLLAMA_BASE_URL = 'http://localhost:11434';
// ========== 1. List local models ==========
/**
 * Lists the models known to the Ollama server by calling GET /api/tags.
 *
 * @returns The `models` array taken from the response body.
 * @throws Error if the server replies with a non-2xx HTTP status.
 */
async function listModels() {
  const response = await fetch(`${OLLAMA_BASE_URL}/api/tags`);

  // Without this check an error reply would be parsed like a success and the
  // caller would receive `undefined` instead of a list.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama GET /api/tags failed (HTTP ${response.status})${detail ? `: ${detail}` : ''}`
    );
  }

  const data = await response.json();
  return data.models;
}
// Example return value:
// [
//   { name: 'llama3.2:latest', model: 'llama3.2:latest', ... },
//   { name: 'codellama:latest', model: 'codellama:latest', ... },
//   { name: 'mistral:latest', model: 'mistral:latest', ... }
// ]

In [None]:
// ========== 2. Generate text ==========
/**
 * Requests a single, non-streamed completion from POST /api/generate.
 *
 * @param prompt - Prompt text sent to the model.
 * @param model - Name of the model to run; defaults to 'llama3.2'.
 * @returns The `response` field of the reply body.
 * @throws Error if the server replies with a non-2xx HTTP status.
 */
async function generateText(prompt: string, model: string = 'llama3.2'): Promise<string> {
  const response = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      prompt,
      stream: false
    })
  });

  // An error reply has no `response` field; fail loudly instead of quietly
  // handing `undefined` back to the caller.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama POST /api/generate failed (HTTP ${response.status})${detail ? `: ${detail}` : ''}`
    );
  }

  const data = (await response.json()) as { response: string };
  return data.response;
}
// Usage example
// const result = await generateText('Explain quantum computing in simple terms');
// console.log(result);

In [None]:
// ========== 3. Streaming generation ==========
/**
 * Streams a completion from POST /api/generate, yielding each non-empty
 * `response` fragment as it arrives.
 *
 * The body is read as newline-delimited JSON. Bytes are decoded in streaming
 * mode and incomplete trailing lines are buffered, so neither a multi-byte
 * character nor a JSON object that straddles two network chunks is lost.
 *
 * @param prompt - Prompt text sent to the model.
 * @param model - Name of the model to run; defaults to 'llama3.2'.
 * @yields Text fragments in the order they are received.
 * @throws Error if the server replies with a non-2xx HTTP status, if the reply
 *   has no body, or if a streamed line carries an `error` field.
 */
async function* generateStream(
  prompt: string,
  model: string = 'llama3.2'
): AsyncGenerator<string, void, undefined> {
  const response = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      prompt,
      stream: true
    })
  });

  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama POST /api/generate failed (HTTP ${response.status})${detail ? `: ${detail}` : ''}`
    );
  }
  if (!response.body) {
    throw new Error('Ollama POST /api/generate returned no response body');
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let finished = false;

  try {
    while (!finished) {
      const { done, value } = await reader.read();

      // `stream: true` keeps the bytes of a partially received character
      // inside the decoder until the rest arrives; the argument-less call
      // at end of input flushes whatever is left.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

      const lines = buffer.split('\n');
      // While more data may follow, the last element can be an unfinished
      // line, so it is carried over to the next read.
      buffer = done ? '' : (lines.pop() ?? '');

      for (const line of lines) {
        if (!line.trim()) continue;

        let parsed: unknown;
        try {
          parsed = JSON.parse(line);
        } catch {
          // Best effort: skip a complete line that is not valid JSON.
          continue;
        }
        if (typeof parsed !== 'object' || parsed === null) continue;

        const event = parsed as { response?: string; done?: boolean; error?: string };
        if (typeof event.error === 'string') {
          throw new Error(`Ollama stream error: ${event.error}`);
        }
        if (event.response) {
          yield event.response;
        }
        if (event.done) {
          // Leave the read loop as well, not only this inner loop.
          finished = true;
          break;
        }
      }

      if (done) break;
    }
  } finally {
    // Also runs when the consumer stops iterating early or an error is
    // thrown, so the underlying stream is not left open.
    await reader.cancel().catch(() => undefined);
  }
}
// Usage example
// for await (const chunk of generateStream('Write a poem about coding')) {
//   process.stdout.write(chunk);
// }

In [None]:
// ========== 4. Chat conversation ==========
/** One message of a chat conversation. */
interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}
/**
 * Sends a conversation to POST /api/chat and returns the non-streamed reply.
 *
 * @param messages - Conversation so far, in order.
 * @param model - Name of the model to run; defaults to 'llama3.2'.
 * @returns The `message` field of the reply body.
 * @throws Error if the server replies with a non-2xx HTTP status.
 */
async function chat(messages: ChatMessage[], model: string = 'llama3.2'): Promise<ChatMessage> {
  const response = await fetch(`${OLLAMA_BASE_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages,
      stream: false
    })
  });

  // An error reply has no `message` field; fail loudly instead of quietly
  // handing `undefined` back to the caller.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama POST /api/chat failed (HTTP ${response.status})${detail ? `: ${detail}` : ''}`
    );
  }

  const data = (await response.json()) as { message: ChatMessage };
  return data.message;
}
// Usage example
// const response = await chat([
//   { role: 'system', content: 'You are a helpful assistant.' },
//   { role: 'user', content: 'Hello!' }
// ]);
// console.log(response.content);

In [None]:
// ========== 5. Pull a model ==========
/**
 * Asks the server to pull a model via POST /api/pull (non-streamed).
 *
 * @param model - Name of the model to pull.
 * @returns The parsed JSON body of the reply, whatever its HTTP status.
 */
async function pullModel(model: string) {
  const endpoint = `${OLLAMA_BASE_URL}/api/pull`;
  const payload = JSON.stringify({ name: model, stream: false });

  const reply = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });

  const result = await reply.json();
  return result;
}
// Delete a model
/**
 * Asks the server to remove a model via DELETE /api/delete.
 *
 * @param model - Name of the model to delete.
 * @returns The `ok` flag of the HTTP response.
 */
async function deleteModel(model: string) {
  const endpoint = `${OLLAMA_BASE_URL}/api/delete`;
  const { ok } = await fetch(endpoint, {
    method: 'DELETE',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ name: model }),
  });
  return ok;
}

In [None]:
// ========== 6. Embedding vectors ==========
/**
 * Requests an embedding vector for a piece of text from POST /api/embeddings.
 *
 * @param text - Text to embed; sent as the `prompt` field.
 * @param model - Name of the embedding model; defaults to 'nomic-embed-text'.
 * @returns The `embedding` field of the reply body.
 * @throws Error if the server replies with a non-2xx HTTP status.
 */
async function generateEmbedding(
  text: string,
  model: string = 'nomic-embed-text'
): Promise<number[]> {
  const response = await fetch(`${OLLAMA_BASE_URL}/api/embeddings`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      prompt: text
    })
  });

  // An error reply has no `embedding` field; fail loudly instead of quietly
  // handing `undefined` back to the caller.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama POST /api/embeddings failed (HTTP ${response.status})${detail ? `: ${detail}` : ''}`
    );
  }

  const data = (await response.json()) as { embedding: number[] };
  return data.embedding;
}
// Usage example
// const embedding = await generateEmbedding('Hello world');
// console.log('Embedding size:', embedding.length); // usually 768 dimensions

In [None]:
// ========== 7. OpenAI-compatible API ==========
// Ollama provides an OpenAI-compatible endpoint
/**
 * Sends a single user message to POST /v1/chat/completions and returns the
 * text of the first choice.
 *
 * Called without arguments it sends the same request as before
 * ('Say hello' to 'llama3.2').
 *
 * @param prompt - Content of the user message; defaults to 'Say hello'.
 * @param model - Name of the model to run; defaults to 'llama3.2'.
 * @returns The `content` of the first choice's message.
 * @throws Error if the server replies with a non-2xx HTTP status or the reply
 *   contains no choices.
 */
async function openAICompatibleChat(
  prompt: string = 'Say hello',
  model: string = 'llama3.2'
): Promise<string> {
  const response = await fetch(`${OLLAMA_BASE_URL}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages: [
        { role: 'user', content: prompt }
      ]
    })
  });

  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama POST /v1/chat/completions failed (HTTP ${response.status})${detail ? `: ${detail}` : ''}`
    );
  }

  const data = (await response.json()) as {
    choices?: { message: { content: string } }[];
  };
  // Indexing an absent or empty `choices` array would otherwise surface as an
  // opaque TypeError.
  const first = data.choices?.[0];
  if (!first) {
    throw new Error('Ollama POST /v1/chat/completions returned no choices');
  }
  return first.message.content;
}
// The OpenAI SDK can be used directly by overriding baseURL
// import OpenAI from 'openai';
// const client = new OpenAI({
//   baseURL: 'http://localhost:11434/v1',
//   apiKey: 'ollama' // required but ignored
// });