Rust AI模型集成库ollama-rs的使用

ollama-rs是一个简单易用的库，用于与Ollama API进行交互。它为Rust开发者提供了便捷的Ollama大语言模型API调用与本地推理功能。

安装

在Cargo.toml中添加ollama-rs依赖:

[dependencies]
ollama-rs = "0.3.2"

如果想使用最新版本，可以添加Git仓库的主分支:

[dependencies]
ollama-rs = { git = "https://github.com/pepperoni21/ollama-rs.git", branch = "master" }

初始化

默认连接localhost:11434，也可以指定自定义的连接地址:

use ollama_rs::Ollama;

// Connect to the default endpoint, localhost:11434.
let ollama = Ollama::default();

// Connect to a custom host and port.
let ollama = Ollama::new("http://localhost".to_string(), 11434);

使用示例

生成文本补全

use ollama_rs::generation::completion::GenerationRequest;

let model = "llama2:latest".to_string();
let prompt = "Why is the sky blue?".to_string();

// Print the completion on success; report the failure instead of silently dropping it.
match ollama.generate(GenerationRequest::new(model, prompt)).await {
    Ok(res) => println!("{}", res.response),
    Err(err) => eprintln!("generation failed: {err}"),
}

流式生成文本补全

use ollama_rs::generation::completion::GenerationRequest;
use tokio::io::{self, AsyncWriteExt};
use tokio_stream::StreamExt;

let model = "llama2:latest".to_string();
let prompt = "Why is the sky blue?".to_string();

// Start a streaming generation; `unwrap` panics if the request cannot be started.
let mut stream = ollama.generate_stream(GenerationRequest::new(model, prompt)).await.unwrap();

let mut stdout = io::stdout();
// Each stream item is a batch of partial responses; write every piece as it arrives.
while let Some(res) = stream.next().await {
    let responses = res.unwrap();
    for resp in responses {
        stdout.write_all(resp.response.as_bytes()).await.unwrap();
        // Flush after every piece so the text appears immediately.
        stdout.flush().await.unwrap();
    }
}

带选项的文本生成

use ollama_rs::generation::completion::GenerationRequest;
use ollama_rs::models::ModelOptions;

let model = "llama2:latest".to_string();
let prompt = "Why is the sky blue?".to_string();

// Sampling parameters are collected in ModelOptions and attached to the request below.
let options = ModelOptions::default()
    .temperature(0.2)
    .repeat_penalty(1.5)
    .top_k(25)
    .top_p(0.25);

// Print the completion on success; report the failure instead of silently dropping it.
match ollama.generate(GenerationRequest::new(model, prompt).options(options)).await {
    Ok(res) => println!("{}", res.response),
    Err(err) => eprintln!("generation failed: {err}"),
}

聊天模式

use ollama_rs::generation::chat::{ChatMessage, ChatMessageRequest};
use ollama_rs::history::ChatHistory;

let model = "llama2:latest".to_string();
let prompt = "Why is the sky blue?".to_string();
let mut history = vec![];

let res = ollama
    .send_chat_messages_with_history(
        &mut history, // <- messages are saved here
        ChatMessageRequest::new(
            model,
            vec![ChatMessage::user(prompt)], // <- provide only the new message
        ),
    )
    .await;

// Print the reply on success; report the failure instead of silently dropping it.
match res {
    Ok(res) => println!("{}", res.message.content),
    Err(err) => eprintln!("chat request failed: {err}"),
}

列出本地模型

let res = ollama.list_local_models().await.unwrap();

显示模型信息

let res = ollama.show_model_info("llama2:latest".to_string()).await.unwrap();

创建模型

use ollama_rs::models::create::CreateModelRequest;

let res = ollama.create_model(CreateModelRequest::path("model".into(), "/tmp/Modelfile.example"].into())).await.unwrap();

流式创建模型

use ollama_rs::models::create::CreateModelRequest;
use tokio_stream::StreamExt;

// Start the model creation as a stream; `unwrap` panics if the request cannot be started.
let mut res = ollama.create_model_stream(CreateModelRequest::path("model".into(), "/tmp/Modelfile.example".into())).await.unwrap();

// Consume the stream item by item; `unwrap` panics on a failed item.
while let Some(res) = res.next().await {
    let res = res.unwrap();
    // Handle the status update here.
}

复制模型

let _ = ollama.copy_model("mario".into(), "mario_copy".into()).await.unwrap();

删除模型

let _ = ollama.delete_model("mario_copy".into()).await.unwrap();

生成嵌入向量

use ollama_rs::generation::embeddings::request::GenerateEmbeddingsRequest;

let request = GenerateEmbeddingsRequest::new("llama2:latest".to_string(), "Why is the sky blue?".into());
let res = ollama.generate_embeddings(request).await.unwrap();

批量生成嵌入向量

use ollama_rs::generation::embeddings::request::GenerateEmbeddingsRequest;

let request = GenerateEmbeddingsRequest::new("llama2:latest".to_string(), vec!["Why is the sky blue?", "Why is the sky red?"].into());
let res = ollama.generate_embeddings(request).await.unwrap();

函数调用

use ollama_rs::coordinator::Coordinator;
use ollama_rs::generation::chat::{ChatMessage, ChatMessageRequest};
use ollama_rs::generation::tools::implementations::{DDGSearcher, Scraper, Calculator};
use ollama_rs::models::ModelOptions;

// The history is moved into the coordinator right away, so the binding itself need not be `mut`.
let history = vec![];

// Build a coordinator for the model with a larger context window and three built-in tools.
let mut coordinator = Coordinator::new(ollama, "qwen2.5:32b".to_string(), history)
    .options(ModelOptions::default().num_ctx(16384))
    .add_tool(DDGSearcher::new())
    .add_tool(Scraper {})
    .add_tool(Calculator {});

// Send one user message; `unwrap` panics if the chat request fails.
let resp = coordinator
    .chat(vec![ChatMessage::user("What is the current oil price?")])
    .await.unwrap();

println!("{}", resp.message.content);

创建自定义工具

/// Get the weather for a given city.
///
/// * city - City to get the weather for.
#[ollama_rs::function]
async fn get_weather(city: String) -> Result<String, Box<dyn std::error::Error + Sync + Send>> {
    // Fetch the plain-text report for the city and hand the response body back unchanged.
    let endpoint = format!("https://wttr.in/{city}?format=%C+%t");
    let body = reqwest::get(&endpoint).await?.text().await?;
    Ok(body)
}

带思考的文本生成

let model = "qwen3:latest".to_string();
let prompt = "Why is the sky blue?".to_string();

// Request a completion with thinking enabled; report the failure instead of silently dropping it.
match ollama.generate(GenerationRequest::new(model, prompt).think(true)).await {
    Ok(res) => println!("{}", res.response),
    Err(err) => eprintln!("generation failed: {err}"),
}

完整示例

这是一个简单的聊天机器人示例:

use ollama_rs::generation::chat::{ChatMessage, ChatMessageRequest};
use ollama_rs::history::ChatHistory;
use ollama_rs::Ollama;
use std::io::{self, Write};

/// Runs a command-line chat loop against the default local Ollama endpoint.
///
/// Reads one line per turn from stdin, sends it together with the accumulated
/// history, and prints the reply. The loop ends on "exit" (case-insensitive)
/// or when stdin reaches end of input.
///
/// # Errors
///
/// Returns any stdin/stdout I/O error or chat request error.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // NOTE(review): `mut` because `send_chat_messages_with_history` appears to take
    // `&mut self` in ollama-rs 0.3.x — confirm against the crate version in use.
    let mut ollama = Ollama::default();

    // Model used for every turn.
    let model = "llama2:latest".to_string();

    // Conversation so far; passed to every request so the model keeps context.
    let mut history: Vec<ChatMessage> = vec![];

    loop {
        // Prompt without a newline, so flush explicitly.
        print!("You: ");
        io::stdout().flush()?;

        let mut input = String::new();
        // `read_line` returns Ok(0) at end of input (Ctrl-D or a closed pipe);
        // stop here instead of looping forever on empty reads.
        if io::stdin().read_line(&mut input)? == 0 {
            break;
        }
        let input = input.trim();

        // Exit condition.
        if input.eq_ignore_ascii_case("exit") {
            break;
        }

        // A blank line carries no message; prompt again.
        if input.is_empty() {
            continue;
        }

        // Send the new message along with the history and wait for the reply.
        let res = ollama
            .send_chat_messages_with_history(
                &mut history,
                ChatMessageRequest::new(
                    model.clone(),
                    vec![ChatMessage::user(input.to_string())],
                ),
            )
            .await?;

        // Print the model's reply.
        println!("AI: {}", res.message.content);
    }

    Ok(())
}

这个示例展示了如何使用ollama-rs创建一个简单的命令行聊天机器人。它会持续对话直到用户输入"exit"退出。

itying888 1楼

ollama-rs: Rust的Ollama大语言模型集成库

概述

ollama-rs是一个为Rust开发者提供的库，用于便捷地与Ollama大语言模型API交互并进行本地推理。它简化了在Rust应用中集成AI模型的过程，支持多种Ollama提供的模型。

主要特性

简单的API设计
基于async/await的异步API（需配合tokio等异步运行时使用）
本地模型推理能力
模型管理功能
流式响应处理

安装

在Cargo.toml中添加依赖：

[dependencies]
# The `stream` feature enables the streaming APIs such as generate_stream; check crates.io for the latest version.
ollama-rs = { version = "0.3.2", features = ["stream"] }
# Async runtime; required because the client API is async.
tokio = { version = "1.0", features = ["full"] }
# Provides StreamExt for consuming the response streams in the examples below.
futures = "0.3"

基本使用方法

1. 初始化客户端

use ollama_rs::Ollama;

let ollama = Ollama::default();
// Or point the client at a specific host and port (host and port are separate arguments).
let ollama = Ollama::new("http://localhost".to_string(), 11434);

2. 列出可用模型

let models = ollama.list_local_models().await?;
println!("可用模型: {:?}", models);

3. 生成文本

use ollama_rs::generation::completion::request::GenerationRequest;

let prompt = "为什么Rust是一门优秀的系统编程语言？".to_string();

let res = ollama.generate(GenerationRequest::new("llama2".to_string(), prompt))
    .await?;

println!("回答: {}", res.response);

4. 流式响应

use ollama_rs::generation::completion::request::GenerationRequest;
use futures::StreamExt;

let mut stream = ollama.generate_stream(GenerationRequest::new("llama2".to_string(), prompt))
    .await?;

// Each stream item is a batch of partial responses, so print every piece in the batch.
while let Some(res) = stream.next().await {
    match res {
        Ok(responses) => {
            for resp in responses {
                print!("{}", resp.response);
            }
        }
        Err(e) => eprintln!("错误: {}", e),
    }
}

高级用法

1. 自定义生成参数

use ollama_rs::generation::completion::request::GenerationRequest;
use ollama_rs::models::ModelOptions;

// Sampling parameters live on ModelOptions and are attached to the request via `.options(...)`.
let options = ModelOptions::default()
    .temperature(0.7)  // controls creativity
    .top_p(0.9)        // controls diversity
    .num_predict(500); // maximum number of tokens to generate

let request = GenerationRequest::new("llama2".to_string(), prompt).options(options);

let res = ollama.generate(request).await?;

2. 创建和管理模型

// Pull the model.
// NOTE(review): the second argument is believed to be the `allow_insecure` bool — confirm against the crate version in use.
ollama.pull_model("llama2".to_string(), false).await?;

// Copy the model while the source still exists.
ollama.copy_model("llama2".to_string(), "llama2-copy".to_string()).await?;

// Delete the copy; deleting "llama2" before copying it would leave nothing to copy.
ollama.delete_model("llama2-copy".to_string()).await?;

3. 嵌入生成

use ollama_rs::generation::embeddings::request::GenerateEmbeddingsRequest;

// Build the request from a model name and the text to embed.
let embedding = ollama.generate_embeddings(
    GenerateEmbeddingsRequest::new("llama2".to_string(), "要嵌入的文本".into())
).await?;

// NOTE(review): the response is believed to expose one vector per input in `embeddings` — confirm the field name.
println!("嵌入向量: {:?}", embedding.embeddings);

错误处理

ollama-rs使用自定义错误类型处理各种可能的问题：

match ollama.generate(request).await {
    // The generated text is in the `response` field of the returned value.
    Ok(response) => println!("成功: {}", response.response),
    // NOTE(review): confirm that `OllamaError::ApiError { status, message }` exists in the
    // ollama-rs version in use; if not, match on one of that version's actual variants.
    Err(ollama_rs::error::OllamaError::ApiError { status, message }) => {
        eprintln!("API错误: {} - {}", status, message)
    },
    Err(e) => eprintln!("其他错误: {}", e),
}

实际应用示例

构建简单的聊天机器人

use ollama_rs::{Ollama, generation::completion::request::GenerationRequest};
use std::io::{self, Write};

/// Runs a stateless command-line chat loop: each input line is sent to the
/// model as an independent prompt and the completion is printed.
///
/// The loop ends when the user types "exit" or stdin reaches end of input.
///
/// # Errors
///
/// Returns any stdin/stdout I/O error or generation request error.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let ollama = Ollama::default();

    println!("聊天机器人已启动 (输入'exit'退出)");

    loop {
        // Prompt without a newline, so flush explicitly.
        print!("你: ");
        io::stdout().flush()?;

        let mut input = String::new();
        // `read_line` returns Ok(0) at end of input; stop instead of looping forever.
        if io::stdin().read_line(&mut input)? == 0 {
            break;
        }
        // Drop the trailing newline so it is not sent as part of the prompt.
        let input = input.trim();

        if input == "exit" {
            break;
        }

        // A blank line carries no prompt; ask again.
        if input.is_empty() {
            continue;
        }

        let response = ollama.generate(
            GenerationRequest::new("llama2".to_string(), input.to_string())
        ).await?;

        println!("AI: {}", response.response);
    }

    Ok(())
}

性能提示

对于长时间运行的应用程序，考虑重用Ollama客户端实例
使用流式响应可以提供更好的用户体验
调整生成参数可以平衡响应速度和质量

完整示例代码

下面是一个结合了基本使用和高级功能的完整示例：

use ollama_rs::{
    Ollama,
    generation::{
        completion::{request::GenerationRequest, GenerationResponse},
        embeddings::request::GenerateEmbeddingsRequest,
    },
    models::ModelOptions,
};
use futures::StreamExt;
use std::io::{self, Write};

/// Walks through the main client features in order: listing models, pulling a
/// model, one-shot generation, streaming generation, embeddings, and an
/// interactive prompt loop.
///
/// # Errors
///
/// Returns the first I/O or request error, except stream item errors, which
/// are printed and skipped.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Create the client once; every step below reuses it.
    let ollama = Ollama::default();

    // 1. List the locally available models.
    let models = ollama.list_local_models().await?;
    println!("本地可用模型: {:?}", models);

    // 2. Pull a model.
    // NOTE(review): the second argument is believed to be the `allow_insecure` bool — confirm.
    println!("正在拉取llama2模型...");
    ollama.pull_model("llama2".to_string(), false).await?;

    // 3. Generate text. Sampling parameters go through ModelOptions, not the request itself.
    let prompt = "用简单的方式解释Rust的所有权系统".to_string();
    let response = ollama.generate(
        GenerationRequest::new("llama2".to_string(), prompt)
            .options(ModelOptions::default().temperature(0.7).num_predict(200))
    ).await?;
    println!("生成结果: {}", response.response);

    // 4. Streaming generation.
    let mut stream = ollama.generate_stream(
        GenerationRequest::new("llama2".to_string(), "Rust中的trait是什么？".to_string())
    ).await?;

    println!("流式响应:");
    while let Some(res) = stream.next().await {
        match res {
            // Each stream item is a batch of partial responses.
            Ok(responses) => {
                for resp in responses {
                    print!("{}", resp.response);
                }
                // Flush so the partial text is visible before the final newline.
                io::stdout().flush()?;
            }
            Err(e) => eprintln!("错误: {}", e),
        }
    }
    println!();

    // 5. Generate embeddings.
    // NOTE(review): the response is believed to hold one vector per input in `embeddings` — confirm.
    let embedding = ollama.generate_embeddings(
        GenerateEmbeddingsRequest::new("llama2".to_string(), "这是要嵌入的文本".into())
    ).await?;
    // Report the dimension of the first (and only) vector.
    println!("嵌入向量长度: {}", embedding.embeddings.first().map_or(0, Vec::len));

    // 6. Interactive chat, reusing the client created at the top.
    println!("欢迎使用聊天机器人(输入'exit'退出)");

    loop {
        print!("你: ");
        io::stdout().flush()?;

        let mut input = String::new();
        // `read_line` returns Ok(0) at end of input; stop instead of looping forever.
        if io::stdin().read_line(&mut input)? == 0 {
            break;
        }
        // Drop the trailing newline so it is not sent as part of the prompt.
        let input = input.trim();

        if input == "exit" {
            break;
        }

        // A blank line carries no prompt; ask again.
        if input.is_empty() {
            continue;
        }

        let response = ollama.generate(
            GenerationRequest::new("llama2".to_string(), input.to_string())
                .options(ModelOptions::default().temperature(0.8))
        ).await?;

        println!("AI: {}", response.response);
    }

    Ok(())
}

这个完整示例展示了ollama-rs库的主要功能，包括：

初始化客户端
模型管理
文本生成
流式响应处理
嵌入生成
交互式聊天应用

使用前请确保已安装Ollama服务并运行在本地11434端口。

Rust AI模型集成库ollama-rs的使用，ollama-rs为Rust开发者提供便捷的Ollama大语言模型API调用与本地推理功能