Rust序列化数据处理库serde_trim的使用,高效修剪与清理序列化数据中的空白字符

Rust序列化数据处理库serde_trim的使用,高效修剪与清理序列化数据中的空白字符

支持修剪的类型

  • String
  • Option<String>
  • Vec<String>
  • BTreeSet<String>
  • HashSet<String>
  • VecDeque<String>
  • LinkedList<String>
  • BinaryHeap<String>

支持多种标准集合类型,当前也支持非空修剪

支持的功能集合

  • binaryheap_non_empty_string_trim
  • binaryheap_string_trim
  • btreeset_non_empty_string_trim
  • btreeset_string_trim
  • hashset_non_empty_string_trim
  • hashset_string_trim
  • linkedlist_non_empty_string_trim
  • linkedlist_string_trim
  • option_string_trim
  • string_trim
  • vec_non_empty_string_trim
  • vec_string_trim
  • vecdeque_non_empty_string_trim
  • vecdeque_string_trim

如何使用

以下是内容中提供的示例代码:

use serde_derive::Deserialize;
use serde_trim::*;
use std::collections::*;

// Walks through each `*_string_trim` deserializer, one example per supported
// field type, and asserts on the value obtained after deserialization.
fn main() {
    // Example 1: trimming a plain String; a whitespace-only value becomes "".
    #[derive(Deserialize)]
    struct Foo {
        #[serde(deserialize_with = "string_trim")]
        name: String,
    }
    let json = r#"{"name":" "}"#;
    let foo = serde_json::from_str::<Foo>(json).unwrap();
    assert_eq!(foo.name, "");

    // Example 2: trimming an Option<String>; a whitespace-only value becomes None.
    #[derive(Deserialize)]
    struct OptionFoo {
        #[serde(deserialize_with = "option_string_trim")]
        name: Option<String>,
    }
    let json = r#"{"name":" "}"#;
    let foo = serde_json::from_str::<OptionFoo>(json).unwrap();
    assert_eq!(foo.name, None);

    // Example 3: Option<String> with `default`, so a missing field yields None.
    #[derive(Deserialize)]
    struct OptionBar {
        #[serde(default, deserialize_with = "option_string_trim")]
        name: Option<String>,
        addr: String,
    }
    let json = r#"{"addr":"ABC"}"#;
    let foo = serde_json::from_str::<OptionBar>(json).unwrap();
    assert_eq!(foo.name, None);
    assert_eq!(foo.addr, "ABC");

    // Example 4: trimming every element of a Vec<String>.
    #[derive(Deserialize)]
    struct VecFoo {
        #[serde(deserialize_with = "vec_string_trim")]
        name: Vec<String>,
    }
    let json = r#"{"name":["   ","foo","b ar","hello ","  rust"]}"#;
    let foo = serde_json::from_str::<VecFoo>(json).unwrap();
    assert_eq!(foo.name, vec!["", "foo", "b ar", "hello", "rust"]);

    // Example 5: trimming every element of a BTreeSet<String>.
    #[derive(Deserialize)]
    struct BTreeSetFoo {
        #[serde(deserialize_with = "btreeset_string_trim")]
        name: BTreeSet<String>,
    }
    let json = r#"{"name":["   ","foo","b ar","hello ","  rust"]}"#;
    let foo = serde_json::from_str::<BTreeSetFoo>(json).unwrap();
    let expected: BTreeSet<String> = BTreeSet::from_iter([
        "".into(),
        "foo".into(),
        "b ar".into(),
        "hello".into(),
        "rust".into(),
    ]);
    assert_eq!(foo.name, expected);

    // Example 6: trimming every element of a HashSet<String>.
    #[derive(Deserialize)]
    struct HashSetFoo {
        #[serde(deserialize_with = "hashset_string_trim")]
        name: HashSet<String>,
    }
    let json = r#"{"name":["   ","foo","b ar","hello ","  rust"]}"#;
    let foo = serde_json::from_str::<HashSetFoo>(json).unwrap();
    let expected: HashSet<String> = HashSet::from_iter([
        "".into(),
        "foo".into(),
        "b ar".into(),
        "hello".into(),
        "rust".into(),
    ]);
    assert_eq!(foo.name, expected);

    // Example 7: trimming every element of a VecDeque<String>.
    #[derive(Deserialize)]
    struct VecDequeFoo {
        #[serde(deserialize_with = "vecdeque_string_trim")]
        name: VecDeque<String>,
    }
    let json = r#"{"name":["   ","foo","b ar","hello ","  rust"]}"#;
    let foo = serde_json::from_str::<VecDequeFoo>(json).unwrap();
    assert_eq!(foo.name, vec!["", "foo", "b ar", "hello", "rust"]);

    // Example 8: trimming every element of a LinkedList<String>.
    #[derive(Deserialize)]
    struct LinkedListFoo {
        #[serde(deserialize_with = "linkedlist_string_trim")]
        name: LinkedList<String>,
    }
    let json = r#"{"name":["   ","foo","b ar","hello ","  rust"]}"#;
    let foo = serde_json::from_str::<LinkedListFoo>(json).unwrap();
    assert_eq!(
        foo.name,
        LinkedList::from_iter([
            "".into(),
            "foo".into(),
            "b ar".into(),
            "hello".into(),
            "rust".into(),
        ])
    );

    // Example 9: trimming every element of a BinaryHeap<String>.
    #[derive(Deserialize)]
    struct BinaryHeapFoo {
        #[serde(deserialize_with = "binaryheap_string_trim")]
        name: BinaryHeap<String>,
    }
    let json = r#"{"name":["   ","foo","b ar","hello ","  rust"]}"#;
    let foo = serde_json::from_str::<BinaryHeapFoo>(json).unwrap();
    // `into_vec` returns the heap's backing vector in arbitrary order, so
    // compare the sorted contents instead of relying on the internal layout.
    assert_eq!(
        foo.name.into_sorted_vec(),
        vec!["", "b ar", "foo", "hello", "rust"]
    );
}

完整示例

以下是使用serde_trim的完整示例代码:

use serde::{Deserialize, Serialize};
use serde_trim::*;
use std::collections::*;

// Deserializes four sample payloads, each using a different trimming
// deserializer, and prints the resulting values.
fn main() {
    // Example 1: trimming user information.
    #[derive(Debug, Deserialize, Serialize)]
    struct User {
        #[serde(deserialize_with = "string_trim")]
        username: String,
        #[serde(deserialize_with = "string_trim")]
        email: String,
    }

    let json = r#"{"username":"  john_doe  ","email":"  john@example.com  "}"#;
    let user: User = serde_json::from_str(json).unwrap();
    println!("修剪后的用户信息: {:?}", user); // whitespace around username and email is removed

    // Example 2: trimming optional profile fields.
    #[derive(Debug, Deserialize)]
    struct Profile {
        #[serde(deserialize_with = "option_string_trim")]
        bio: Option<String>,
        #[serde(deserialize_with = "option_string_trim")]
        website: Option<String>,
    }

    let json = r#"{"bio":"  This is my bio  ","website":"  "}"#;
    let profile: Profile = serde_json::from_str(json).unwrap();
    println!("修剪后的资料信息: {:?}", profile); // the whitespace-only website becomes None

    // Example 3: trimming a list of tags.
    #[derive(Debug, Deserialize)]
    struct Tags {
        #[serde(deserialize_with = "vec_string_trim")]
        items: Vec<String>,
    }

    // The payload must be valid JSON, otherwise the `unwrap` below panics:
    // the object closes directly after the array.
    let json = r#"{"items":["  rust  ","  serde ","  json ","  "]}"#;
    let tags: Tags = serde_json::from_str(json).unwrap();
    println!("修剪后的标签: {:?}", tags); // every string item is trimmed

    // Example 4: trimming a set of unique words.
    #[derive(Debug, Deserialize)]
    struct UniqueWords {
        #[serde(deserialize_with = "hashset_string_trim")]
        words: HashSet<String>,
    }

    let json = r#"{"words":["  hello ","  world ","  hello ","  rust  "]}"#;
    let unique_words: UniqueWords = serde_json::from_str(json).unwrap();
    println!("修剪后的唯一单词集合: {:?}", unique_words); // trimmed and de-duplicated
}

安装

在项目中添加以下依赖到Cargo.toml:

[dependencies]
serde_trim = "1.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

serde_trim提供了一种简单高效的方式来处理序列化数据中的空白字符,适用于各种常见的数据结构和集合类型,可以帮助开发者轻松实现数据清理和规范化。


1 回复

serde_trim - Rust中高效修剪序列化数据空白字符的库

serde_trim 是一个专门为Rust设计的序列化数据处理库,主要用于在反序列化过程中(通过serde的deserialize_with属性)自动修剪字符串字段两端的空白字符(如空格、制表符、换行符等)。

功能特点

  1. 自动修剪字符串两端的空白字符
  2. 支持自定义修剪规则
  3. 与serde无缝集成
  4. 高性能,几乎不影响序列化/反序列化速度

安装方法

在Cargo.toml中添加依赖:

[dependencies]
serde_trim = "1.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

基本使用方法

1. 自动修剪结构体字段

use serde::{Deserialize, Serialize};
use serde_trim::string_trim;

/// User record whose string fields are trimmed while deserializing.
///
/// `string_trim` is a deserializer function, so it is attached with
/// `deserialize_with`; `with` would require a module that provides both
/// `serialize` and `deserialize`.
#[derive(Debug, Serialize, Deserialize)]
struct User {
    #[serde(deserialize_with = "string_trim")]
    username: String,
    #[serde(deserialize_with = "string_trim")]
    email: String,
    age: u32,
}

// Parses the sample payload into a `User` and prints its debug form.
fn main() {
    let payload = r#"
        {
            "username": "  john_doe  ",
            "email": "  john@example.com  \n",
            "age": 30
        }
    "#;

    let parsed = serde_json::from_str::<User>(payload).unwrap();
    println!("{:?}", parsed);
    // Expected output: User { username: "john_doe", email: "john@example.com", age: 30 }
}

2. 全局修剪所有字符串字段

use serde::{Deserialize, Serialize};
use serde_trim::all_string_trim;

// Profile whose string fields are all meant to be trimmed by one struct-level attribute.
// NOTE(review): `all_string_trim` is not among the functions this document lists as
// supported by serde_trim, and every other example here uses the `#[serde(...)]`
// attribute form rather than `#[serde_with = ...]` — confirm this compiles against the
// crate version in use before relying on it.
#[derive(Debug, Serialize, Deserialize)]
#[serde_with = "all_string_trim"]
struct Profile {
    name: String,
    address: String,
    bio: String,
}

// Parses the sample payload into a `Profile` and prints its debug form.
fn main() {
    let payload = r#"
        {
            "name": "  Alice  ",
            "address": "  123 Main St  \t",
            "bio": "  Software developer  \n"
        }
    "#;

    let parsed = serde_json::from_str::<Profile>(payload).unwrap();
    println!("{:?}", parsed);
    // Expected output: Profile { name: "Alice", address: "123 Main St", bio: "Software developer" }
}

3. 自定义修剪函数

use serde::{Deserialize, Serialize};
use serde_trim::custom_trim;

/// Removes leading and trailing whitespace from `s` and returns the rest in upper case.
fn custom_trim_fn(s: &str) -> String {
    let stripped = s.trim();
    stripped.to_uppercase()
}

// Product whose string fields are meant to be normalized by `custom_trim_fn`.
// NOTE(review): `custom_trim` is not among the functions this document lists as
// supported by serde_trim — confirm it exists in the crate version in use.
#[derive(Debug, Serialize, Deserialize)]
struct Product {
    #[serde(with = "custom_trim::<custom_trim_fn>")]
    name: String,
    #[serde(with = "custom_trim::<custom_trim_fn>")]
    category: String,
}

// Parses the sample payload into a `Product` and prints its debug form.
fn main() {
    let payload = r#"
        {
            "name": "  wireless mouse  ",
            "category": "  computer accessories  "
        }
    "#;

    let parsed = serde_json::from_str::<Product>(payload).unwrap();
    println!("{:?}", parsed);
    // Expected output: Product { name: "WIRELESS MOUSE", category: "COMPUTER ACCESSORIES" }
}

高级用法

处理Option<String>类型

use serde::{Deserialize, Serialize};
use serde_trim::option_string_trim;

/// Order whose optional `notes` field is trimmed while deserializing.
///
/// `option_string_trim` is a deserializer function, so it is attached with
/// `deserialize_with`; `with` would require a module that provides both
/// `serialize` and `deserialize`.
#[derive(Debug, Serialize, Deserialize)]
struct Order {
    #[serde(deserialize_with = "option_string_trim")]
    notes: Option<String>,
    id: u64,
}

// Parses the sample payload into an `Order` and prints its debug form.
fn main() {
    let payload = r#"
        {
            "notes": "  urgent delivery  ",
            "id": 42
        }
    "#;

    let parsed = serde_json::from_str::<Order>(payload).unwrap();
    println!("{:?}", parsed);
    // Expected output: Order { notes: Some("urgent delivery"), id: 42 }
}

处理Vec<String>类型

use serde::{Deserialize, Serialize};
use serde_trim::vec_string_trim;

/// Blog post whose `tags` entries are trimmed while deserializing.
///
/// `vec_string_trim` is a deserializer function, so it is attached with
/// `deserialize_with`; `with` would require a module that provides both
/// `serialize` and `deserialize`.
#[derive(Debug, Serialize, Deserialize)]
struct BlogPost {
    title: String,
    #[serde(deserialize_with = "vec_string_trim")]
    tags: Vec<String>,
}

// Parses the sample payload into a `BlogPost` and prints its debug form.
fn main() {
    let payload = r#"
        {
            "title": "Rust Programming",
            "tags": ["  rust  ", "  serde  ", "  \nperformance  "]
        }
    "#;

    let parsed = serde_json::from_str::<BlogPost>(payload).unwrap();
    println!("{:?}", parsed);
    // Expected output: BlogPost { title: "Rust Programming", tags: ["rust", "serde", "performance"] }
}

完整示例

以下是一个整合了多种功能的完整示例:

use serde::{Deserialize, Serialize};
use serde_trim::{string_trim, all_string_trim, option_string_trim, vec_string_trim, custom_trim};

/// Custom normalizer: strips surrounding whitespace, lower-cases the text,
/// and replaces each remaining space with an underscore.
fn custom_format(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    lowered.replace(' ', "_")
}

// Struct meant to have all of its string fields trimmed by one struct-level attribute.
// NOTE(review): `all_string_trim` is not among the functions this document lists as
// supported by serde_trim, and every other example here uses the `#[serde(...)]`
// attribute form rather than `#[serde_with = ...]` — confirm this compiles against the
// crate version in use before relying on it.
#[derive(Debug, Serialize, Deserialize)]
#[serde_with = "all_string_trim"]
struct GlobalTrimExample {
    field1: String,
    field2: String,
}

// Struct that applies trimming field by field.
//
// `string_trim`, `option_string_trim` and `vec_string_trim` are deserializer
// functions, so they are attached with `deserialize_with`; `with` would require
// a module that provides both `serialize` and `deserialize`.
#[derive(Debug, Serialize, Deserialize)]
struct FieldTrimExample {
    #[serde(deserialize_with = "string_trim")]
    name: String,
    #[serde(deserialize_with = "option_string_trim")]
    description: Option<String>,
    #[serde(deserialize_with = "vec_string_trim")]
    items: Vec<String>,
    // NOTE(review): `custom_trim` is not among the functions this document lists
    // as supported by serde_trim — confirm it exists before relying on this field.
    #[serde(with = "custom_trim::<custom_format>")]
    formatted: String,
}

// Deserializes one payload per example struct and prints each result.
fn main() {
    // Struct-level trimming example.
    let global_payload = r#"
        {
            "field1": "  HELLO  ",
            "field2": "  WORLD  \t"
        }
    "#;
    let global_parsed = serde_json::from_str::<GlobalTrimExample>(global_payload).unwrap();
    println!("全局修剪示例: {:?}", global_parsed);

    // Field-level trimming example.
    let field_payload = r#"
        {
            "name": "  Test User  ",
            "description": "  Some description  ",
            "items": ["  item1  ", "  item2  "],
            "formatted": "  This Should Be Formatted  "
        }
    "#;
    let field_parsed = serde_json::from_str::<FieldTrimExample>(field_payload).unwrap();
    println!("字段级修剪示例: {:?}", field_parsed);
}

性能建议

  1. 对于大型数据结构,原文推荐使用#[serde_with = "all_string_trim"]而不是逐个字段标注,以减少运行时开销;但该属性并不在上文"支持的功能集合"列表中,使用前请先确认所用版本确实提供它,否则请逐字段使用#[serde(deserialize_with = "...")]
  2. 如果只需要在反序列化时修剪,可以使用#[serde(deserialize_with = "...")]来指定仅反序列化时修剪

serde_trim是处理用户输入或外部数据时非常有用的工具,可以避免因空白字符导致的数据不一致问题。

回到顶部