Golang中如何实现高效低成本的去重消息ID生成器

各位Gopher们好,

这是来自AWS官方文档的一段代码片段:

private static AmazonSNS snsClient;
private static final String MESSAGE_PAYLOAD = " 192.168.1.100 - - [28/Oct/2021:10:27:10 -0500] "GET /index.html HTTP/1.1" 200 3395";
private static final String MESSAGE_FIFO_GROUP = "server1234";

PublishRequest request = new PublishRequest()
    .withTopicArn(topicArn)
    .withMessage(MESSAGE_PAYLOAD)
    .withMessageGroupId(MESSAGE_FIFO_GROUP)
    .withMessageDeduplicationId(UUID.randomUUID().toString());
PublishResult response = snsClient.publish(request);

请注意它创建新消息去重ID的方式成本很高:它直接生成了一个新的UUID并使用它。

我想知道,你们是否有更经济的方式来创建去重ID?


更多关于Golang中如何实现高效低成本的去重消息ID生成器的实战教程也可以访问 https://www.itying.com/category-94-b0.html

3 回复

也许你可以在这里找到一些信息:sync/atomic 包文档(https://pkg.go.dev/sync/atomic)

更多关于Golang中如何实现高效低成本的去重消息ID生成器的实战系列教程也可以访问 https://www.itying.com/category-94-b0.html


为了在Go中轻松生成唯一ID,我使用这个非常有用的包:

GitHub - rs/xid:xid is a globally unique id generator thought for the web(https://github.com/rs/xid)

如果你需要可排序的东西,也许可以看看这个雪花ID实现。

GitHub - bwmarrin/snowflake:A simple to use Go (golang) package to generate or parse Twitter snowflake IDs(https://github.com/bwmarrin/snowflake)

我希望这对你有所帮助。

在Golang中实现高效低成本的去重ID生成器,可以考虑以下几种方案:

1. 基于时间戳的ID生成器

package main

import (
    "fmt"
    "sync/atomic"
    "time"
)

// TimestampIDGenerator issues string IDs derived from the wall clock.
//
// It is safe for concurrent use. IDs are unique among the calls made on one
// generator instance; nothing coordinates separate instances or processes.
type TimestampIDGenerator struct {
    lastTimestamp int64 // highest UnixNano value handed out as a bare ID; accessed atomically
    sequence      int64 // ever-increasing tie-breaker, never reset; accessed atomically
}

// NewTimestampIDGenerator returns a generator whose high-water mark starts at
// the current wall-clock time.
func NewTimestampIDGenerator() *TimestampIDGenerator {
    return &TimestampIDGenerator{
        lastTimestamp: time.Now().UnixNano(),
    }
}

// Generate returns an ID that no other call on g has returned.
//
// The ID is the bare nanosecond timestamp when this call is the one that
// advances the generator's high-water mark to that timestamp, and
// "<timestamp>-<sequence>" otherwise.
func (g *TimestampIDGenerator) Generate() string {
    now := time.Now().UnixNano()
    last := atomic.LoadInt64(&g.lastTimestamp)

    // The high-water mark only ever moves forward, and it moves by
    // compare-and-swap, so at most one caller can advance it to any given
    // value. Only that caller may use the bare timestamp.
    if now > last && atomic.CompareAndSwapInt64(&g.lastTimestamp, last, now) {
        return fmt.Sprintf("%d", now)
    }

    // Same clock tick, a lost race, or a clock that stepped backwards: fall
    // back to the sequence. It is never reset, so every suffixed ID carries a
    // distinct sequence number regardless of the timestamp in front of it.
    return fmt.Sprintf("%d-%d", now, atomic.AddInt64(&g.sequence, 1))
}

2. 基于Snowflake算法的ID生成器

package main

import (
    "fmt"
    "sync"
    "time"
)

// Bit layout of a generated ID, from most to least significant:
// milliseconds since epoch | worker ID (workerIDBits) | sequence (sequenceBits).
const (
    workerIDBits = 5
    sequenceBits = 12
    maxWorkerID  = -1 ^ (-1 << workerIDBits) // 31
    maxSequence  = -1 ^ (-1 << sequenceBits) // 4095
    timeShift    = workerIDBits + sequenceBits
    workerShift  = sequenceBits
    epoch        = 1609459200000 // 2021-01-01 00:00:00 UTC, in Unix milliseconds
)

// SnowflakeIDGenerator produces 64-bit IDs composed of a millisecond
// timestamp, a worker ID and a per-millisecond sequence. All state is guarded
// by mu, so a generator may be shared between goroutines.
type SnowflakeIDGenerator struct {
    mu        sync.Mutex
    workerID  int64
    sequence  int64 // sequence number used for the most recent ID
    lastStamp int64 // Unix-millisecond timestamp used for the most recent ID
}

// NewSnowflakeIDGenerator returns a generator for the given worker.
// It panics if workerID is outside [0, maxWorkerID].
func NewSnowflakeIDGenerator(workerID int64) *SnowflakeIDGenerator {
    if workerID > maxWorkerID || workerID < 0 {
        panic(fmt.Sprintf("worker ID must be between 0 and %d", maxWorkerID))
    }
    return &SnowflakeIDGenerator{
        workerID: workerID,
    }
}

// Generate returns the next ID as a decimal string. IDs from one generator
// are strictly increasing.
//
// When all sequence numbers of the current millisecond are used up, Generate
// busy-waits, holding the lock, until the clock moves past the last timestamp.
func (g *SnowflakeIDGenerator) Generate() string {
    g.mu.Lock()
    defer g.mu.Unlock()

    now := time.Now().UnixMilli()

    // The wall clock can step backwards. Accepting the earlier time would
    // restart the sequence at 0 for a millisecond that may already have
    // issued IDs, so keep using the last timestamp until the clock catches up.
    if now < g.lastStamp {
        now = g.lastStamp
    }

    if now == g.lastStamp {
        g.sequence = (g.sequence + 1) & maxSequence
        if g.sequence == 0 {
            // Sequence wrapped: this millisecond is exhausted.
            for now <= g.lastStamp {
                now = time.Now().UnixMilli()
            }
        }
    } else {
        g.sequence = 0
    }

    g.lastStamp = now

    id := ((now - epoch) << timeShift) |
        (g.workerID << workerShift) |
        g.sequence

    return fmt.Sprintf("%d", id)
}

3. 基于消息内容哈希的ID生成器

package main

import (
    "crypto/sha256"
    "encoding/hex"
    "fmt"
    "hash/fnv"
    "time"
)

// GenerateDedupIDByHash returns the 64-bit FNV-1a hash of message as
// lowercase hexadecimal without leading zeros. It is fast, but two different
// messages can produce the same ID.
func GenerateDedupIDByHash(message string) string {
    hasher := fnv.New64a()
    hasher.Write([]byte(message))
    digest := hasher.Sum64()
    return fmt.Sprintf("%x", digest)
}

// GenerateDedupIDBySHA256 returns the first 16 bytes of the SHA-256 digest of
// message, encoded as 32 lowercase hexadecimal characters. It is slower than
// the FNV variant but far less likely to collide.
func GenerateDedupIDBySHA256(message string) string {
    const prefixLen = 16 // number of digest bytes kept in the ID
    digest := sha256.Sum256([]byte(message))
    return hex.EncodeToString(digest[:prefixLen])
}

// GenerateHybridDedupID combines the current time with a content hash. The
// result is "<UnixNano timestamp>-<FNV-1a 64-bit hash of message in hex>", so
// the same message yields a different ID whenever the timestamp differs.
func GenerateHybridDedupID(message string) string {
    hasher := fnv.New64a()
    hasher.Write([]byte(message))
    return fmt.Sprintf("%d-%x", time.Now().UnixNano(), hasher.Sum64())
}

4. 使用递增序列的ID生成器

package main

import (
    "fmt"
    "sync/atomic"
)

// SequentialIDGenerator builds IDs from a fixed prefix and a counter that is
// incremented atomically on every call.
type SequentialIDGenerator struct {
    prefix  string
    counter uint64 // accessed atomically; the first ID uses 1
}

// NewSequentialIDGenerator returns a generator that labels its IDs with
// prefix and starts counting from zero.
func NewSequentialIDGenerator(prefix string) *SequentialIDGenerator {
    gen := new(SequentialIDGenerator)
    gen.prefix = prefix
    return gen
}

// Generate returns "<prefix>-<n>", where n is the next counter value.
func (g *SequentialIDGenerator) Generate() string {
    next := atomic.AddUint64(&g.counter, 1)
    return fmt.Sprintf("%s-%d", g.prefix, next)
}

// GenerateWithTimestamp returns "<unix seconds>-<prefix>-<n>". It draws n
// from the same counter as Generate.
func (g *SequentialIDGenerator) GenerateWithTimestamp() string {
    seconds := time.Now().Unix()
    next := atomic.AddUint64(&g.counter, 1)
    return fmt.Sprintf("%d-%s-%d", seconds, g.prefix, next)
}

5. 实际使用示例

package main

import (
    "fmt"
    "time"
)

// main prints one sample ID from each of the generators.
func main() {
    // Example 1: timestamp-based generator.
    timestampGen := NewTimestampIDGenerator()
    timestampID := timestampGen.Generate()
    fmt.Println("Timestamp ID:", timestampID)

    // Example 2: Snowflake generator for worker 1.
    snowflakeGen := NewSnowflakeIDGenerator(1)
    snowflakeID := snowflakeGen.Generate()
    fmt.Println("Snowflake ID:", snowflakeID)

    // Example 3: deduplication ID derived from the message content.
    const payload = "192.168.1.100 - - [28/Oct/2021:10:27:10 -0500] GET /index.html HTTP/1.1 200 3395"
    fmt.Println("Hybrid Dedup ID:", GenerateHybridDedupID(payload))

    // Example 4: sequential generator with the "msg" prefix.
    sequentialGen := NewSequentialIDGenerator("msg")
    sequentialID := sequentialGen.Generate()
    fmt.Println("Sequential ID:", sequentialID)
}

性能对比

package main

import (
    "testing"
    "github.com/google/uuid"
)

// benchSink receives every generated ID so the compiler cannot discard the
// benchmarked calls as dead code.
var benchSink string

// BenchmarkUUID measures generating a random UUID and formatting it as a
// string; it is the baseline the other generators are compared against.
func BenchmarkUUID(b *testing.B) {
    b.ReportAllocs() // allocations per ID are part of the cost being compared
    for i := 0; i < b.N; i++ {
        benchSink = uuid.New().String()
    }
}

// BenchmarkTimestampID measures TimestampIDGenerator.Generate from a single
// goroutine. Generator construction is excluded from the timing.
func BenchmarkTimestampID(b *testing.B) {
    gen := NewTimestampIDGenerator()
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        benchSink = gen.Generate()
    }
}

// BenchmarkSnowflakeID measures SnowflakeIDGenerator.Generate from a single
// goroutine. Generator construction is excluded from the timing.
func BenchmarkSnowflakeID(b *testing.B) {
    gen := NewSnowflakeIDGenerator(1)
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        benchSink = gen.Generate()
    }
}

这些方案都比UUID生成更高效:

  • 时间戳方案:简单快速,适合单机场景
  • Snowflake方案:分布式友好,在每个节点使用不同workerID的前提下保证全局唯一
  • 哈希方案:适合基于消息内容去重
  • 序列方案:最高性能,适合高吞吐场景;计数器在进程重启后会从0重新开始,需要用前缀区分不同的进程或实例

选择哪种方案取决于你的具体需求:是否需要分布式支持、是否需要基于内容去重、以及对性能的要求程度。

回到顶部