Golang中如何实现高效低成本的去重消息ID生成器
各位Gopher们好,
这是来自AWS官方文档的一段代码片段:
private static AmazonSNS snsClient;
private static final String MESSAGE_PAYLOAD = " 192.168.1.100 - - [28/Oct/2021:10:27:10 -0500] "GET /index.html HTTP/1.1" 200 3395";
private static final String MESSAGE_FIFO_GROUP = "server1234";
PublishRequest request = new PublishRequest()
.withTopicArn(topicArn)
.withMessage(MESSAGE_PAYLOAD)
.withMessageGroupId(MESSAGE_FIFO_GROUP)
.withMessageDeduplicationId(UUID.randomUUID().toString());
PublishResult response = snsClient.publish(request);
请注意它创建新消息去重ID的方式成本很高:它直接生成了一个新的UUID并使用它。
我想知道,你们是否有更经济的方式来创建去重ID?
更多关于Golang中如何实现高效低成本的去重消息ID生成器的实战教程也可以访问 https://www.itying.com/category-94-b0.html
3 回复
也许你可以在这里找到一些信息:Go 标准库 sync/atomic 包的文档(https://pkg.go.dev/sync/atomic)。
更多关于Golang中如何实现高效低成本的去重消息ID生成器的实战系列教程也可以访问 https://www.itying.com/category-94-b0.html
为了在Go中轻松生成唯一ID,我使用这个非常有用的包:
GitHub - rs/xid: xid is a globally unique id generator thought for the web
如果你需要可排序的东西,也许可以看看这个雪花ID实现。
GitHub - bwmarrin/snowflake: A simple to use Go (golang) package to generate or parse Twitter snowflake IDs(https://github.com/bwmarrin/snowflake)
我希望这对你有所帮助。
在Golang中实现高效低成本的去重ID生成器,可以考虑以下几种方案:
1. 基于时间戳的ID生成器
package main
import (
"fmt"
"sync/atomic"
"time"
)
// TimestampIDGenerator produces string IDs derived from the wall clock's
// nanosecond timestamp. Both fields are only ever accessed through
// sync/atomic, so a single generator may be shared between goroutines.
type TimestampIDGenerator struct {
	// lastTimestamp is the largest UnixNano value that has been claimed so far
	// (initially the construction time). It only ever increases.
	lastTimestamp int64
	// sequence is a tie-breaker appended when a timestamp cannot be claimed.
	// It is never reset, so every value is handed out at most once.
	sequence int64
}

// NewTimestampIDGenerator returns a generator whose last claimed timestamp is
// the current time and whose sequence starts at zero.
func NewTimestampIDGenerator() *TimestampIDGenerator {
	return &TimestampIDGenerator{
		lastTimestamp: time.Now().UnixNano(),
		sequence:      0,
	}
}

// Generate returns an ID that is unique among all IDs produced by g.
//
// If the current timestamp is newer than every timestamp claimed so far, the
// caller that wins the compare-and-swap returns the bare timestamp ("<nanos>").
// Otherwise (same nanosecond, a lost race, or a clock that moved backwards)
// the ID is "<nanos>-<sequence>" with a never-repeating sequence number.
func (g *TimestampIDGenerator) Generate() string {
	now := time.Now().UnixNano()
	for {
		last := atomic.LoadInt64(&g.lastTimestamp)
		if now <= last {
			// This timestamp (or a later one) is already taken; fall through
			// to the sequence-suffixed form.
			break
		}
		// Only one caller can move lastTimestamp to now, so a bare timestamp
		// is issued at most once.
		if atomic.CompareAndSwapInt64(&g.lastTimestamp, last, now) {
			return fmt.Sprintf("%d", now)
		}
	}
	seq := atomic.AddInt64(&g.sequence, 1)
	return fmt.Sprintf("%d-%d", now, seq)
}
2. 基于Snowflake算法的ID生成器
package main
import (
"fmt"
"sync"
"time"
)
// Bit layout of a generated ID, from high to low bits:
// milliseconds since epoch | worker ID (workerIDBits) | sequence (sequenceBits).
const (
	workerIDBits = 5
	sequenceBits = 12
	maxWorkerID  = -1 ^ (-1 << workerIDBits) // largest worker ID that fits in workerIDBits
	maxSequence  = -1 ^ (-1 << sequenceBits) // largest sequence that fits in sequenceBits
	timeShift    = workerIDBits + sequenceBits
	workerShift  = sequenceBits
	epoch        = 1609459200000 // 2021-01-01 00:00:00 UTC
)

// SnowflakeIDGenerator generates snowflake-style numeric IDs. All state is
// guarded by mu, so a generator may be shared between goroutines.
type SnowflakeIDGenerator struct {
	mu        sync.Mutex
	workerID  int64 // fixed per generator; placed above the sequence bits
	sequence  int64 // counter within the current millisecond
	lastStamp int64 // millisecond timestamp used for the most recent ID
}

// NewSnowflakeIDGenerator returns a generator for the given worker ID.
// It panics if workerID is outside [0, maxWorkerID].
func NewSnowflakeIDGenerator(workerID int64) *SnowflakeIDGenerator {
	if workerID > maxWorkerID || workerID < 0 {
		panic(fmt.Sprintf("worker ID must be between 0 and %d", maxWorkerID))
	}
	return &SnowflakeIDGenerator{
		workerID: workerID,
	}
}

// Generate returns the next ID as a decimal string.
//
// The timestamp used for IDs never decreases: if the wall clock reports a
// time earlier than the last one used, the last timestamp is reused and the
// sequence keeps counting, so a clock rollback cannot re-issue an ID. When
// the sequence for a millisecond is exhausted, Generate busy-waits (holding
// the lock) until the wall clock passes the last timestamp.
func (g *SnowflakeIDGenerator) Generate() string {
	g.mu.Lock()
	defer g.mu.Unlock()

	now := time.Now().UnixMilli()
	if now < g.lastStamp {
		// Clock moved backwards: continue from the last issued timestamp
		// instead of restarting the sequence at an earlier time.
		now = g.lastStamp
	}
	if now == g.lastStamp {
		g.sequence = (g.sequence + 1) & maxSequence
		if g.sequence == 0 {
			// Sequence wrapped: wait for a timestamp that has not been used.
			for now <= g.lastStamp {
				now = time.Now().UnixMilli()
			}
		}
	} else {
		g.sequence = 0
	}
	g.lastStamp = now

	id := ((now - epoch) << timeShift) |
		(g.workerID << workerShift) |
		g.sequence
	return fmt.Sprintf("%d", id)
}
3. 基于消息内容哈希的ID生成器
package main
import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"hash/fnv"
	"time"
)
// GenerateDedupIDByHash returns the 64-bit FNV-1a hash of message formatted as
// lowercase hexadecimal without zero padding. A 64-bit hash is cheap to
// compute, but distinct messages can collide.
func GenerateDedupIDByHash(message string) string {
	hasher := fnv.New64a()
	// hash.Hash documents that Write never returns an error.
	_, _ = hasher.Write([]byte(message))
	digest := hasher.Sum64()
	return fmt.Sprintf("%x", digest)
}
// GenerateDedupIDBySHA256 returns the first 16 bytes of the SHA-256 digest of
// message as a 32-character lowercase hexadecimal string. It is more
// collision-resistant than the FNV variant at a somewhat higher cost.
func GenerateDedupIDBySHA256(message string) string {
	const idBytes = 16 // number of leading digest bytes kept in the ID
	digest := sha256.Sum256([]byte(message))
	truncated := digest[:idBytes]
	return hex.EncodeToString(truncated)
}
// GenerateHybridDedupID returns "<unix-nanos>-<fnv64a-hex>": the current
// nanosecond timestamp followed by the 64-bit FNV-1a hash of message.
// Because the timestamp is part of the ID, two calls with the same message
// return different IDs unless they observe the same nanosecond.
func GenerateHybridDedupID(message string) string {
	nanos := time.Now().UnixNano()

	hasher := fnv.New64a()
	// hash.Hash documents that Write never returns an error.
	_, _ = hasher.Write([]byte(message))

	return fmt.Sprintf("%d-%x", nanos, hasher.Sum64())
}
4. 使用递增序列的ID生成器
package main
import (
"fmt"
"sync/atomic"
)
// SequentialIDGenerator hands out IDs built from a fixed prefix and an
// atomically incremented counter.
type SequentialIDGenerator struct {
	prefix  string // prepended to every ID
	counter uint64 // last value handed out; updated with sync/atomic
}

// NewSequentialIDGenerator returns a generator that uses prefix and whose
// counter starts at zero, so the first ID carries the number 1.
func NewSequentialIDGenerator(prefix string) *SequentialIDGenerator {
	gen := new(SequentialIDGenerator)
	gen.prefix = prefix
	return gen
}

// Generate returns "<prefix>-<n>", where n is the next counter value.
func (g *SequentialIDGenerator) Generate() string {
	next := atomic.AddUint64(&g.counter, 1)
	return g.prefix + "-" + fmt.Sprint(next)
}

// GenerateWithTimestamp returns "<unix-seconds>-<prefix>-<n>", where n is the
// next value of the same counter that Generate uses.
func (g *SequentialIDGenerator) GenerateWithTimestamp() string {
	seconds := time.Now().Unix()
	next := atomic.AddUint64(&g.counter, 1)
	return fmt.Sprintf("%d-%s-%d", seconds, g.prefix, next)
}
5. 实际使用示例
package main
import (
"fmt"
"time"
)
// main prints one sample ID from each generator: timestamp, snowflake,
// hybrid (timestamp + content hash) and sequential, in that order.
func main() {
	// Example 1: timestamp-based generator.
	timestampGen := NewTimestampIDGenerator()
	fmt.Println("Timestamp ID:", timestampGen.Generate())

	// Example 2: snowflake generator with worker ID 1.
	snowflakeGen := NewSnowflakeIDGenerator(1)
	fmt.Println("Snowflake ID:", snowflakeGen.Generate())

	// Example 3: deduplication ID derived from the message content.
	const sampleLog = "192.168.1.100 - - [28/Oct/2021:10:27:10 -0500] GET /index.html HTTP/1.1 200 3395"
	fmt.Println("Hybrid Dedup ID:", GenerateHybridDedupID(sampleLog))

	// Example 4: sequential generator with the prefix "msg".
	sequentialGen := NewSequentialIDGenerator("msg")
	fmt.Println("Sequential ID:", sequentialGen.Generate())
}
性能对比
package main
import (
"testing"
"github.com/google/uuid"
)
// BenchmarkUUID measures generating a new UUID and formatting it as a string.
// It is the baseline the other generators are compared against.
func BenchmarkUUID(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		id := uuid.New()
		_ = id.String()
	}
}
// BenchmarkTimestampID measures TimestampIDGenerator.Generate on a single
// generator; construction is excluded from the timed region.
func BenchmarkTimestampID(b *testing.B) {
	generator := NewTimestampIDGenerator()

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = generator.Generate()
	}
}
// BenchmarkSnowflakeID measures SnowflakeIDGenerator.Generate on a single
// generator with worker ID 1; construction is excluded from the timed region.
func BenchmarkSnowflakeID(b *testing.B) {
	generator := NewSnowflakeIDGenerator(1)

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = generator.Generate()
	}
}
这些方案的生成开销通常都低于UUID(具体差距请以上面的基准测试结果为准):
- 时间戳方案:简单快速,适合单机场景
- Snowflake方案:分布式友好,在各节点workerID互不相同且系统时钟不回拨的前提下保证全局唯一
- 哈希方案:适合基于消息内容去重(注意:GenerateHybridDedupID加入了时间戳,相同内容每次生成的ID都不同,不能用于内容去重)
- 序列方案:开销最小,适合高吞吐场景(计数器在进程重启后会从头开始,需要配合每次启动都不同的前缀使用)
选择哪种方案取决于你的具体需求:是否需要分布式支持、是否需要基于内容去重、以及对性能的要求程度。

