Golang实现分层LRU缓存 - 保障祖先完整性的go-lrutree库

Golang实现分层LRU缓存 - 保障祖先完整性的go-lrutree库大家好，

我想分享一个我构建的 Go 库，名为 go-lrutree。它是一个小型、线程安全、泛型的缓存，专门为树形结构数据设计。

它解决的问题：

流行的 LRU 缓存实现（例如 hashicorp/golang-lru）对于扁平的键值对工作得很好。

但是当你处理层次结构数据时——比如组织架构图、文件路径、类别树或地理位置——扁平缓存可能就不够用了。

例如：如果你缓存了一个城市，你可能希望它的州和国家也保持缓存状态。但传统的 LRU 淘汰机制可能会淘汰父节点，而子节点却保留着，从而破坏了逻辑结构。

go-lrutree 通过强制执行以下规则来解决这个问题：如果一个节点在缓存中，那么它的所有祖先节点也必须在缓存中。当你访问一个节点时，它的整个祖先链都会被标记为最近使用——从而保持链的完整性和淘汰安全性。

使用示例：

package main

import (
	"fmt"

	"github.com/vasayxtx/go-lrutree"
)

type OrgItem struct {
	Name string
}

func main() {
	// 创建一个最大容量为4个条目并带有淘汰回调的新缓存。
	cache := lrutree.NewCache[string, OrgItem](4, lrutree.WithOnEvict(func(node lrutree.CacheNode[string, OrgItem]) {
		fmt.Printf("Evicted: %s (key=%s, parent=%s)\n", node.Value.Name, node.Key, node.ParentKey)
	}))

	// 向缓存添加节点。
	_ = cache.AddRoot("company", OrgItem{"My Company"})
	_ = cache.Add("engineering", OrgItem{"Engineering department"}, "company")
	_ = cache.Add("frontend", OrgItem{"Frontend team"}, "engineering")
	_ = cache.Add("backend", OrgItem{"Backend team"}, "engineering")

	// 通过键获取值。
	// "frontend" 节点及其所有祖先节点（"engineering" 和 "company" 节点）被标记为最近使用。
	if cacheNode, ok := cache.Get("frontend"); ok {
		fmt.Printf("Get: %s (key=%s, parent=%s)\n", cacheNode.Value.Name, cacheNode.Key, cacheNode.ParentKey)
		// 输出: Get: Frontend team (key=frontend, parent=engineering)
	}

	// 获取从根节点到键为 "backend" 的节点的完整分支。
	// "backend"、"engineering" 和 "company" 节点被标记为最近使用。
	branch := cache.GetBranch("backend")
	for i, node := range branch {
		fmt.Printf("GetBranch[%d]: %s (key=%s, parent=%s)\n", i, node.Value.Name, node.Key, node.ParentKey)
	}
	// 输出:
	// GetBranch[0]: My Company (key=company, parent=)
	// GetBranch[1]: Engineering department (key=engineering, parent=company)
	// GetBranch[2]: Backend team (key=backend, parent=engineering)

	// 通过键窥视值，而不更新 LRU 顺序。
	if cacheNode, ok := cache.Peek("frontend"); ok {
		fmt.Printf("Peek: %s (key=%s, parent=%s)\n", cacheNode.Value.Name, cacheNode.Key, cacheNode.ParentKey)
		// 输出: Peek: Frontend team (key=frontend, parent=engineering)
	}

	// 添加一个新节点，超过了缓存的最大容量。
	// 最近最少使用的叶子节点（"frontend"）被淘汰。
	_ = cache.Add("architects", OrgItem{"Architects team"}, "engineering")
	// 输出: Evicted: Frontend team (key=frontend, parent=engineering)
}

期待您的反馈！

我很想听听 Go 社区的意见：

这种层次化缓存的概念对您有启发吗？您能想象出它的使用场景吗？
对 API 设计或实现方法有什么反馈吗？
对改进或功能有什么建议吗？

感谢您的关注！

更多关于Golang实现分层LRU缓存 - 保障祖先完整性的go-lrutree库的实战教程也可以访问 https://www.itying.com/category-94-b0.html

songsunli 1楼

更多关于Golang实现分层LRU缓存 - 保障祖先完整性的go-lrutree库的实战系列教程也可以访问 https://www.itying.com/category-94-b0.html

这是一个非常棒的库，解决了传统LRU在处理层次化数据时的关键痛点。保障祖先节点的完整性对于文件系统、组织结构、分类目录等场景至关重要。以下是对你实现的一些技术分析和示例补充：

技术实现亮点：

祖先链完整性保障：通过强制要求节点祖先必须存在，避免了数据逻辑不一致
链式LRU更新：访问节点时自动更新整个祖先链的LRU状态，这是正确的设计选择
线程安全设计：适合并发场景下的树形数据缓存

示例代码扩展：

// 演示并发场景下的使用
func concurrentExample() {
    cache := lrutree.NewCache[string, *FileInfo](100, 
        lrutree.WithOnEvict(func(node lrutree.CacheNode[string, *FileInfo]) {
            fmt.Printf("Evicted file: %s\n", node.Value.Path)
        }))

    // 模拟多个goroutine并发访问文件路径
    var wg sync.WaitGroup
    paths := []string{"/usr/local/bin", "/usr/local/lib", "/var/log"}
    
    for _, path := range paths {
        wg.Add(1)
        go func(p string) {
            defer wg.Done()
            
            // 分解路径并确保所有父目录被缓存
            parts := strings.Split(p, "/")
            var parentKey string
            
            for i, part := range parts {
                if part == "" {
                    continue
                }
                key := strings.Join(parts[:i+1], "/")
                if i == 0 {
                    parentKey = ""
                }
                
                cache.Add(key, &FileInfo{
                    Path: key,
                    IsDir: true,
                }, parentKey)
                
                parentKey = key
            }
        }(path)
    }
    wg.Wait()
}

// 演示批量操作
func batchOperations() {
    cache := lrutree.NewCache[int, TreeNode](50)
    
    // 批量添加树节点
    nodes := []struct{
        key int
        value TreeNode
        parent int
    }{
        {1, TreeNode{Name: "Root"}, 0},
        {2, TreeNode{Name: "Child1"}, 1},
        {3, TreeNode{Name: "Child2"}, 1},
        {4, TreeNode{Name: "Grandchild1"}, 2},
    }
    
    for _, n := range nodes {
        if n.parent == 0 {
            cache.AddRoot(n.key, n.value)
        } else {
            cache.Add(n.key, n.value, n.parent)
        }
    }
    
    // 获取子树
    if subtree, ok := cache.GetSubtree(2); ok {
        for _, node := range subtree {
            fmt.Printf("Subtree node: %v\n", node.Value)
        }
    }
}

// 类型安全的包装器示例
type CategoryCache struct {
    cache *lrutree.Cache[string, Category]
}

func NewCategoryCache(capacity int) *CategoryCache {
    return &CategoryCache{
        cache: lrutree.NewCache[string, Category](capacity),
    }
}

func (cc *CategoryCache) AddCategory(id, name string, parentID string) error {
    if parentID == "" {
        return cc.cache.AddRoot(id, Category{
            ID:   id,
            Name: name,
        })
    }
    return cc.cache.Add(id, Category{
        ID:   id,
        Name: name,
    }, parentID)
}

func (cc *CategoryCache) GetCategoryChain(id string) []Category {
    branch := cc.cache.GetBranch(id)
    result := make([]Category, len(branch))
    for i, node := range branch {
        result[i] = node.Value
    }
    return result
}

性能考虑：

// 基准测试示例
func BenchmarkCacheOperations(b *testing.B) {
    cache := lrutree.NewCache[int, string](1000)
    
    // 构建深度树
    for i := 1; i <= 1000; i++ {
        parent := i / 2
        if parent == 0 {
            cache.AddRoot(i, fmt.Sprintf("Node%d", i))
        } else {
            cache.Add(i, fmt.Sprintf("Node%d", i), parent)
        }
    }
    
    b.ResetTimer()
    
    b.Run("GetLeafNode", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            cache.Get(999)
        }
    })
    
    b.Run("AddNewNode", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            cache.Add(1000+i, "NewNode", 500)
        }
    })
}

这个库的设计很好地解决了层次化数据缓存的特殊需求。GetBranch方法特别有用，可以一次性获取完整路径。线程安全的设计也使其适合生产环境使用。