Golang中如何确定匹配项的数量

package main

import (
	"fmt"
	"regexp"
	"sync"
)

// main feeds the sample input "xxx" to getParams together with a pattern whose
// named groups are each followed by a counted repetition. The value returned
// by getParams is not used.
func main() {
	const pattern = "^(?P<a>x){1,3}(?P<b>123){0,2}(?P<a>x){1,3}$"
	const input = "xxx"

	getParams(pattern, input)
}

// Regex caches compiled regular expressions keyed by their pattern text so
// getParams can reuse them across calls.
var Regex sync.Map

// getParams matches str against the pattern regEx and returns the text
// captured by every named group, keyed by group name.
//
// Group names may repeat within one pattern, so each name maps to a slice
// holding one entry per group with that name, in left-to-right order. A group
// that did not take part in the match contributes an empty string. A group
// that is followed by a repetition, such as (?P<a>x){1,3}, records only its
// final iteration; write the repetition inside the group, as in
// (?P<a>x{1,3}), to capture the whole run and read the count from its length.
//
// The full submatch slice is also printed to standard output. getParams
// returns nil when regEx does not compile, when Regex holds a value of another
// type under regEx, or when str does not match.
func getParams(regEx, str string) (paramsMap map[string][]string) {
	var compRegEx *regexp.Regexp
	if cached, found := Regex.Load(regEx); found {
		re, ok := cached.(*regexp.Regexp)
		if !ok {
			// Regex is exported, so something other than a compiled pattern
			// may have been stored under this key.
			return nil
		}
		compRegEx = re
	} else {
		re, err := regexp.Compile(regEx)
		if err != nil {
			// Report an invalid pattern like every other failure instead of
			// panicking on caller-supplied input.
			return nil
		}
		Regex.Store(regEx, re)
		compRegEx = re
	}

	match := compRegEx.FindStringSubmatch(str)
	if len(match) == 0 {
		return nil
	}
	fmt.Println(match)

	paramsMap = make(map[string][]string)
	for i, name := range compRegEx.SubexpNames() {
		if name == "" {
			// Index 0 is the whole match; other empty names are unnamed groups.
			continue
		}
		paramsMap[name] = append(paramsMap[name], match[i])
	}
	return paramsMap
}

我的问题是：如何确定“x”的匹配数量？第一个正则模式“(?P<a>x){1,3}”匹配了多少次？第二个模式“(?P<a>x){1,3}”又匹配了多少次？我不知道如何确定。谁能告诉我？非常感谢。

vueper 1楼

检查返回的匹配项长度？

更多关于Golang中如何确定匹配项的数量的实战系列教程也可以访问 https://www.itying.com/category-94-b0.html

zlyuanteng 2楼

抱歉，我想更好地理解你的问题。你是想计算“x”出现的次数吗？还是我理解错了？

itying888 3楼

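在Go语言中，带量词的捕获组（例如 (?P<a>x){1,3}）只会保留最后一次重复所捕获的内容，regexp 包不会直接给出重复次数。可以使用FindStringSubmatchIndex或FindAllStringSubmatchIndex获取每个捕获组的起止位置；要得到重复次数，需要把量词写进捕获组内部（例如 (?P<a>x{1,3})），再用捕获到的子串长度来计算。对于命名捕获组，可以结合SubexpNames返回的名称与子匹配的下标来分析。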

以下是修改后的代码示例，展示如何取得每个命名捕获组捕获到的内容：

package main

import (
	"fmt"
	"regexp"
	"sync"
)

// main runs getParams once: the sample text "xxx" is matched against a pattern
// that reuses the group name "a" on both sides of an optional "b" group. The
// result of getParams is discarded.
func main() {
	const (
		pattern = "^(?P<a>x){1,3}(?P<b>123){0,2}(?P<a>x){1,3}$"
		sample  = "xxx"
	)
	_ = getParams(pattern, sample)
}

// Regex caches compiled regular expressions keyed by their pattern text so
// getParams can reuse them across calls.
var Regex sync.Map

// getParams finds every match of the pattern regEx in str, prints what each
// capture group matched, and returns the text captured by the named groups,
// keyed by group name.
//
// Group names may repeat within one pattern and a pattern may match several
// times, so each name maps to a slice with one entry per participating group
// per match. Participation is decided from the submatch offsets, so a group
// that matched the empty string is reported while a group that took no part in
// the match is skipped. A group followed by a repetition, such as
// (?P<a>x){1,3}, records only its final iteration; write the repetition inside
// the group, as in (?P<a>x{1,3}), to capture the whole run.
//
// getParams returns nil when regEx does not compile, when Regex holds a value
// of another type under regEx, or when str does not match.
func getParams(regEx, str string) (paramsMap map[string][]string) {
	var compRegEx *regexp.Regexp
	if cached, found := Regex.Load(regEx); found {
		re, ok := cached.(*regexp.Regexp)
		if !ok {
			return nil
		}
		compRegEx = re
	} else {
		re, err := regexp.Compile(regEx)
		if err != nil {
			// Report an invalid pattern like every other failure instead of
			// panicking on caller-supplied input.
			return nil
		}
		Regex.Store(regEx, re)
		compRegEx = re
	}

	// Each entry holds start/end offset pairs: the whole match first, then one
	// pair per capture group, with -1 marking a group that did not participate.
	matches := compRegEx.FindAllStringSubmatchIndex(str, -1)
	if len(matches) == 0 {
		return nil
	}

	groupNames := compRegEx.SubexpNames()

	// Report, group by group, the substring matched by every group (index 0 is
	// the whole match and has an empty name).
	for i, name := range groupNames {
		for _, loc := range matches {
			start, end := loc[2*i], loc[2*i+1]
			if start < 0 || end < 0 {
				continue
			}
			fmt.Printf("捕获组 '%s' 匹配了子字符串: %s\n", name, str[start:end])
		}
	}

	// Collect and report the values of the named groups, match by match.
	paramsMap = make(map[string][]string)
	for _, loc := range matches {
		for i, name := range groupNames {
			start, end := loc[2*i], loc[2*i+1]
			if name == "" || start < 0 || end < 0 {
				continue
			}
			value := str[start:end]
			fmt.Printf("捕获组 '%s' 匹配值: %s\n", name, value)
			paramsMap[name] = append(paramsMap[name], value)
		}
	}

	return paramsMap
}

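然而，对于你的具体问题，正则表达式^(?P<a>x){1,3}(?P<b>123){0,2}(?P<a>x){1,3}$中的两个(?P<a>x){1,3}是两个独立的捕获组（下标分别为1和3），只是名称相同；Go允许捕获组重名，它们各自保存自己的匹配结果，并不会互相覆盖，但按名称查找（例如SubexpIndex）只会得到最左边的那个。真正的问题在于：量词写在捕获组外面时，每个捕获组只保留最后一次重复所捕获的内容，无法从中得知重复了几次。要分别统计，建议给它们不同的名称，并把量词写进捕获组内部，例如(?P<a1>x{1,3})。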

修改正则表达式，给第二个捕获组不同的名称：

package main

import (
	"fmt"
	"regexp"
)

// main demonstrates how to find out how many times 'x' is repeated by each of
// two separately named groups when matching "xxx".
//
// The repetition is written inside each named group, e.g. (?P<a1>x{1,3}), so a
// group captures its whole run and the length of the captured text is the
// number of repetitions. With the repetition outside the group, as in
// (?P<a1>x){1,3}, only the last iteration is captured and the length would
// always be 1.
func main() {
	r := "^(?P<a1>x{1,3})(?P<b>(?:123){0,2})(?P<a2>x{1,3})$"
	compRegEx := regexp.MustCompile(r)
	str := "xxx"

	matches := compRegEx.FindAllStringSubmatch(str, -1)
	groupNames := compRegEx.SubexpNames()

	for _, match := range matches {
		for i, name := range groupNames {
			// Skip the whole match, unnamed groups, and groups that captured nothing.
			if name == "" || match[i] == "" {
				continue
			}
			fmt.Printf("捕获组 '%s' 匹配值: %s\n", name, match[i])
			if name == "a1" || name == "a2" {
				// Every repetition is the single byte 'x', so the byte length
				// of the capture equals the number of repetitions.
				fmt.Printf("捕获组 '%s' 匹配了 %d 个 'x'\n", name, len(match[i]))
			}
		}
	}
}

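输出示例（输入为 "xxx"）：

若量词写在捕获组外面，即 (?P<a1>x){1,3} 和 (?P<a2>x){1,3}，每个捕获组只保留最后一次重复所捕获的内容，因此长度恒为 1：

捕获组 'a1' 匹配值: x
捕获组 'a1' 匹配了 1 个 'x'
捕获组 'a2' 匹配值: x
捕获组 'a2' 匹配了 1 个 'x'

若把量词写进捕获组里面，即 (?P<a1>x{1,3}) 和 (?P<a2>x{1,3})，捕获到的是整段重复内容，其长度就是 'x' 的个数：

捕获组 'a1' 匹配值: xx
捕获组 'a1' 匹配了 2 个 'x'
捕获组 'a2' 匹配值: x
捕获组 'a2' 匹配了 1 个 'x'

对于输入"xxx"，a2 至少要匹配 1 个 'x'，所以 a1 回溯后只匹配前 2 个 'x'，a2 匹配最后 1 个 'x'；b 组重复 0 次，没有捕获到任何内容，因此不会输出。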