Golang文件写入问题:如何解决打印结果未完全写入文本文件

我正在尝试使用Go语言的并发功能。goroutine打印出了所有预期的结果,但并没有把这些结果全部写入文件,文件里只写入了26行。

main.go

import (
	"bufio"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"regexp"
	"strings"
	"sync"
)

// wg is the WaitGroup that main waits on and that each Grabber call marks
// done when it returns.
var wg sync.WaitGroup

// mt is declared but never referenced anywhere in this listing.
var mt sync.Mutex

// main reads ip.txt line by line, starts one Grabber goroutine per line, waits
// on wg, and finally reports any error the scanner hit while reading.
func main() {
	file, err := os.Open("ip.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		go Grabber(scanner.Text())
		// NOTE(review): wg.Add(1) runs after the goroutine has already been
		// started; the sync.WaitGroup documentation asks for Add to be called
		// before the go statement.
		wg.Add(1)

	}
	wg.Wait()

	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}

// stringInArray reports whether a occurs in list. It returns false for an
// empty or nil list.
func stringInArray(a string, list []string) bool {
	for i := 0; i < len(list); i++ {
		if list[i] == a {
			return true
		}
	}
	return false
}

// Grabber requests Bing search results for the query "ip:<ip>", collects the
// distinct "scheme://host" prefixes of the result links, then prints each
// prefix and writes it as a line to urls.txt. It calls wg.Done when it returns.
func Grabber(ip string) {
	defer wg.Done()
	var output []string
	// NOTE(review): os.Create truncates urls.txt when it already exists, and
	// every Grabber call opens the file again through its own handle, so
	// concurrent calls overwrite one another's output.
	outfile, err := os.Create("urls.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer outfile.Close()
	if ip == "" {
		// NOTE(review): empty branch; an empty ip is still processed below.

	}
	// page takes the values 1, 51, 101, 151 and 201, so the body runs five times.
	page := 1
	for page < 251 {
		client := &http.Client{}
		// NOTE(review): page is never placed in the URL; every iteration
		// requests first=1.
		req, err := http.NewRequest(
			http.MethodGet,
			fmt.Sprintf(
				"http://www.bing.com/search?q=ip:%s+&count=50&first=1",
				ip,
			),
			nil,
		)
		if err != nil {
			// NOTE(review): a NewRequest error is ignored here.

		}
		req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:57.0) Gecko/20100101 Firefox/57.0")
		res, err := client.Do(req)
		if err != nil {
			// NOTE(review): execution continues after this message; res may be
			// nil when Do fails, in which case the next line would panic.
			fmt.Println("Invalid Request")
		}
		// Deferred inside the loop, so each body is closed only when Grabber
		// returns, not at the end of the iteration.
		defer res.Body.Close()
		body, err := ioutil.ReadAll(res.Body)
		if err != nil {
			fmt.Println("Couldn't Read")
		}
		// Matches the start of a result heading link up to the quote that
		// closes its href value. The pattern is recompiled on every iteration.
		re := regexp.MustCompile(`<h2><a href="(.*?)"`)
		links := re.FindAllString(string(body), -1)
		if links != nil {
			for l := range links {
				// o[1] is the text between the two quotes, i.e. the href value.
				o := strings.Split(links[l], `"`)
				// NOTE(review): d[2] assumes the href splits into at least
				// three "/"-separated segments; a shorter href would panic.
				d := strings.Split(o[1], "/")
				s := d[0] + "//" + d[2]
				// Keep only the first occurrence of each prefix.
				if !stringInArray(s, output) {
					output = append(output, s)
				}
			}
		}
		page = page + 50
	}
	// Echo every collected prefix to stdout and write it to this call's file handle.
	for _, links := range output {
		fmt.Println(links)
		fmt.Fprintln(outfile, links)
	}
}

ip.txt

103.253.145.129
103.253.146.125
103.253.146.239
103.253.147.72
146.185.176.79
146.185.176.45
146.185.179.250
146.185.180.35
146.185.180.185
146.185.180.113
146.185.181.51
146.185.183.107
146.185.183.202
146.185.183.248
146.185.183.219
146.185.184.69
146.185.185.169

有人能解释一下吗?为什么会发生这种情况? git仓库 URLGrabber


更多关于Golang文件写入问题:如何解决打印结果未完全写入文本文件的实战教程也可以访问 https://www.itying.com/category-94-b0.html

2 回复

你启动了大量的 goroutine,它们都会创建并写入 urls.txt 文件,从而相互覆盖。

更多关于Golang文件写入问题:如何解决打印结果未完全写入文本文件的实战系列教程也可以访问 https://www.itying.com/category-94-b0.html


问题在于每个goroutine都各自打开了同一个输出文件,导致并发写入冲突。每个Grabber函数都执行os.Create("urls.txt"),这会把文件清空,并让该goroutine通过自己的文件句柄从文件开头重新写入。各个goroutine的输出因此相互覆盖,最终文件里只剩下一部分结果。

解决方案是使用互斥锁保护文件写入,或者使用单个文件写入器。以下是修复后的代码:

import (
	"bufio"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"regexp"
	"strings"
	"sync"
)

// wg lets main wait until every Grabber goroutine has called Done.
var wg sync.WaitGroup

// fileMutex is held by Grabber while it writes its results to the shared
// output file.
var fileMutex sync.Mutex

// main hands every line of ip.txt to its own Grabber goroutine. All goroutines
// share a single urls.txt handle that is created here exactly once, and main
// blocks until each of them has finished before reporting any scan error.
func main() {
	in, err := os.Open("ip.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer in.Close()

	// Create the output file a single time; every worker writes to this handle.
	out, err := os.Create("urls.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	lines := bufio.NewScanner(in)
	for lines.Scan() {
		ip := lines.Text()
		wg.Add(1)
		go Grabber(ip, out)
	}
	wg.Wait()

	if scanErr := lines.Err(); scanErr != nil {
		log.Fatal(scanErr)
	}
}

// stringInArray reports whether list contains an element equal to a.
func stringInArray(a string, list []string) bool {
	found := false
	for idx := range list {
		if list[idx] == a {
			found = true
			break
		}
	}
	return found
}

// resultLinkRe matches the start of a Bing result heading link and captures
// the href value. It is compiled once rather than on every page fetch.
var resultLinkRe = regexp.MustCompile(`<h2><a href="(.*?)"`)

// Grabber searches Bing for pages hosted on ip, collects the distinct
// "scheme://host" prefix of every result link, and then prints each prefix and
// writes it as a line to outfile.
//
// The caller must have called wg.Add(1) before starting Grabber; Grabber calls
// wg.Done when it returns. Writes to outfile happen while fileMutex is held so
// that lines from concurrent calls do not interleave. An empty ip is ignored.
// A result page that cannot be requested or read is reported on stdout and
// skipped.
func Grabber(ip string, outfile *os.File) {
	defer wg.Done()

	if ip == "" {
		return
	}

	// One client for all pages instead of a new one per request.
	client := &http.Client{}
	var output []string

	// "first" is the index of the first result to return, so stepping it by the
	// page size of 50 walks through results 1-250. Advancing in the loop's post
	// statement guarantees progress even when an iteration bails out with
	// continue; a failing page is skipped instead of being retried forever.
	for first := 1; first < 251; first += 50 {
		req, err := http.NewRequest(
			http.MethodGet,
			fmt.Sprintf(
				"http://www.bing.com/search?q=ip:%s+&count=50&first=%d",
				ip,
				first,
			),
			nil,
		)
		if err != nil {
			fmt.Println("Invalid Request")
			continue
		}
		req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:57.0) Gecko/20100101 Firefox/57.0")

		res, err := client.Do(req)
		if err != nil {
			fmt.Println("Invalid Request")
			continue
		}

		// Close right after reading rather than with defer, so bodies do not
		// pile up until the function returns.
		body, err := ioutil.ReadAll(res.Body)
		res.Body.Close()
		if err != nil {
			fmt.Println("Couldn't Read")
			continue
		}

		for _, m := range resultLinkRe.FindAllSubmatch(body, -1) {
			// m[1] is the captured href. An absolute URL splits into
			// "scheme:", "", "host", ...; anything shorter (for example a
			// relative link) has no host segment and is skipped.
			parts := strings.Split(string(m[1]), "/")
			if len(parts) < 3 {
				continue
			}
			site := parts[0] + "//" + parts[2]
			if !stringInArray(site, output) {
				output = append(output, site)
			}
		}
	}

	// Hold the lock for the whole batch so one call's lines stay together.
	fileMutex.Lock()
	defer fileMutex.Unlock()
	for _, link := range output {
		fmt.Println(link)
		if _, err := fmt.Fprintln(outfile, link); err != nil {
			log.Printf("writing %q to output file: %v", link, err)
		}
	}
}

关键修改:

  1. main函数中只创建一次输出文件
  2. 将文件句柄作为参数传递给Grabber函数
  3. 使用fileMutex互斥锁确保并发写入的安全性
  4. 修复了defer res.Body.Close()的位置问题:不再在循环中使用defer,而是在读取完响应后立即关闭响应体
  5. 将wg.Add(1)移到启动goroutine之前,避免wg.Wait()在计数增加之前就返回
回到顶部