Nodejs 为什么从文件中读出来的内容md5值是错的？

	var fs = require('fs');
var crypto = require('crypto');

// Reads 1.txt as a UTF-8 string, then prints MD5 digests, the raw values,
// equality checks and typeof results for three strings: the file content
// (data), a full-length copy of it (str) and a hard-coded literal (str1).
fs.readFile('1.txt', 'UTF-8', function(err, data) {
	if (err) {
		console.error(err);
	} else {
		// Copy of the whole string: start at 0, take data.length characters.
		var str = data.substr(0, data.length);
		// Reference text; note that its line endings are bare "\n".
		var str1 = '123456\n123456\n';
		// MD5 of the file content, of its copy, and of the literal.
		console.log(crypto.createHash('md5').update(data).digest('hex'));
		console.log(crypto.createHash('md5').update(str).digest('hex'));
		console.log(crypto.createHash('md5').update(str1).digest('hex'));
		console.log('***');
		console.log(data);
		console.log('***');
		console.log(str);
		console.log('***');
		console.log(str1);
		console.log('***');
		// Is the copy identical to the file content?
		console.log(data === str);
		console.log('***');
		// Is the literal identical to the file content (loose, then strict)?
		console.log(str1 == str);
		console.log('***');
		console.log(str1 === str);
		console.log('***');
		console.log(typeof data);
		console.log('***');
		console.log(typeof str);
		console.log('***');
		console.log(typeof str1);
		console.log('***');
	}
});

输出是这样：

	E:\>node 1.js
	069f06196d39d7be8e8d8195d66a141e
	069f06196d39d7be8e8d8195d66a141e
	89ca78c08cc4562d00e2cb2a1d67fcac
	***
	123456
	123456

	***
	123456
	123456

	***
	123456
	123456

	***
	true
	***
	false
	***
	false
	***
	string
	***
	string
	***
	string
	***

前面两个md5是错的，为什么从文件中读出来的内容md5值是错的？

phonegap100 1楼

在这个问题中，你遇到了从文件中读取内容后计算MD5值不正确的情况。问题的根本原因在于 fs.readFile 方法的第二个参数 encoding 的设置。

在你的代码中，你使用了 'UTF-8' 作为编码参数，这意味着 data 变量将包含解码后的字符串。然而，当你将这个字符串传递给 crypto.createHash('md5').update(data) 时，其中可能包含打印出来看不见的字符（例如 Windows 换行符 \r\n 中的 \r），导致计算出的MD5值与预期不符。

为了修复这个问题，你可以直接读取二进制数据，而不是指定编码。然后，你可以手动处理这些数据以确保它们符合预期格式。

下面是修改后的代码：

var fs = require('fs');
var crypto = require('crypto');

/**
 * Computes the MD5 digest of the given input.
 *
 * @param {string|Buffer} input - Data to hash.
 * @returns {string} The digest as a lowercase hex string.
 */
function md5Hex(input) {
    return crypto.createHash('md5').update(input).digest('hex');
}

/**
 * Replaces every "\r\n" sequence with "\n".
 *
 * @param {string} text - Text whose line endings should be unified.
 * @returns {string} The text with "\r\n" replaced by "\n".
 */
function normalizeNewlines(text) {
    return text.replace(/\r\n/g, '\n');
}

fs.readFile('1.txt', function(err, data) {
    if (err) {
        console.error(err);
    } else {
        // No encoding was passed, so data is the raw Buffer.
        var str1 = '123456\n123456\n';
        // A file saved with "\r\n" line endings has different bytes than
        // str1, so unify the line endings before hashing and comparing.
        var text = normalizeNewlines(data.toString());

        // Digest of the raw bytes, of the normalized text, and of the literal.
        console.log(md5Hex(data));
        console.log(md5Hex(text));
        console.log(md5Hex(str1));

        console.log('***');
        console.log(data);
        console.log('***');
        console.log(str1);
        console.log('***');

        console.log(str1 === text);
    }
});

在这个修改后的版本中，我们没有指定 encoding 参数，而是直接读取二进制数据。然后，我们将原始数据转换为字符串，并进行比较，以确保数据的一致性。

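运行这段代码后，第一行输出的是文件原始字节的MD5值，不再受解码过程的影响。需要注意：如果文件是在 Windows 下保存的，换行符是 \r\n（0d 0a），它与字符串 '123456\n123456\n' 的字节本来就不同，MD5 值自然也不同；要得到相同的值，需要先把 \r\n 统一替换为 \n 再计算。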

yibo5220 2楼

没贴出文件内容，不太清楚；txt 里的换行会不会是 \r\n（可能？）

nodeper 3楼

文本就 123456 123456

可能是因为读入的换行符不一样……同样的内容在 Linux 和 Windows 下算出的 md5 不同：Windows 下换行是 0d 0a（\r\n）。但是这要怎么处理？

wuwangju 4楼

text.split(/[\r]\n/)

ionicwang 5楼

嗯，是这样…谢谢。另外，Linux 下会在文件末尾自动加一个 0a（\n）换行，还要去掉最后一个 \n。

itying888 6楼

多谢，明白了

gougou168 7楼

不要指定编码, 直接读到buffer

yibo5220 8楼

从输出结果来看，data 和 str 的内容是一样的，并且它们计算出的 MD5 值也相同，但与 str1 的 MD5 值不同。这说明文件中实际存储的字节与 str1 并不相同：例如文件在 Windows 下保存时换行符是 \r\n，而 str1 中用的是 \n。另外，fs.readFile 在不指定编码时返回的是原始的 Buffer，只有像上面那样传入 'UTF-8' 才会得到解码后的字符串。

为了解决这个问题，可以尝试以下两种方法：

读取文件时不指定编码，让 Node.js 返回原始的 Buffer 对象，然后再计算 MD5 值：

var fs = require('fs');
var crypto = require('crypto');

/**
 * Computes the MD5 digest of the given input.
 *
 * @param {string|Buffer} input - Data to hash.
 * @returns {string} The digest as a lowercase hex string.
 */
function md5Hex(input) {
  return crypto.createHash('md5').update(input).digest('hex');
}

/**
 * Replaces every "\r\n" sequence with "\n".
 *
 * @param {string} text - Text whose line endings should be unified.
 * @returns {string} The text with "\r\n" replaced by "\n".
 */
function normalizeNewlines(text) {
  return text.replace(/\r\n/g, '\n');
}

fs.readFile('1.txt', function(err, buffer) {
  if (err) {
    console.error(err);
  } else {
    // No encoding was passed, so buffer holds the raw bytes of the file.
    var data = buffer.toString('utf8');
    // Unify line endings so the text is comparable with str1, which uses
    // bare "\n"; a plain copy of data would only repeat the digest above.
    var str = normalizeNewlines(data);
    var str1 = '123456\n123456\n';

    // Digests of: raw bytes, decoded text, normalized text, reference literal.
    console.log(md5Hex(buffer));
    console.log(md5Hex(data));
    console.log(md5Hex(str));
    console.log(md5Hex(str1));
  }
});

如果确定文件内容是文本，可以在读取文件后手动去除尾部的换行符（如果有的话）：

var fs = require('fs');
var crypto = require('crypto');

// Reads 1.txt as a UTF-8 string and prints three MD5 digests, in order:
// the file content, the content without its final "\n", and a fixed literal.
fs.readFile('1.txt', 'UTF-8', function(readError, contents) {
  if (readError) {
    console.error(readError);
    return;
  }

  // Strips a single "\n" at the very end of the string; a "\r" in front of
  // it (as in a "\r\n" line ending) is not matched and stays in place.
  var withoutFinalNewline = contents.replace(/\n$/, '');
  var reference = '123456\n123456\n';

  [contents, withoutFinalNewline, reference].forEach(function(text) {
    var digest = crypto.createHash('md5').update(text).digest('hex');
    console.log(digest);
  });
});

上述代码中的 replace(/\n$/, '') 会删除字符串末尾的换行符。