Golang读取XML、修改值并回写时遇到文件损坏问题
Golang读取XML、修改值并回写时遇到文件损坏问题 尝试读取一个XML文件(这里是一个Scribus文件),更改一些值,然后将修改后的XML写回文件。
当尝试使用encoding/xml进行此操作时,最终会得到一个损坏的文件。
以下是我的代码:
// Read a Scribus document and change text, then save output Scribus document
// FIXME: This seems to cripple the XML file
// It cannot be opened by Scribus anymore
package main
import (
"encoding/xml"
"fmt"
"io/ioutil"
"os"
)
// https://wiki.scribus.net/canvas/File_Format_Specification_for_Scribus_1.5
// Struct generated using an example Scribus document with https://www.onlinetool.io/xmltogo/
// TODO: Improve completeness
type SCRIBUSUTF8NEW struct {
XMLName xml.Name `xml:"SCRIBUSUTF8NEW"`
Text string `xml:",chardata"`
Version string `xml:"Version,attr"`
以下是测试文件:
<?xml version="1.0" encoding="UTF-8"?>
<SCRIBUSUTF8NEW Version="1.5.1.svn">
<DOCUMENT ANZPAGES="1" PAGEWIDTH="612" PAGEHEIGHT="792" BORDERLEFT="40" BORDERRIGHT="40" BORDERTOP="40" BORDERBOTTOM="40" PRESET="0" BleedTop="0" BleedLeft="0" BleedRight="0" BleedBottom="0" ORIENTATION="0" PAGESIZE="Letter" FIRSTNUM="1" BOOK="0" AUTOSPALTEN="1" ABSTSPALTEN="11" UNITS="2" DFONT="Abyssinica SIL Regular" DSIZE="12" DCOL="1" DGAP="0" TabFill="" TabWidth="36" AUTHOR="" COMMENTS="" KEYWORDS="" PUBLISHER="" DOCDATE="" DOCTYPE="" DOCFORMAT="" DOCIDENT="" DOCSOURCE="" DOCLANGINFO="" DOCRELATION="" DOCCOVER="" DOCRIGHTS="" DOCCONTRIB="" TITLE="" SUBJECT="" VHOCH="33" VHOCHSC="66" VTIEF="33" VTIEFSC="66" VKAPIT="75" BASEGRID="14.4" BASEO="0" AUTOL="100" UnderlinePos="-1" UnderlineWidth="-1" StrikeThruPos="-1" StrikeThruWidth="-1" GROUPC="1" HCMS="0" DPSo="0" DPSFo="0" DPuse="0" DPgam="0" DPbla="1" DPPr="Fogra27L CMYK Coated Press" DPIn="sRGB IEC61966-2.1" DPInCMYK="Fogra27L CMYK Coated Press" DPIn2="sRGB IEC61966-2.1" DPIn3="Fogra27L CMYK Coated Press" DISc="1" DIIm="0" ALAYER="0" LANGUAGE="en_GB" MINWORDLEN="3" HYCOUNT="2" AUTOMATIC="1" AUTOCHECK="0" GUIDELOCK="0" SnapToGuides="1" SnapToGrid="0" SnapToElement="0" MINGRID="20" MAJGRID="100" SHOWGRID="0" SHOWGUIDES="1" showcolborders="1" previewMode="0" SHOWFRAME="1" SHOWControl="0" SHOWLAYERM="0" SHOWMARGIN="1" SHOWBASE="0" SHOWPICT="1" SHOWLINK="0" rulerMode="1" showrulers="1" showBleed="1" rulerXoffset="0" rulerYoffset="0" GuideRad="10" GRAB="4" POLYC="4" POLYF="0.5" POLYR="0" POLYIR="0" POLYCUR="0" POLYOCUR="0" POLYS="0" arcStartAngle="30" arcSweepAngle="300" spiralStartAngle="0" spiralEndAngle="1080" spiralFactor="1.2" AutoSave="1" AutoSaveTime="600000" ScratchBottom="20" ScratchLeft="100" ScratchRight="100" ScratchTop="20" GapHorizontal="0" GapVertical="40" StartArrow="0" EndArrow="0" PEN="Black" BRUSH="None" PENLINE="Black" PENTEXT="Black" StrokeText="Black" TextBackGround="None" TextLineColor="None" TextBackGroundShade="100" TextLineShade="100" TextPenShade="100" TextStrokeShade="100" STIL="1" 
STILLINE="1" WIDTH="1" WIDTHLINE="1" PENSHADE="100" LINESHADE="100" BRUSHSHADE="100" CPICT="None" PICTSHADE="100" CSPICT="None" PICTSSHADE="100" PICTSCX="1" PICTSCY="1" PSCALE="1" PASPECT="1" EmbeddedPath="0" HalfRes="1" dispX="10" dispY="10" constrain="15" MINORC="#00ff00" MAJORC="#00ff00" GuideC="#000080" BaseC="#c0c0c0" renderStack="0 1 2 3 4" GridType="0" PAGEC="#ffffff" MARGC="#0000ff" RANDF="0" currentProfile="PostScript" calligraphicPenFillColor="Black" calligraphicPenLineColor="Black" calligraphicPenFillColorShade="100" calligraphicPenLineColorShade="100" calligraphicPenLineWidth="1" calligraphicPenAngle="0" calligraphicPenWidth="10" calligraphicPenStyle="1">
<CheckProfile Name="PDF 1.3" ignoreErrors="0" autoCheck="1" checkGlyphs="1" checkOrphans="1" checkOverflow="1" checkPictures="1" checkPartFilledImageFrames="0" checkResolution="1" checkTransparency="1" minResolution="144" maxResolution="2400" checkAnnotations="0" checkRasterPDF="1" checkForGIF="1" ignoreOffLayers="0" checkNotCMYKOrSpot="0" checkDeviceColorsAndOutputIntent="0" checkFontNotEmbedded="1" checkFontIsOpenType="1" checkAppliedMasterDifferentSide="1" checkEmptyTextFrames="1"/>
<CheckProfile Name="PDF 1.4" ignoreErrors="0" autoCheck="1" checkGlyphs="1" checkOrphans="1" checkOverflow="1" checkPictures="1" checkPartFilledImageFrames="0" checkResolution="1" checkTransparency="0" minResolution="144" maxResolution="2400" checkAnnotations="0" checkRasterPDF="1" checkForGIF="1" ignoreOffLayers="0" checkNotCMYKOrSpot="0" checkDeviceColorsAndOutputIntent="0" checkFontNotEmbedded="1" checkFontIsOpenType="1" checkAppliedMasterDifferentSide="1" checkEmptyTextFrames="1"/>
<CheckProfile Name="PDF 1.5" ignoreErrors="0" autoCheck="1" checkGlyphs="1" checkOrphans="1" checkOverflow="1" checkPictures="1" checkPartFilledImageFrames="0" checkResolution="1" checkTransparency="0" minResolution="144" maxResolution="2400" checkAnnotations="0" checkRasterPDF="1" checkForGIF="1" ignoreOffLayers="0" checkNotCMYKOrSpot="0" checkDeviceColorsAndOutputIntent="0" checkFontNotEmbedded="1" checkFontIsOpenType="1" checkAppliedMasterDifferentSide="1" checkEmptyTextFrames="1"/>
<CheckProfile Name="PDF/X-1a" ignoreErrors="0" autoCheck="1" checkGlyphs="1" checkOrphans="1" checkOverflow="1" checkPictures="1" checkPartFilledImageFrames="0" checkResolution="1" checkTransparency="1" minResolution="144" maxResolution="2400" checkAnnotations="1" checkRasterPDF="1" checkForGIF="1" ignoreOffLayers="0" checkNotCMYKOrSpot="1" checkDeviceColorsAndOutputIntent="0" checkFontNotEmbedded="1" checkFontIsOpenType="1" checkAppliedMasterDifferentSide="1" checkEmptyTextFrames="1"/>
<CheckProfile Name="PDF/X-3" ignoreErrors="0" autoCheck="1" checkGlyphs="1" checkOrphans="1" checkOverflow="1" checkPictures="1" checkPartFilledImageFrames="0" checkResolution="1" checkTransparency="1" minResolution="144" maxResolution="2400" check
更改后的结果文件应该能够用Scribus打开,但我无法使其正常工作。
非常感谢任何帮助。
更多关于Golang读取XML、修改值并回写时遇到文件损坏问题的实战教程也可以访问 https://www.itying.com/category-94-b0.html
感谢您的帮助。
- 添加了 <?xml version="1.0" encoding="UTF-8"?> - 这很简单(类似于 https://play.golang.org/p/Rbfb717tvh)
- 末尾的换行符 - 无关紧要,Scribus 可以读取末尾有或没有换行符的文件
- <TableBorderLine Width="1" PenStyle="1" Color="Black" Shade="100"/> 被重写为 <TableBorderLine Width="1" PenStyle="1" Color="Black" Shade="100"></TableBorderLine> - 这不太好,因为它增加了文件大小,但 Scribus 仍然可以读取该文件
- 随机插入的 LF(&#xA;)和空格 - 这才是真正的问题。为什么会发生这种情况,我做错了什么?
手动将 &#xA; 替换为空确实可以修复文件。但这是一种粗糙的解决方法。
根据 https://stackoverflow.com/a/32399389,我应该在结构体定义中添加 ,innerxml,但该死的是,我似乎无法让它正常工作。
例如,我有
type SCRIBUSUTF8NEW struct {
XMLName xml.Name `xml:"SCRIBUSUTF8NEW"`
(...)
我该如何在那里添加所需的 ,innerxml?
更多关于Golang读取XML、修改值并回写时遇到文件损坏问题的实战系列教程也可以访问 https://www.itying.com/category-94-b0.html
问题出在XML的编码和格式处理上。Scribus XML文件包含XML声明和特定的格式要求,而encoding/xml包在编码时不会自动输出XML声明(需要手动在输出前加上 xml.Header),也不保证保留原始格式:`,chardata` 字段捕获到的元素间空白会被写回,其中的换行符会被转义为 &#xA;。
以下是修复后的代码示例:
package main
import (
"encoding/xml"
"fmt"
"io/ioutil"
"os"
)
// scribusDocument lists the attributes of the <DOCUMENT> element that this
// program decodes and re-encodes.
type scribusDocument struct {
	Anzpages string `xml:"ANZPAGES,attr"`
	// Add further fields as needed.
}

// SCRIBUSUTF8NEW maps the root <SCRIBUSUTF8NEW> element of a Scribus
// document: its Version attribute and its <DOCUMENT> child.
type SCRIBUSUTF8NEW struct {
	XMLName  xml.Name        `xml:"SCRIBUSUTF8NEW"`
	Version  string          `xml:"Version,attr"`
	Document scribusDocument `xml:"DOCUMENT"`
	// Add further struct fields as needed.
}
// main reads input.sla, decodes it into a SCRIBUSUTF8NEW value, changes the
// Version and ANZPAGES attributes, and writes the re-encoded document,
// preceded by the XML declaration, to output.sla.
//
// Any failure is reported on standard error and ends the process with exit
// status 1 so that callers can detect it.
func main() {
	// fail prints the formatted error to standard error and exits non-zero.
	fail := func(format string, err error) {
		fmt.Fprintf(os.Stderr, format, err)
		os.Exit(1)
	}

	// Read the whole XML file.
	data, err := ioutil.ReadFile("input.sla")
	if err != nil {
		fail("读取文件失败: %v\n", err)
	}

	// Decode the XML.
	var doc SCRIBUSUTF8NEW
	if err := xml.Unmarshal(data, &doc); err != nil {
		fail("解析XML失败: %v\n", err)
	}

	// Change the values.
	doc.Version = "1.5.1.modified"
	doc.Document.Anzpages = "2"

	// Encode the document again, indented by one space per level.
	output, err := xml.MarshalIndent(doc, "", " ")
	if err != nil {
		fail("编码XML失败: %v\n", err)
	}

	// MarshalIndent does not emit an XML declaration, so prepend it here.
	outputWithHeader := []byte(xml.Header + string(output))

	// Write the result.
	if err := ioutil.WriteFile("output.sla", outputWithHeader, 0644); err != nil {
		fail("写入文件失败: %v\n", err)
	}
	fmt.Println("文件保存成功")
}
如果仍然有问题,可能是因为Scribus对XML格式有特定要求。可以尝试使用更精确的结构定义:
// SCRIBUSUTF8NEW maps the root <SCRIBUSUTF8NEW> element of a Scribus
// document. The <DOCUMENT> child is kept generic: all of its attributes are
// collected in Attrs and its nested markup is kept verbatim in Content, so
// neither has to be modelled field by field.
type SCRIBUSUTF8NEW struct {
	XMLName xml.Name `xml:"SCRIBUSUTF8NEW"`
	Version string   `xml:"Version,attr"`
	// Text receives the character data directly inside the root element.
	// NOTE(review): for an indented input file this is the whitespace between
	// child elements, which is written back on Marshal with line breaks
	// escaped as &#xA; — confirm the field is really wanted.
	Text     string `xml:",chardata"`
	Document struct {
		XMLName xml.Name `xml:"DOCUMENT"`
		// Attrs must carry the ",any,attr" tag: without it encoding/xml
		// looks for a child element named Attrs, never stores the
		// attributes of DOCUMENT, and drops them all when marshalling.
		Attrs []xml.Attr `xml:",any,attr"`
		// Content holds the raw XML nested inside DOCUMENT and is written
		// back unchanged.
		Content string `xml:",innerxml"`
	} `xml:"DOCUMENT"`
}
// main reads input.sla, decodes it into a SCRIBUSUTF8NEW value, sets every
// entry of doc.Document.Attrs named ANZPAGES to "2", and writes the
// re-encoded document, preceded by the XML declaration, to output.sla.
//
// Any failure is reported on standard error and ends the process with exit
// status 1 so that callers can detect it.
func main() {
	// fail prints the formatted error to standard error and exits non-zero.
	fail := func(format string, err error) {
		fmt.Fprintf(os.Stderr, format, err)
		os.Exit(1)
	}

	data, err := ioutil.ReadFile("input.sla")
	if err != nil {
		fail("读取文件失败: %v\n", err)
	}

	var doc SCRIBUSUTF8NEW
	if err := xml.Unmarshal(data, &doc); err != nil {
		fail("解析XML失败: %v\n", err)
	}

	// Modify the DOCUMENT attribute. Index into the slice so the stored
	// element is updated rather than the loop's copy.
	for i := range doc.Document.Attrs {
		if doc.Document.Attrs[i].Name.Local == "ANZPAGES" {
			doc.Document.Attrs[i].Value = "2"
		}
	}

	output, err := xml.MarshalIndent(doc, "", " ")
	if err != nil {
		fail("编码XML失败: %v\n", err)
	}

	// MarshalIndent does not emit an XML declaration, so prepend it here.
	if err := ioutil.WriteFile("output.sla", []byte(xml.Header+string(output)), 0644); err != nil {
		fail("写入文件失败: %v\n", err)
	}
}
如果Scribus仍然无法打开文件,可能需要检查XML的编码格式。确保使用UTF-8编码:
// main reads input.sla, decodes it into a SCRIBUSUTF8NEW value, re-encodes
// it, and writes output.sla as: UTF-8 byte order mark, XML declaration,
// encoded document.
//
// Every write and the final Close are checked, so a short or failed write is
// reported instead of silently leaving a truncated file behind.
func main() {
	data, err := ioutil.ReadFile("input.sla")
	if err != nil {
		fmt.Printf("读取文件失败: %v\n", err)
		return
	}

	var doc SCRIBUSUTF8NEW
	err = xml.Unmarshal(data, &doc)
	if err != nil {
		fmt.Printf("解析XML失败: %v\n", err)
		return
	}

	// Modifications go here...

	output, err := xml.MarshalIndent(doc, "", " ")
	if err != nil {
		fmt.Printf("编码XML失败: %v\n", err)
		return
	}

	file, err := os.Create("output.sla")
	if err != nil {
		fmt.Printf("创建文件失败: %v\n", err)
		return
	}

	// Pieces of the output file, in the order they are written.
	chunks := [][]byte{
		{0xEF, 0xBB, 0xBF}, // UTF-8 BOM (optional)
		[]byte(xml.Header), // XML declaration
		output,             // XML content
	}
	for _, chunk := range chunks {
		if _, err := file.Write(chunk); err != nil {
			// Best-effort close; the write error is the one worth reporting.
			file.Close()
			fmt.Printf("写入文件失败: %v\n", err)
			return
		}
	}

	// Close explicitly rather than via defer so that its error is seen too.
	if err := file.Close(); err != nil {
		fmt.Printf("写入文件失败: %v\n", err)
		return
	}
}
确保结构定义完整,包含所有必要的字段和属性。不完整的结构定义可能导致数据丢失。

