PHP Change text in OpenOffice xml with XMLReader and XMLWriter

1

I need to change the content of the text tags in an Office Open XML (WordprocessingML) document and copy the rest of the document unchanged. The problem is not finding the tag and replacing its contents, but correctly copying the parent tags and their attributes using XMLReader and XMLWriter. Below is a draft of the code:

texts.xml

<texts>
 <text id="1">Lorem Ipsum</text>
 <text id="2">dolor sit amet</text>
 <text id="3">consectetur adipiscing elit. </text>
 <text id="4">Nunc vel justo vitae risus fermentum</text>
</texts>

document.xml (example taken from officeopenxml.com )

<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 
<w:document xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml">
<w:body>
 <w:p>
  <w:pPr>
   <w:pStyle w:val="Heading1"/>
  </w:pPr>
  <w:r><w:t>Introduction</w:t></w:r>
 </w:p>
 <w:p>
  <w:r><w:t xml:space="preserve">My children love many nursery rhymes and childhood songs. </w:t></w:r>
 </w:p>
 <w:p>
   <w:pPr>
    <w:pStyle w:val="Heading1"/>
   </w:pPr>
   <w:r><w:t>Favorites</w:t></w:r>
 </w:p>
 <w:p>
  <w:pPr>
   <w:pStyle w:val="Heading2"/>
  </w:pPr>
   <w:r><w:t>Humpty Dumpty</w:t></w:r>
 </w:p>
 <w:p>
   <w:r><w:t xml:space="preserve">None are more beloved than </w:t>           </w:r>
   <w:r>
    <w:rPr><w:b/></w:rPr>
     <w:t>Humpty Dumpty</w:t></w:r><w:r><w:t xml:space="preserve">.     </w:t>
   </w:r>
 </w:p>
 <w:p>
  <w:pPr>
  <w:spacing w:before="120" w:after="120"/>
  <w:ind w:left="720" w:right="720"/>
  <w:rPr>
   <w:sz w:val="18"/>
   </w:rPr>
 </w:pPr>
  <w:r>
  <w:rPr>
  <w:sz w:val="18"/>
  </w:rPr>
   <w:t xml:space="preserve">Humpty Dumpty sat on a wall. </w:t>
   </w:r>
 . . .
 </w:p>
</w:body>
</w:document>

PHP

// Draft: stream document.xml into new_document.xml, replacing the content of
// each <w:t> element with the next <text> entry from texts.xml.
$original = new XMLReader();
$source = new XMLReader();
$original->open('document.xml');
$source->open('texts.xml');

$writer = new XMLWriter();
$writer->openURI('new_document.xml');
$writer->setIndent(true);
$writer->startDocument('1.0');

while ($original->read()) {
    if ($original->nodeType !== XMLReader::ELEMENT) {
        continue;
    }

    if ($original->name !== 'w:t') {
        // While the node isn't a text node (<w:t>), I'd like to copy everything
        // as is to new_document.xml. I can just use
        // $content = $original->readOuterXml(); and $writer->writeRaw($content);
        // for nodes not related to w:t, but it won't work for the parent nodes
        // of w:t.
        continue;
    }

    // Now that it's a <w:t>, its content needs to be replaced.
    // Advance the source with read() rather than next('text'): next() skips the
    // subtree of the current element, so starting from the root <texts> it
    // would jump over every <text> child.
    $hasReplacement = false;
    while ($source->read()) {
        if ($source->nodeType === XMLReader::ELEMENT && $source->name === 'text') {
            $hasReplacement = true;
            break;
        }
    }

    // readString() returns the text content, which text() then escapes once;
    // readInnerXml() returns already-escaped markup and would be escaped twice.
    // When texts.xml has no more entries, keep the original text of the <w:t>.
    $content = $hasReplacement ? $source->readString() : $original->readString();

    $writer->startElement('w:t');
    $writer->text($content);
    $writer->endElement();
}

$writer->endDocument();
$writer->flush();

$source->close();
$original->close();

The result should look something like this:

<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 
<w:document xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml">
<w:body>
 <w:p>
  <w:pPr>
   <w:pStyle w:val="Heading1"/>
  </w:pPr>
  <w:r><w:t>Lorem Ipsum</w:t></w:r>
 </w:p>
 <w:p>
  <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
 </w:p>
 <w:p>
   <w:pPr>
    <w:pStyle w:val="Heading1"/>
   </w:pPr>
   <w:r><w:t>consectetur adipiscing elit. </w:t></w:r>
 </w:p>
 <w:p>
  <w:pPr>
   <w:pStyle w:val="Heading2"/>
  </w:pPr>
   <w:r><w:t>Nunc vel justo vitae risus fermentum</w:t></w:r>
 </w:p>
 <w:p>
   <w:r><w:t xml:space="preserve">None are more beloved than </w:t>           </w:r>
   <w:r>
    <w:rPr><w:b/></w:rPr>
     <w:t>Humpty Dumpty</w:t></w:r><w:r><w:t xml:space="preserve">.     </w:t>
   </w:r>
 </w:p>
 <w:p>
  <w:pPr>
  <w:spacing w:before="120" w:after="120"/>
  <w:ind w:left="720" w:right="720"/>
  <w:rPr>
   <w:sz w:val="18"/>
   </w:rPr>
 </w:pPr>
  <w:r>
  <w:rPr>
  <w:sz w:val="18"/>
  </w:rPr>
   <w:t xml:space="preserve">Humpty Dumpty sat on a wall. </w:t>
   </w:r>
 . . .
 </w:p>
</w:body>
</w:document>
    
asked by anonymous 11.03.2015 / 18:37

1 answer

1

I was able to solve this with the help of an answer on the English-language Stack Overflow (SOen).

I'm using the XMLReaderIterator library, version 0.1.8.

Below the code:

require('xmlreader-iterators.php'); // require XMLReaderIterator library

// Rewrites the input document into memory, replacing the content of every
// <w:t> element with a numbered placeholder, then prints the result.
$xmlInputFile = 'data/worddocument.xml';
$xmlXliffFile = 'data/segments.xliff'; // not used further in this snippet

$writer = new XMLWriter();
$writer->openMemory();

$reader = new XMLReader();
$reader->open($xmlInputFile);

// XMLWritingIteration comes from the XMLReaderIterator library.
$iteration = new XMLWritingIteration($writer, $reader);

$writer->startDocument();

$replaced = 0;
foreach ($iteration as $current) {
    if ($current->nodeType !== XMLReader::ELEMENT || $current->name !== 'w:t') {
        // Anything that is not a <w:t> start tag is handed to the iteration's
        // own write() (presumably a verbatim copy - confirm against the library).
        $iteration->write();
        continue;
    }

    $writer->startElement($current->name);

    // Move the reader past the <w:t> element and tell the iteration not to
    // read again on the next step (semantics per the library - TODO confirm).
    $current->next();
    $iteration->skipNextRead();

    $replaced += 1;
    $writer->text(sprintf("TEXT #%d", $replaced));
    $writer->endElement();
}

$writer->endDocument();
echo $writer->outputMemory(true);
    
21.03.2015 / 03:27