Selective reading of an XML

1

I'm able to read XML but there are more things I do not need. The problem is that my XML source file has repeated tags in a hierarchy like this (see the PLAN tag that repeats):

<explain>
   <Query-Text>
   </Query-Text>
   <Plan>
        <Node-Type></Node-Type>
        <Startup-Cost></Startup-Cost>
        <Total-Cost></Total-Cost>
        <Plan-Rows></Plan-Rows>
        <Plan-Width></Plan-Width>
        <Actual-Total-Time></Actual-Total-Time>
        <Sort-Key>
            <Item></Item>
        </Sort-Key>
        <Actual-Rows></Actual-Rows>
        <Actual-Loops></Actual-Loops>
        <Plans>
            <Plan>
                <Node-Type></Node-Type>
                <Startup-Cost></Startup-Cost>
                <Total-Cost></Total-Cost>
                <Plan-Rows></Plan-Rows>
                <Plan-Width></Plan-Width>
                <Sort-Key>
                    <Item></Item>
                </Sort-Key>
                <Actual-Rows></Actual-Rows>
                <Actual-Loops></Actual-Loops>
            </Plan>
        </Plans>
    </Plan>
</explain>

I just need to read the PLAN information: "Total-Cost" and "Actual-Total-Time" from the first PLAN tag that appears. The problem is that there is a PLAN in an internal hierarchy and this is being read too, which I do not need.

How can I do to read only the first PLAN tag, not the ones inside that do not interest me?

Follow the code I'm using.

ReadXMLTest.java:

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.File;

public class ReadXMLTest {

    public static void main(String argv[]) {
        try {
            File fXmlFile = new File("/home/user1/Downloads/LOGT1.xml");
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(fXmlFile);

            doc.getDocumentElement().normalize();
            System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
            NodeList nList = doc.getElementsByTagName("Plan");
            System.out.println("----------------------------");

           for (int temp = 0; temp < nList.getLength(); temp++) {
                Node nNode = nList.item(temp);

                System.out.println("\nCurrent Element :" + nNode.getNodeName());

                if (nNode.getNodeType() == Node.ELEMENT_NODE) {

                    Element eElement = (Element) nNode;
                    System.out.println("Total Cost query: " + eElement.getElementsByTagName("Total-Cost").item(0).getTextContent());
                    System.out.println("Actual Total Time query:" + eElement.getElementsByTagName("Actual-Total-Time").item(0).getTextContent());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }    
    }
}

LOGT1.XML file:

1. <?xml version="1.0"?>
2. <explain xmlns="http://www.postgresql.org/2009/explain">
3.       <Query-Text>
4.     BEGIN;
5.     EXPLAIN (ANALYZE on, VERBOSE off, COSTS on, BUFFERS off)SELECT l_returnflag,l_linestatus, sum(l_quantity) as sum_qty,
6.     sum(l_extendedprice) as sum_base_price,
7.     sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
8.     sum(l_extendedprice * (1 - l_discount) * (1 + l_tax))
9.     as sum_charge,
10.     avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price,
11.     avg(l_discount) as avg_disc, count(*) as count_order
12.     FROM H_lineitem
 WHERE H_lineitem.l_shipdate &lt;= TO_DATE ('1998/12/01','YYYY/MM/DD') - interval '10' day
14.     GROUP BY H_lineitem.l_returnflag, H_lineitem.l_linestatus
15.     ORDER BY H_lineitem.l_returnflag, H_lineitem.l_linestatus
16.     ;
17.     ROLLBACK;</Query-Text>
18.       <Plan>
19.         <Node-Type>Sort</Node-Type>
20.         <Startup-Cost>399301.55</Startup-Cost>
21.         <Total-Cost>399301.57</Total-Cost>
22.         <Plan-Rows>6</Plan-Rows>
23.         <Plan-Width>36</Plan-Width>
24.         <Actual-Startup-Time>17693.654</Actual-Startup-Time>
25.         <Actual-Total-Time>17693.654</Actual-Total-Time>
26.         <Actual-Rows>4</Actual-Rows>
27.         <Actual-Loops>1</Actual-Loops>
28.         <Sort-Key>
29.           <Item>l_returnflag</Item>
30.           <Item>l_linestatus</Item>
31.         </Sort-Key>
32.         <Plans>
33.           <Plan>
34.             <Node-Type>Aggregate</Node-Type>
35.             <Strategy>Hashed</Strategy>PP
36.             <Parent-Relationship>Outer</Parent-Relationship>
37.             <Startup-Cost>399301.21</Startup-Cost>
38.             <Total-Cost>399301.48</Total-Cost>
39.             <Plan-Rows>6</Plan-Rows>
40.             <Plan-Width>36</Plan-Width>
41.             <Actual-Startup-Time>17693.582</Actual-Startup-Time>
42.             <Actual-Total-Time>17693.586</Actual-Total-Time>
43.             <Actual-Rows>4</Actual-Rows>
44.             <Actual-Loops>1</Actual-Loops>
45.             <Plans>
46.               <Plan>
47.                 <Node-Type>Seq Scan</Node-Type>
48.                 <Parent-Relationship>Outer</Parent-Relationship>
49.                 <Relation-Name>h_lineitem</Relation-Name>
50.                 <Alias>h_lineitem</Alias>
51.                 <Startup-Cost>0.00</Startup-Cost>
52.                 <Total-Cost>250095.98</Total-Cost>
53.                 <Plan-Rows>5968209</Plan-Rows>
54.                 <Plan-Width>36</Plan-Width>
55.                 <Actual-Startup-Time>0.036</Actual-Startup-Time>
56.                 <Actual-Total-Time>7945.899</Actual-Total-Time>
57.                 <Actual-Rows>5996518</Actual-Rows>
58.                 <Actual-Loops>1</Actual-Loops>
59.                 <Filter>(l_shipdate &lt;= (to_date('1998/12/01'::text, 'YYYY/MM/DD'::text) - '10 days'::interval day))</Filter>
60.               </Plan>
61.             </Plans>
62.           </Plan>
63.         </Plans>
64.       </Plan>
65.     </explain>
    
asked by anonymous 13.01.2017 / 20:18

1 answer

1

The easiest way is to use XPath to filter only the Plan elements that are direct children of the explain element.

Example:

XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
XPathExpression expr = xpath.compile("/explain/Plan");
NodeList nl = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);

If you are processing very large files or have performance impacts, you should approach other solutions such as using a SAX approach to XML processing and later handling of events.

    
15.01.2017 / 22:28