-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDomParse.java
369 lines (330 loc) · 12.1 KB
/
DomParse.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
// This code is adapted from DomEcho06.java in Sun's J2EE Tutorial.
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.ErrorHandler;
import java.io.*;
import org.w3c.dom.Document;
import org.w3c.dom.DOMException;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
public class DomParse
{
// The DOM document most recently produced by main() (parsed from a file)
// or by buildDom() (built in memory). It is a global value so it can be
// ref'd by the tree-adapter.
static Document document;
// Decides whether to compress inline elements like <em> and <b>: when true,
// AdapterNode folds them into their parent's text instead of exposing them
// as separate children. main() sets it from the "-c" command-line flag.
static boolean compress = false;
// Written by isValidating(): true when the XML file mentions an XML Schema
// ("chemaLocation="), false when it declares a DTD ("<!DOCTYPE").
// main() reads it to select the schema language on the parser factory.
static boolean useSchema = false;
/**
 * Command-line entry point: <code>java DomParse [-c] [XML-file]</code>.
 *
 * The optional leading "-c" switches on {@link #compress}. The named file
 * is parsed into {@link #document}; validation is enabled when
 * {@link #isValidating} finds a DTD or schema reference near the top of
 * the file. When no file name is given, a usage line is printed to
 * standard error and {@link #buildDom} supplies a placeholder document
 * instead of failing with an ArrayIndexOutOfBoundsException.
 *
 * Parse, configuration and I/O failures are reported on the console and
 * leave {@link #document} untouched.
 *
 * @param argv command-line arguments; the array is not modified
 */
public static void main(String argv[])
{
    // Find the file argument without overwriting the caller's array.
    int fileIndex = 0;
    compress = false;
    if (argv.length > 0 && argv[0].equals("-c"))
    {
        compress = true;
        fileIndex = 1; // the file name follows the flag
    }
    if (argv.length <= fileIndex)
    {
        // No XML file on the command line: explain and fall back to the
        // in-memory usage document.
        System.err.println("Usage: java DomParse [-c] [XML-file]");
        buildDom();
        return;
    }
    String xmlFile = argv[fileIndex];

    DocumentBuilderFactory factory =
        DocumentBuilderFactory.newInstance();
    // isValidating() reads argv[0], so hand it a one-element array.
    if (isValidating(new String[] { xmlFile }))
    {
        factory.setValidating(true);
        System.out.println("Validation is on");
    }
    else
        System.out.println("Validation is off");
    factory.setNamespaceAware(true);
    // isValidating() has set useSchema as a side effect.
    if (useSchema)
        factory.setAttribute(
            "http://java.sun.com/xml/jaxp/properties/schemaLanguage",
            "http://www.w3.org/2001/XMLSchema"
        );
    // NOTE(review): DTD processing stays enabled because DTD validation
    // depends on it; confirm this tool is only run on trusted XML files.
    try {
        DocumentBuilder builder = factory.newDocumentBuilder();
        // MyErrorHandler rethrows errors, so validation problems end up
        // in the SAXParseException handler below.
        builder.setErrorHandler(new MyErrorHandler());
        document = builder.parse( new File(xmlFile) );
    } catch (SAXParseException spe) {
        // Error generated by the parser
        System.out.println("\n** Parsing error"
            + ", line " + spe.getLineNumber()
            + ", uri " + spe.getSystemId());
        System.out.println(" " + spe.getMessage() );
        // Use the contained exception, if any
        Exception x = spe;
        if (spe.getException() != null)
            x = spe.getException();
        x.printStackTrace();
    } catch (SAXException sxe) {
        // Error generated during parsing
        Exception x = sxe;
        if (sxe.getException() != null)
            x = sxe.getException();
        x.printStackTrace();
    } catch (ParserConfigurationException pce) {
        // Parser with specified options can't be built
        pce.printStackTrace();
    } catch (IOException ioe) {
        // I/O error
        ioe.printStackTrace();
    }
} // main
/**
 * Does the XML file have a DTD or Schema file for validation?
 *
 * Scans at most the first 10 lines of the file named by argv[0] for the
 * text "chemaLocation=" (which matches both schemaLocation= and
 * noNamespaceSchemaLocation=) or "<!DOCTYPE". As a side effect
 * {@link #useSchema} is set to true only when the schema marker is found;
 * in every other case it is reset to false, so a result from an earlier
 * call cannot linger.
 *
 * @param argv argument array whose first element is the XML file name
 * @return true when either marker is found; false when neither is found,
 *         when no file name is supplied, or when the file cannot be read
 */
static boolean isValidating(String argv[])
{
    // Start from a known state: previously a call that found no marker
    // left whatever value an earlier call had stored.
    useSchema = false;
    if (argv == null || argv.length == 0 || argv[0] == null)
        return false;

    BufferedReader br = null;
    try
    {
        br = new BufferedReader(new FileReader(new File(argv[0])));
        String line = br.readLine();
        for (int i = 0; (i < 10) && (line != null); i++)
        {
            if (line.indexOf("chemaLocation=") != -1)
            {
                useSchema = true;
                return true;
            }
            if (line.indexOf("<!DOCTYPE") != -1)
                return true;
            line = br.readLine();
        }
    }
    catch (IOException e)
    {
        // Deliberate best effort: a missing or unreadable file simply
        // means "do not validate"; the real parse reports the I/O error.
    }
    finally
    {
        // Release the file handle on every path, including early returns.
        if (br != null)
        {
            try
            {
                br.close();
            }
            catch (IOException ignored)
            {
                // Nothing useful can be done if close() fails here.
            }
        }
    }
    return false;
}
/**
 * Builds a dummy DOM tree by hand to show manual DOM tree manipulation.
 *
 * The tree is a single "rootElement" holding the usage text as several
 * adjacent text nodes, which normalize() then collapses. The result is
 * stored in {@link #document}. If no DocumentBuilder can be created the
 * stack trace is printed and {@link #document} is left unchanged.
 */
public static void buildDom()
{
    // Pieces of the usage message, in insertion order. They are added as
    // separate text nodes on purpose, to show off normalize().
    final String[] usagePieces = {
        "<b>Usage: ",
        " ",
        "java DomParse [-c] [XML-file]</b><br>",
        "<br>You use optional \"-c\" if you want to compress inline nodes like <em>.",
        "<br>Set up DTD or Schema files in the XML file."
    };

    try {
        // Create an empty document from scratch
        document = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .newDocument();
    } catch (ParserConfigurationException pce) {
        // Parser with specified options can't be built
        pce.printStackTrace();
        return;
    }

    Element root = document.createElement("rootElement");
    document.appendChild(root);
    for (int i = 0; i < usagePieces.length; i++) {
        root.appendChild( document.createTextNode(usagePieces[i]) );
    }
    // getDocumentElement() returns the document's root node; normalizing
    // it merges the adjacent text nodes added above.
    document.getDocumentElement().normalize();
} // buildDom
// Display names for DOM node types, looked up by AdapterNode.toString()
// as typeName[node.getNodeType()]. The order therefore has to follow the
// org.w3c.dom.Node type constants (ELEMENT_NODE through NOTATION_NODE);
// "none" fills slot 0, which is presumably not a node type any node
// reports -- TODO confirm against the org.w3c.dom.Node constants.
static final String[] typeName = {
"none",
"Element",
"Attr",
"Text",
"CDATA",
"EntityRef",
"Entity",
"ProcInstr",
"Comment",
"Document",
"DocType",
"DocFragment",
"Notation",
};
// Short local aliases for the org.w3c.dom.Node type constants, compared
// against Node.getNodeType() inside AdapterNode.
static final int ELEMENT_TYPE = Node.ELEMENT_NODE;
static final int ATTR_TYPE = Node.ATTRIBUTE_NODE;
static final int TEXT_TYPE = Node.TEXT_NODE;
static final int CDATA_TYPE = Node.CDATA_SECTION_NODE;
static final int ENTITYREF_TYPE = Node.ENTITY_REFERENCE_NODE;
static final int ENTITY_TYPE = Node.ENTITY_NODE;
static final int PROCINSTR_TYPE = Node.PROCESSING_INSTRUCTION_NODE;
static final int COMMENT_TYPE = Node.COMMENT_NODE;
static final int DOCUMENT_TYPE = Node.DOCUMENT_NODE;
static final int DOCTYPE_TYPE = Node.DOCUMENT_TYPE_NODE;
static final int DOCFRAG_TYPE = Node.DOCUMENT_FRAGMENT_NODE;
static final int NOTATION_TYPE = Node.NOTATION_NODE;
/**
 * Determines which elements keep their own node in the tree and which
 * are compressed out.
 *
 * The inline tags b, B, em, EM, br and BR are the ones compressed out.
 * The comparison is exact, so mixed-case spellings such as "Em" count as
 * tree elements.
 *
 * @param en element name to classify
 * @return false for one of the inline tags listed above, true otherwise
 */
boolean treeElement(String en) {
    final String[] inlineTags = { "b", "B", "em", "EM", "br", "BR" };
    for (int i = 0; i < inlineTags.length; i++) {
        if (en.equals(inlineTags[i])) {
            return false;
        }
    }
    return true;
}
/**
 * Wraps a DOM node and returns the text we want to display in the tree.
 * It also returns children, index values, and child counts.
 *
 * When the enclosing class's <code>compress</code> flag is true, only
 * element children accepted by <code>treeElement</code> count as children;
 * everything else is folded into the text returned by {@link #content}.
 * When the flag is false, every DOM child is exposed as-is.
 */
public class AdapterNode
{
    org.w3c.dom.Node domNode;

    /**
     * Construct an Adapter node from a DOM node.
     *
     * @param node the DOM node to wrap
     */
    public AdapterNode(org.w3c.dom.Node node) {
        domNode = node;
    }

    /**
     * Return a string that identifies this node in the tree.
     *
     * The string starts with the node-type name, followed by ": name"
     * unless the node name starts with "#". In compress mode the first
     * line of {@link #content} is appended; otherwise the first line of
     * the node value (if any) is appended, separated by ", " for
     * processing instructions and ": " for everything else.
     */
    public String toString() {
        String s = typeName[domNode.getNodeType()];
        String nodeName = domNode.getNodeName();
        if (! nodeName.startsWith("#")) {
            s += ": " + nodeName;
        }
        if (compress) {
            return s + " " + firstLine(content());
        }
        String value = domNode.getNodeValue();
        if (value != null) {
            s += s.startsWith("ProcInstr") ? ", " : ": ";
            s += firstLine(value);
        }
        return s;
    }

    /**
     * Trims the text (to get rid of NL's at the front) and cuts it off at
     * the first remaining newline.
     */
    private String firstLine(String text) {
        String t = text.trim();
        int x = t.indexOf("\n");
        return (x >= 0) ? t.substring(0, x) : t;
    }

    /**
     * Tells whether a DOM child is shown as its own tree node in compress
     * mode. Have to check for the proper type as well as the name: the
     * DOCTYPE node can carry the same name as an element.
     */
    private boolean isDisplayed(org.w3c.dom.Node node) {
        return node.getNodeType() == ELEMENT_TYPE
            && treeElement( node.getNodeName() );
    }

    /**
     * Appends text to the buffer, converting angle brackets ('<') and
     * ampersands ('&') to "&lt;" and "&amp;" for display.
     */
    private void appendEscaped(StringBuffer out, String text) {
        for (int j = 0; j < text.length(); j++) {
            char c = text.charAt(j);
            if (c == '<') {
                out.append("&lt;");
            } else if (c == '&') {
                out.append("&amp;");
            } else {
                out.append(c);
            }
        }
    }

    /**
     * Returns the displayable content below this node as markup text.
     *
     * Text nodes contribute their value, inline elements (those rejected
     * by treeElement) are re-emitted as tags around their own content,
     * entity references contribute the content beneath them, and CDATA
     * sections are escaped and wrapped in a pre block. Elements that are
     * displayed in the tree are skipped.
     */
    public String content() {
        // One buffer for the whole result instead of repeated String +=.
        StringBuffer out = new StringBuffer();
        org.w3c.dom.NodeList nodeList = domNode.getChildNodes();
        for (int i=0; i<nodeList.getLength(); i++) {
            org.w3c.dom.Node node = nodeList.item(i);
            int type = node.getNodeType();
            if (type == ELEMENT_TYPE) {
                // Skip subelements that are displayed in the tree.
                if ( treeElement(node.getNodeName()) ) continue;
                // EXTRA-CREDIT HOMEWORK:
                // Special case the SLIDE element to use the TITLE text
                // and ignore TITLE element when constructing the tree.
                // EXTRA-CREDIT
                // Convert ITEM elements to html lists using
                // <ul>, <li>, </ul> tags
                out.append("<").append(node.getNodeName()).append(">");
                // A wrapper is only created where recursion needs one.
                out.append(new AdapterNode(node).content());
                out.append("</").append(node.getNodeName()).append(">");
            } else if (type == TEXT_TYPE) {
                out.append(node.getNodeValue());
            } else if (type == ENTITYREF_TYPE) {
                // The content is in the TEXT node under it
                out.append(new AdapterNode(node).content());
            } else if (type == CDATA_TYPE) {
                // The "value" has the text, same as a text node,
                // while EntityRef has it in a text node underneath
                // (because EntityRef can contain multiple subelements).
                out.append("<pre>");
                appendEscaped(out, node.getNodeValue());
                out.append("\n</pre>");
            }
            // Ignoring these:
            // ATTR_TYPE -- not in the DOM tree
            // ENTITY_TYPE -- does not appear in the DOM
            // PROCINSTR_TYPE -- not "data"
            // COMMENT_TYPE -- not "data"
            // DOCUMENT_TYPE -- Root node only. No data to display.
            // DOCTYPE_TYPE -- Appears under the root only
            // DOCFRAG_TYPE -- equiv. to "document" for fragments
            // NOTATION_TYPE -- nothing but binary data in here
        }
        return out.toString();
    }

    /*
     * Return children, index, and count values
     */

    /**
     * Returns the position of the given child among this node's exposed
     * children, using the same numbering as {@link #child}.
     *
     * @param child adapter whose wrapped DOM node is searched for
     * @return zero-based index, or -1 when it is not an exposed child
     */
    public int index(AdapterNode child) {
        // Single pass over the DOM children; the earlier version called
        // child(i) for every i, which rescans the list in compress mode.
        org.w3c.dom.NodeList kids = domNode.getChildNodes();
        int position = 0;
        for (int i=0; i<kids.getLength(); i++) {
            org.w3c.dom.Node node = kids.item(i);
            if (compress && !isDisplayed(node)) continue;
            if (child.domNode == node) return position;
            position++;
        }
        return -1; // Should never get here.
    }

    /**
     * Returns the exposed child at the given zero-based index (the JTree
     * index is zero-based).
     *
     * @param searchIndex index among the exposed children
     * @return a new adapter around the selected DOM node
     */
    public AdapterNode child(int searchIndex) {
        org.w3c.dom.NodeList kids = domNode.getChildNodes();
        org.w3c.dom.Node node = kids.item(searchIndex);
        if (compress) {
            // Return Nth displayable node.
            // NOTE(review): when searchIndex is not smaller than
            // childCount() the loop runs to the end and the last DOM
            // child is returned, whatever its type. Behavior kept as-is;
            // callers are expected to pass a valid index.
            int elementNodeIndex = 0;
            for (int i=0; i<kids.getLength(); i++) {
                node = kids.item(i);
                if (isDisplayed(node)
                    && elementNodeIndex++ == searchIndex) {
                    break;
                }
            }
        }
        return new AdapterNode(node);
    }

    /**
     * Returns the number of exposed children: all DOM children normally,
     * only the displayable elements in compress mode.
     */
    public int childCount() {
        org.w3c.dom.NodeList kids = domNode.getChildNodes();
        if (!compress) {
            return kids.getLength();
        }
        int count = 0;
        for (int i=0; i<kids.getLength(); i++) {
            if (isDisplayed(kids.item(i))) {
                ++count;
            }
        }
        return count;
    }
}
}
/**
 * SAX error handler installed on the DocumentBuilder.
 *
 * Warnings are dumped to standard output and parsing goes on; errors
 * (which include validation errors) and fatal errors are rethrown
 * unchanged.
 */
class MyErrorHandler implements ErrorHandler
{
    /**
     * Dumps the warning's line number, system id and message to standard
     * output without throwing.
     */
    public void warning(SAXParseException err)
    throws SAXParseException
    {
        StringBuffer location = new StringBuffer("** Warning");
        location.append(", line ").append(err.getLineNumber());
        location.append(", uri ").append(err.getSystemId());
        System.out.println(location.toString());
        System.out.println(" " + err.getMessage());
    }

    /** Treats validation errors as fatal by rethrowing the exception. */
    public void error(SAXParseException e)
    throws SAXParseException
    {
        throw e;
    }

    /** Rethrows the fatal parse error unchanged. */
    public void fatalError(SAXParseException e)
    throws SAXParseException
    {
        throw e;
    }
}