2 * @(#)Node.java 1.11 2000/08/16
12 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13 * See Tidy.java for the copyright notice.
14 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15 * HTML Tidy Release 4 Aug 2000</a>
17 * @author Dave Raggett <dsr@w3.org>
18 * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19 * @version 1.0, 1999/05/22
20 * @version 1.0.1, 1999/05/29
21 * @version 1.1, 1999/06/18 Java Bean
22 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24 * @version 1.4, 1999/09/04 DOM support
25 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
35 Used for elements and text nodes
36 element name is null for text nodes
37 start and end are offsets into lexbuf
38 which contains the textual content of
39 all elements in the parse tree.
41 parent and content allow traversal
42 of the parse tree in any direction.
43 attributes are represented as a linked
44 list of AttVal nodes which hold the
45 strings for attribute/value pairs.
/*
  Node type codes, held in a node's `type` field.
  The values are consecutive shorts from 0 (RootNode) to 12 (PhpTag).
*/
50 public static final short RootNode = 0;
51 public static final short DocTypeTag = 1;
52 public static final short CommentTag = 2;
53 public static final short ProcInsTag = 3;
54 public static final short TextNode = 4;
55 public static final short StartTag = 5;
56 public static final short EndTag = 6;
57 public static final short StartEndTag = 7;
58 public static final short CDATATag = 8;
59 public static final short SectionTag = 9;
60 public static final short AspTag = 10;
61 public static final short JsteTag = 11;
62 public static final short PhpTag = 12;
/* per-node state; other fields may be declared on lines not shown in this listing */
64 protected Node parent; /* enclosing node in the parse tree */
68 protected int start; /* start of span onto text array */
69 protected int end; /* end of span onto text array */
70 protected byte[] textarray; /* the text array */
71 protected short type; /* TextNode, StartTag, EndTag etc. */
72 protected boolean closed; /* true if closed by explicit end tag */
73 protected boolean implicit; /* true if inferred */
74 protected boolean linebreak; /* true if followed by a line break */
75 protected Dict was; /* old tag when it was changed */
76 protected Dict tag; /* tag's dictionary definition */
77 protected String element; /* name (null for text nodes) */
78 protected AttVal attributes; /* head of the linked list of attribute/value pairs */
79 protected Node content; /* first child; children are walked from here via next */
/* delegates to the four-argument constructor: type TextNode, null text array, span (0, 0) */
83 this(TextNode, null, 0, 0);
/*
  Creates a node of the given type over a span of textarray.
  The visible assignments store the text array reference (no copy) and
  clear the implicit flag, the linebreak flag and the attribute list.
  NOTE(review): the remaining assignments (presumably type, start, end and
  the link fields) are on lines not shown in this listing - confirm.
*/
86 public Node(short type, byte[] textarray, int start, int end)
94 this.textarray = textarray;
97 this.implicit = false;
98 this.linebreak = false;
102 this.attributes = null;
/*
  Creates a named node: as the four-argument form, plus the element name.
  The visible assignments store the text array reference and the element
  name, and clear the implicit flag, the linebreak flag and the attribute list.
  NOTE(review): the statement guarded by the final type test is not shown;
  presumably it resolves the element through tt for start, start-end and
  end tags only - confirm.
*/
106 public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
114 this.textarray = textarray;
117 this.implicit = false;
118 this.linebreak = false;
121 this.element = element;
122 this.attributes = null;
124 if (type == StartTag || type == StartEndTag || type == EndTag)
128 /* used to clone heading nodes when split by an <HR> */
/*
  Builds a new Node carrying this node's parent reference, type, closed,
  implicit and linebreak flags, the same element string, and a clone of
  the attribute list.
  When a text array is present, a fresh array of (end - start) bytes is
  allocated and that many bytes are copied from this.start, so the copy's
  end equals the span length.
  NOTE(review): the assignment of node.start and the return statement are
  on lines not shown; node.start is used as the destination offset of the
  copy, so it is presumably 0 - confirm.
*/
129 protected Object clone()
131 Node node = new Node();
133 node.parent = this.parent;
134 if (this.textarray != null)
136 node.textarray = new byte[this.end - this.start];
138 node.end = this.end - this.start;
140 System.arraycopy(this.textarray, this.start,
141 node.textarray, node.start, node.end);
143 node.type = this.type;
144 node.closed = this.closed;
145 node.implicit = this.implicit;
146 node.linebreak = this.linebreak;
149 if (this.element != null)
150 node.element = this.element;
151 if (this.attributes != null)
152 node.attributes = (AttVal)this.attributes.clone();
/*
  Walks the attribute list from its head looking for an entry whose
  attribute name is non-null and equals(name) (an exact, case-sensitive
  String comparison).
  NOTE(review): the start of the condition and the return are on lines not
  shown; presumably the matching AttVal is returned, or null when the list
  is exhausted - confirm.
*/
156 public AttVal getAttrByName(String name)
160 for (attr = this.attributes; attr != null; attr = attr.next)
163 attr.attribute != null &&
164 attr.attribute.equals(name))
171 /* default method for checking an element's attributes */
/* calls checkAttribute(lexer, this) on every entry of the attribute list, in list order */
172 public void checkAttributes( Lexer lexer )
176 for (attval = this.attributes; attval != null; attval = attval.next)
177 attval.checkAttribute( lexer, this );
/*
  Calls checkUniqueAttribute(lexer, this) on each entry of the attribute
  list, skipping entries whose asp or php field is set.
*/
180 public void checkUniqueAttributes(Lexer lexer)
184 for (attval = this.attributes; attval != null; attval = attval.next) {
185 if (attval.asp == null && attval.php == null)
186 attval.checkUniqueAttribute(lexer, this);
/*
  Creates a new AttVal and appends it to this node's attribute list: it
  becomes the head when the list is empty, otherwise the list is walked to
  its last entry.
  The default AttributeTable's findAttribute(av) is consulted for the new
  entry.
  NOTE(review): the rest of the AttVal constructor call, the target of the
  findAttribute result and the final link statement are on lines not shown;
  presumably name and value are passed to the constructor - confirm.
*/
190 public void addAttribute(String name, String value)
192 AttVal av = new AttVal(null, null, null, null,
195 AttributeTable.getDefaultAttributeTable().findAttribute(av);
197 if (this.attributes == null)
198 this.attributes = av;
199 else /* append to end of attributes */
201 AttVal here = this.attributes;
203 while (here.next != null)
210 /* remove attribute from node then free it */
/*
  Walks the attribute list with a look-ahead pointer (next).
  The one visible mutation replaces the list head with next.
  NOTE(review): the comparison against attr and the unlinking of non-head
  entries are on lines not shown - confirm.
*/
211 public void removeAttribute(AttVal attr)
217 for (av = this.attributes; av != null; av = next)
226 this.attributes = next;
233 /* find doctype element */
/*
  Scans this node's direct children only. The loop has an empty body (note
  the trailing semicolon) and stops at the first child of type DocTypeTag,
  or with node == null when there is none.
  NOTE(review): the return is not shown; presumably node is returned.
*/
234 public Node findDocType()
238 for (node = this.content;
239 node != null && node.type != DocTypeTag; node = node.next);
/*
  Unlinks the doctype child found by findDocType() from the sibling chain:
  the predecessor's next (or, for a first child, the parent's content) is
  pointed at the successor, and the successor's prev at the predecessor.
  NOTE(review): the null guard on node and the selecting else are on lines
  not shown. No visible statement updates parent.last when the doctype is
  the last child - confirm.
*/
244 public void discardDocType()
248 node = findDocType();
251 if (node.prev != null)
252 node.prev.next = node.next;
254 node.parent.content = node.next;
256 if (node.next != null)
257 node.next.prev = node.prev;
263 /* remove node from markup tree and discard it */
/* NOTE(review): the body is not shown in this listing; the meaning of the returned Node must be confirmed there */
264 public static Node discardElement(Node element)
277 /* insert node into markup tree */
/*
  Makes node the first child of element: node's parent is set, the old
  first child (if any) gets node as its prev, node's next is the old first
  child, and element.content is repointed to node.
  NOTE(review): the statement for the empty-children case and the else are
  on lines not shown; presumably element.last is set there - confirm.
*/
278 public static void insertNodeAtStart(Node element, Node node)
280 node.parent = element;
282 if (element.content == null)
285 element.content.prev = node; // AQ added 13 Apr 2000
287 node.next = element.content;
289 element.content = node;
292 /* insert node into markup tree */
/*
  Appends node to element's children: node's parent and prev are set, and
  the old last child (if any) gets node as its next.
  NOTE(review): the else before the element.content assignment and the
  update of element.last are on lines not shown; presumably content is set
  only when element had no children - confirm.
*/
293 public static void insertNodeAtEnd(Node element, Node node)
295 node.parent = element;
296 node.prev = element.last;
298 if (element.last != null)
299 element.last.next = node;
301 element.content = node;
307 insert node into markup tree in place of element
308 which is moved to become the child of the node
/*
  Splices node into the tree where element was and makes element node's
  first child: node takes over element's parent, the parent's content/last
  references that pointed at element are repointed to node, and node
  inherits element's prev and next links, with both neighbours
  back-linked to node.
  NOTE(review): node.parent is dereferenced without a visible null check,
  so element is assumed to have a parent. Statements on the lines not shown
  (e.g. setting node.last, clearing element's sibling links) are
  presumed - confirm.
*/
310 public static void insertNodeAsParent(Node element, Node node)
312 node.content = element;
314 node.parent = element.parent;
315 element.parent = node;
317 if (node.parent.content == element)
318 node.parent.content = node;
320 if (node.parent.last == element)
321 node.parent.last = node;
323 node.prev = element.prev;
326 if (node.prev != null)
327 node.prev.next = node;
329 node.next = element.next;
332 if (node.next != null)
333 node.next.prev = node;
336 /* insert node into markup tree before element */
/*
  Gives node element's parent and element's previous sibling, back-links
  that sibling to node, and repoints parent.content when element was the
  first child.
  NOTE(review): parent.content is read without a visible null check on
  parent. The links between node and element themselves (node.next,
  element.prev) are presumably set on lines not shown - confirm.
*/
337 public static void insertNodeBeforeElement(Node element, Node node)
341 parent = element.parent;
342 node.parent = parent;
344 node.prev = element.prev;
347 if (node.prev != null)
348 node.prev.next = node;
350 if (parent.content == element)
351 parent.content = node;
354 /* insert node into markup tree after element */
/*
  Gives node element's parent and element's next sibling, and back-links
  that sibling to node when it exists. A null parent is tolerated by the
  parent.last test.
  NOTE(review): the statement guarded by the parent.last test (presumably
  parent.last = node) and the links between element and node are on lines
  not shown - confirm.
*/
355 public static void insertNodeAfterElement(Node element, Node node)
359 parent = element.parent;
360 node.parent = parent;
362 // AQ - 13Jan2000 fix for parent == null
363 if (parent != null && parent.last == element)
367 node.next = element.next;
368 // AQ - 13Jan2000 fix for node.next == null
369 if (node.next != null)
370 node.next.prev = node;
/*
  Handles an element that may be empty.
  If lexer.canPrune(element) holds, the element is discarded via
  discardElement; a TRIM_EMPTY_ELEMENT warning is reported first unless it
  is a text node.
  Otherwise, if the element is a <p> with no content, it is coerced into a
  <br> and a second inferred <br> is inserted after it.
*/
377 public static void trimEmptyElement(Lexer lexer, Node element)
379 TagTable tt = lexer.configuration.tt;
381 if (lexer.canPrune(element))
383 if (element.type != TextNode)
384 Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
386 discardElement(element);
388 else if (element.tag == tt.tagP && element.content == null)
390 /* replace <p></p> by <br><br> to preserve formatting */
391 Node node = lexer.inferredTag("br");
392 Node.coerceNode(lexer, element, tt.tagBr);
393 Node.insertNodeAfterElement(element, node);
399 <em>hello </em><strong>world</strong>
401 <em>hello</em> <strong>world</strong>
403 If last child of element is a text node
404 then trim trailing white space character
405 moving it to after element's end tag.
/*
  Acts only when last is a non-empty text node. Its final byte is read from
  lexer.lexbuf at last.end - 1 and compared with 160 and with ' '.
  For <td>/<th> the visible test requires more than one byte in the span
  before acting; the action itself is on lines not shown.
  For inline elements that are not form-field content, lexer.insertspace is
  set. If the text span has become empty, the text node is passed to
  trimEmptyElement.
  NOTE(review): the declaration of c is not shown. If c is a byte, the
  comparison c == 160 can never be true (a byte holds at most 127), so a
  non-breaking space would not be matched - confirm.
  NOTE(review): the byte comes from lexer.lexbuf, not last.textarray;
  presumably they are the same buffer for this node - confirm.
*/
407 public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
410 TagTable tt = lexer.configuration.tt;
412 if (last != null && last.type == Node.TextNode &&
413 last.end > last.start)
415 c = lexer.lexbuf[last.end - 1];
417 if (c == 160 || c == (byte)' ')
419 /* take care with <td> </td> */
420 if (element.tag == tt.tagTd ||
421 element.tag == tt.tagTh)
423 if (last.end > last.start + 1)
430 if (((element.tag.model & Dict.CM_INLINE) != 0) &&
431 !((element.tag.model & Dict.CM_FIELD) != 0))
432 lexer.insertspace = true;
434 /* if empty string then delete from parse tree */
435 if (last.start == last.end)
436 trimEmptyElement(lexer, last);
444 <p>hello<em> world</em>
446 <p>hello <em>world</em>
448 Trims initial space, by moving it before the
449 start tag, or if this element is the first in
450 parent's content, then by discarding the space
/*
  Acts when text is a text node whose first byte is a space.
  For an inline, non-field element that is not its parent's first child,
  the space is moved in front of the element:
    - if the previous sibling is a text node, a space is appended to it
      (by writing at prev.end and advancing prev.end) unless it already
      ends with one;
    - otherwise a new node is obtained from lexer.newNode(). It takes the
      slot at element.start (advancing element.start) when the element has
      a non-empty span, or a fresh one-byte array when it does not. The
      byte at node.start is set to a space and the node gets the element's
      parent.
  NOTE(review): the outer condition reads text.textarray[text.start] before
  the text.start < text.end test. Because && evaluates left to right, the
  bounds test does not protect the array read - consider swapping them.
  NOTE(review): the lookup of prev, the sibling linking of the new node and
  the final removal of the space from text are on lines not shown.
*/
452 public static void trimInitialSpace(Lexer lexer, Node element, Node text)
456 // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
458 if (text.type == TextNode && text.textarray[text.start] == (byte)' '
459 && (text.start < text.end))
461 if (((element.tag.model & Dict.CM_INLINE) != 0) &&
462 !((element.tag.model & Dict.CM_FIELD) != 0) &&
463 element.parent.content != element)
467 if (prev != null && prev.type == TextNode)
469 if (prev.textarray[prev.end - 1] != (byte)' ')
470 prev.textarray[prev.end++] = (byte)' ';
474 else /* create new node */
476 node = lexer.newNode();
477 // Local fix for bug 228486 (GLP). This handles the case
478 // where we need to create a preceding text node but there are
479 // no "slots" in textarray that we can steal from the current
480 // element. Therefore, we create a new textarray containing
481 // just the blank. When Tidy is fixed, this should be removed.
482 if (element.start >= element.end)
486 node.textarray = new byte[1];
490 node.start = element.start++;
491 node.end = element.start;
492 node.textarray = element.textarray;
494 node.textarray[node.start] = (byte)' ';
500 node.parent = element.parent;
504 /* discard the space in current node */
510 Move initial and trailing space out.
517 <em>hello </em><strong>world</strong>
519 <em>hello</em> <strong>world</strong>
/*
  Applies trimInitialSpace when the element's first child is a text node
  and the element is not <pre>, then applies trimTrailingSpace to a text
  node.
  NOTE(review): text is presumably reassigned between the two tests (to the
  element's last child) on a line not shown - confirm. The second test has
  no visible <pre> exclusion.
*/
521 public static void trimSpaces(Lexer lexer, Node element)
523 Node text = element.content;
524 TagTable tt = lexer.configuration.tt;
526 if (text != null && text.type == Node.TextNode &&
527 element.tag != tt.tagPre)
528 trimInitialSpace(lexer, element, text);
532 if (text != null && text.type == Node.TextNode)
533 trimTrailingSpace(lexer, element, text);
/*
  Walks up the parent chain from this node's parent to the root, comparing
  each ancestor's tag with the given Dict by reference (==). The node
  itself is not tested.
  NOTE(review): the returns are not shown; presumably true on the first
  match and false otherwise.
*/
536 public boolean isDescendantOf(Dict tag)
540 for (parent = this.parent;
541 parent != null; parent = parent.parent)
543 if (parent.tag == tag)
551 the doctype has been found after other tags,
552 and needs moving to before the html element
/*
  Reports DOCTYPE_AFTER_TAGS, climbs from element through its ancestors
  until the node whose tag is tt.tagHtml, and inserts doctype immediately
  before that node.
  NOTE(review): the climb has no null guard; if no ancestor carries the
  html tag, element becomes null and element.tag throws
  NullPointerException.
*/
554 public static void insertDocType(Lexer lexer, Node element, Node doctype)
556 TagTable tt = lexer.configuration.tt;
558 Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
560 while (element.tag != tt.tagHtml)
561 element = element.parent;
563 insertNodeBeforeElement(element, doctype);
/*
  Two successive searches: first for a node tagged tt.tagHtml, then for a
  node tagged tt.tagBody; each loop stops early when node becomes null.
  NOTE(review): the starting points, loop bodies and returns are on lines
  not shown; presumably this scans siblings at each level and returns the
  body node or null - confirm.
*/
566 public Node findBody(TagTable tt)
572 while (node != null && node.tag != tt.tagHtml)
580 while (node != null && node.tag != tt.tagBody)
/* true when this node's type is StartTag or StartEndTag; the ?: only restates the boolean condition */
586 public boolean isElement()
588 return (this.type == StartTag || this.type == StartEndTag ? true : false);
592 unexpected content in table row is moved to just before
593 the table in accordance with Netscape and IE. This code
594 assumes that node hasn't been inserted into the row.
/*
  Climbs from row's parent to the first ancestor whose tag is tt.tagTable
  and links node in front of it: the table parent's content is repointed
  when the table was the first child, node takes the table's prev and
  parent, and the previous sibling (if any) is linked forward to node.
  NOTE(review): table.parent is dereferenced without a visible null check.
  The node/table cross links and the loop exit are presumably on lines not
  shown - confirm.
*/
596 public static void moveBeforeTable(Node row, Node node, TagTable tt)
600 /* first find the table element */
601 for (table = row.parent; table != null; table = table.parent)
603 if (table.tag == tt.tagTable)
605 if (table.parent.content == table)
606 table.parent.content = node;
608 node.prev = table.prev;
611 node.parent = table.parent;
613 if (node.prev != null)
614 node.prev.next = node;
622 if a table row is empty then insert an empty cell
623 this practice is consistent with browser behavior
624 and avoids potential problems with row spanning cells
/*
  When row has no children, creates an inferred <td>, appends it to the
  row and reports a MISSING_STARTTAG warning for it. A row that already has
  content is left untouched.
*/
626 public static void fixEmptyRow(Lexer lexer, Node row)
630 if (row.content == null)
632 cell = lexer.inferredTag("td");
633 insertNodeAtEnd(row, cell);
634 Report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
/*
  Re-labels node in place as an implicit start tag named tag.name.
  A throwaway inferred tag of the same name is created only to serve as the
  second argument of the OBSOLETE_ELEMENT warning.
  NOTE(review): two lines between the warning and the type assignment are
  not shown; presumably they record the old tag in node.was and set
  node.tag - confirm.
*/
638 public static void coerceNode(Lexer lexer, Node node, Dict tag)
640 Node tmp = lexer.inferredTag(tag.name);
641 Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
644 node.type = StartTag;
645 node.implicit = true;
646 node.element = tag.name;
649 /* extract a node and its children from a markup tree */
/*
  Unlinks node from its siblings and from its parent's content/last
  references, then clears node's own parent, prev and next. node.content is
  not touched, so the subtree stays attached to node. A node with no parent
  is handled.
*/
650 public static void removeNode(Node node)
652 if (node.prev != null)
653 node.prev.next = node.next;
655 if (node.next != null)
656 node.next.prev = node.prev;
658 if (node.parent != null)
660 if (node.parent.content == node)
661 node.parent.content = node.next;
663 if (node.parent.last == node)
664 node.parent.last = node.prev;
667 node.parent = node.prev = node.next = null;
/*
  Appends node to element when its type is one of the miscellaneous kinds:
  comment, processing instruction, CDATA, section, ASP, JSTE.
  NOTE(review): the last operand of the condition and the returns are on
  lines not shown; presumably PhpTag completes the list and the result
  reports whether the node was inserted - confirm.
*/
670 public static boolean insertMisc(Node element, Node node)
672 if (node.type == CommentTag ||
673 node.type == ProcInsTag ||
674 node.type == CDATATag ||
675 node.type == SectionTag ||
676 node.type == AspTag ||
677 node.type == JsteTag ||
680 insertNodeAtEnd(element, node);
688 used to determine how attributes
689 without values should be printed
690 this was introduced to deal with
691 user defined tags e.g. Cold Fusion
/*
  For a non-null node with a non-null tag, reports whether the tag's model
  has the Dict.CM_NEW bit set.
  NOTE(review): the fall-through return for a null node or null tag is not
  shown - confirm its value.
*/
693 public static boolean isNewNode(Node node)
695 if (node != null && node.tag != null)
697 return ((node.tag.model & Dict.CM_NEW) != 0);
/* true when this node has a first child and that child has no next sibling, i.e. exactly one child */
703 public boolean hasOneChild()
705 return (this.content != null && this.content.next == null);
708 /* find html element */
/*
  Scans this node's direct children only. The loop has an empty body (note
  the trailing semicolon) and stops at the first child whose tag is
  tt.tagHtml, or with node == null when there is none.
  NOTE(review): the return is not shown; presumably node is returned.
*/
709 public Node findHTML(TagTable tt)
713 for (node = this.content;
714 node != null && node.tag != tt.tagHtml; node = node.next);
/*
  Locates the html element with findHTML(tt), then scans its direct
  children for the first one whose tag is tt.tagHead.
  NOTE(review): the null guard on the findHTML result, the loop increment
  and the return are on lines not shown - confirm.
*/
719 public Node findHEAD(TagTable tt)
723 node = this.findHTML(tt);
727 for (node = node.content;
728 node != null && node.tag != tt.tagHead;
/*
  Consistency check of the links around this node, then recursively of
  every child:
    - prev.next and next.prev must point back at this node;
    - with a parent, a node without prev must be parent.content and a node
      without next must be parent.last;
    - the parent's children are scanned (presumably to confirm this node is
      among them, using found).
  NOTE(review): the statements taken when a check fails and the returns are
  on lines not shown; presumably false on the first violation and true
  otherwise - confirm.
*/
735 public boolean checkNodeIntegrity()
738 boolean found = false;
740 if (this.prev != null)
742 if (this.prev.next != this)
746 if (this.next != null)
748 if (this.next.prev != this)
752 if (this.parent != null)
754 if (this.prev == null && this.parent.content != this)
757 if (this.next == null && this.parent.last != this)
760 for (child = this.parent.content; child != null; child = child.next)
771 for (child = this.content; child != null; child = child.next)
772 if (!child.checkNodeIntegrity())
779 Add class="foo" to node
/*
  Adds classname to node's "class" attribute: appended after a single space
  when the attribute already exists, otherwise a new "class" attribute is
  created through addAttribute.
  NOTE(review): if an existing class attribute can have a null value, the
  concatenation produces the text "null " followed by classname - confirm
  whether value may be null here.
*/
781 public static void addClass(Node node, String classname)
783 AttVal classattr = node.getAttrByName("class");
786 if there already is a class attribute
787 then append class name after a space
789 if (classattr != null)
791 classattr.value = classattr.value + " " + classname;
793 else /* create new class attribute */
794 node.addAttribute("class", classname);
797 /* --------------------- DEBUG -------------------------- */
/* debug labels indexed by node type code (used by toString()); the initializer is on lines not shown */
799 private static final String[] nodeTypeString =
/*
  Debug rendering built by string concatenation. For a node n it appends
  the type label from nodeTypeString and, for text, comment and
  processing-instruction nodes with a text array and start <= end, the
  span's text decoded by Lexer.getString. A non-null n.content is then
  rendered recursively.
  NOTE(review): the declaration and iteration of n, the separators, what is
  appended for a non-null element, and the return are on lines not shown;
  presumably n walks this node and its following siblings - confirm.
*/
814 public String toString()
821 s += nodeTypeString[n.type];
823 if (n.element != null)
827 if (n.type == TextNode ||
828 n.type == CommentTag ||
829 n.type == ProcInsTag) {
831 if (n.textarray != null && n.start <= n.end) {
833 s += Lexer.getString(n.textarray, n.start, n.end - n.start);
840 if (n.content != null)
841 s += n.content.toString();
851 /* --------------------- END DEBUG ---------------------- */
854 /* --------------------- DOM ---------------------------- */
/* DOM view of this node; starts out null and is assigned in getAdapter() */
856 protected org.w3c.dom.Node adapter = null;
/*
  Assigns the adapter field a DOM wrapper constructed around this node,
  using one of eight implementation classes: document, element, document
  type, comment, text, CDATA section, processing instruction, or the
  generic DOMNodeImpl.
  NOTE(review): the selection logic and the return are on lines not shown;
  presumably the wrapper is chosen by node type, created only when adapter
  is still null, and then returned - confirm.
*/
858 protected org.w3c.dom.Node getAdapter()
865 adapter = new DOMDocumentImpl(this);
869 adapter = new DOMElementImpl(this);
872 adapter = new DOMDocumentTypeImpl(this);
875 adapter = new DOMCommentImpl(this);
878 adapter = new DOMTextImpl(this);
881 adapter = new DOMCDATASectionImpl(this);
884 adapter = new DOMProcessingInstructionImpl(this);
887 adapter = new DOMNodeImpl(this);
/*
  Copies this node with clone(), then clones each child recursively
  (passing the same deep flag) and appends the copies to the new node in
  order.
  NOTE(review): the test of deep that presumably guards the child loop, and
  the return, are on lines not shown - confirm.
*/
893 protected Node cloneNode(boolean deep)
895 Node node = (Node)this.clone();
900 for (child = this.content; child != null; child = child.next)
902 newChild = child.cloneNode(deep);
903 insertNodeAtEnd(node, newChild);
/* NOTE(review): the body is not shown in this listing; presumably it stores newType in the type field - confirm */
910 protected void setType(short newType)
915 /* --------------------- END DOM ------------------------ */