4434faab6256a9bb682324d0a302423009af833b
[phpeclipse.git] /
1 /*
2  * Copyright (c) 2002-2004 Widespace, OU and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     Igor Malinin - initial contribution
10  *
11  * $Id: XMLPartitionScanner.java,v 1.1 2004-09-02 18:28:03 jsurfer Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.ui.internal.text;
15
16 import java.util.HashMap;
17 import java.util.Map;
18
19 import org.eclipse.jface.text.Assert;
20 import org.eclipse.jface.text.BadLocationException;
21 import org.eclipse.jface.text.IDocument;
22 import org.eclipse.jface.text.rules.ICharacterScanner;
23 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
24 import org.eclipse.jface.text.rules.IToken;
25 import org.eclipse.jface.text.rules.Token;
26
27
28 /**
29  * 
30  * 
31  * @author Igor Malinin
32  */
33 public class XMLPartitionScanner implements IPartitionTokenScanner {
34         public static final String XML_PI         = "__xml_processing_instruction";
35         public static final String XML_COMMENT    = "__xml_comment";
36         public static final String XML_DECL       = "__xml_declaration";
37         public static final String XML_TAG        = "__xml_tag";
38         public static final String XML_ATTRIBUTE  = "__xml_attribute";
39         public static final String XML_CDATA      = "__xml_cdata";
40
41         public static final String DTD_INTERNAL         = "__dtd_internal";
42         public static final String DTD_INTERNAL_PI      = "__dtd_internal_pi";
43         public static final String DTD_INTERNAL_COMMENT = "__dtd_internal_comment";
44         public static final String DTD_INTERNAL_DECL    = "__dtd_internal_declaration";
45         public static final String DTD_CONDITIONAL      = "__dtd_conditional";
46
47         public static final int STATE_DEFAULT     = 0;
48         public static final int STATE_TAG         = 1;
49         public static final int STATE_DECL        = 2;
50         public static final int STATE_CDATA       = 4;
51
52         public static final int STATE_INTERNAL    = 8;
53
54         protected IDocument document;
55         protected int end;
56
57         protected int offset;
58         protected int length;
59
60         protected int position;
61         protected int state;
62
63         protected boolean parsedtd;
64
65         protected Map tokens = new HashMap();
66
67         public XMLPartitionScanner(boolean parsedtd) {
68                 this.parsedtd = parsedtd;
69         }
70
71         /*
72          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
73          */
74         public IToken nextToken() {
75                 offset += length;
76
77                 switch (state) {
78                         case STATE_TAG:
79                                 return nextTagToken();
80
81                         case STATE_DECL:
82                                 return nextDeclToken();
83
84                         case STATE_CDATA:
85                                 return nextCDATAToken();
86                 }
87
88                 switch (read()) {
89                         case ICharacterScanner.EOF:
90                                 state = STATE_DEFAULT;
91                                 return getToken(null);
92
93                         case '<':
94                                 switch (read()) {
95                                         case ICharacterScanner.EOF:
96                                                 if (parsedtd || isInternal()) {
97                                                         break;
98                                                 }
99
100                                                 state = STATE_DEFAULT;
101                                                 return getToken(XML_TAG);
102
103                                         case '?': // <?  <?PI
104                                                 return nextPIToken();
105
106                                         case '!': // <!  <!DEFINITION or <![CDATA[ or <!--COMMENT
107                                                 switch (read()) {
108                                                         case ICharacterScanner.EOF:
109                                                                 state = STATE_DEFAULT;
110                                                                 return getToken(XML_TAG);
111
112                                                         case '-': // <!-  <!--COMMENT
113                                                                 switch (read()) {
114                                                                         case ICharacterScanner.EOF:
115                                                                                 return nextDeclToken();
116
117                                                                         case '-': // <!--
118                                                                                 return nextCommentToken();
119                                                                 }
120
121                                                         case '[': // <![  <![CDATA[ or <![%cond;[
122                                                                 if (parsedtd) {
123                                                                         return nextConditionalToken();
124                                                                 }
125
126                                                                 if (!isInternal()) {
127                                                                         return nextCDATAToken();
128                                                                 }
129                                                 }
130
131                                                 return nextDeclToken();
132                                 }
133
134                                 if (parsedtd || isInternal()) {
135                                         break;
136                                 }
137
138                                 unread();
139
140                                 return nextTagToken();
141
142                         case ']':
143                                 if (isInternal()) {
144                                         unread();
145
146                                         state = STATE_DECL;
147                                         length = 0;
148                                         return nextToken();
149                                 }
150                 }
151
152 loop:
153                 while (true) {
154                         switch (read()) {
155                                 case ICharacterScanner.EOF:
156                                         state = STATE_DEFAULT;
157                                         return getToken(null);
158
159                                 case '<':
160                                         if (parsedtd || isInternal()) {
161                                                 switch (read()) {
162                                                         case ICharacterScanner.EOF:
163                                                                 state = STATE_DEFAULT;
164                                                                 return getToken(null);
165
166                                                         case '!':
167                                                         case '?':
168                                                                 unread();
169                                                                 break;
170
171                                                         default:
172                                                                 continue loop;
173                                                 }
174                                         }
175
176                                         unread();
177
178                                         state &= STATE_INTERNAL;
179                                         return getToken(isInternal() ? DTD_INTERNAL : null);
180
181                                 case ']':
182                                         if (isInternal()) {
183                                                 unread();
184
185                                                 state = STATE_DECL;
186                                                 if (position == offset) {
187                                                         // nothing between
188                                                         length = 0;
189                                                         return nextToken();
190                                                 }
191
192                                                 return getToken(DTD_INTERNAL);
193                                         }
194                         }
195                 }
196         }
197
198         private IToken nextTagToken() {
199                 int quot = read();
200
201                 switch (quot) {
202                         case ICharacterScanner.EOF:
203                         case '>':
204                                 state = STATE_DEFAULT;
205                                 return getToken(XML_TAG);
206
207                         case '"': case '\'':
208                                 while (true) {
209                                         int ch = read();
210
211                                         if (ch == quot) {
212                                                 state = STATE_TAG;
213                                                 return getToken(XML_ATTRIBUTE);
214                                         }
215
216                                         switch (ch) {
217                                                 case '<':
218                                                         unread();
219
220                                                 case ICharacterScanner.EOF:
221                                                         state = STATE_DEFAULT;
222                                                         return getToken(XML_ATTRIBUTE);
223                                         }
224                                 }
225                 }
226
227                 while (true) {
228                         switch (read()) {
229                                 case '<':
230                                         unread();
231
232                                 case ICharacterScanner.EOF:
233                                 case '>':
234                                         state = STATE_DEFAULT;
235                                         return getToken(XML_TAG);
236
237                                 case '"': case '\'':
238                                         unread();
239
240                                         state = STATE_TAG;
241                                         return getToken(XML_TAG);
242                         }
243                 }
244         }
245
246         private IToken nextDeclToken() {
247                 loop: while (true) {
248                         switch (read()) {
249                                 case ICharacterScanner.EOF:
250                                         state = STATE_DEFAULT;
251                                         return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
252
253                                 case '<':
254                                         if (parsedtd || isInternal()) {
255                                                 switch (read()) {
256                                                         case ICharacterScanner.EOF:
257                                                                 state = STATE_DEFAULT;
258                                                                 return getToken(isInternal() ? DTD_INTERNAL : null);
259
260                                                         case '!':
261                                                         case '?':
262                                                                 unread();
263                                                                 break;
264
265                                                         default:
266                                                                 continue loop;
267                                                 }
268                                         }
269
270                                         unread();
271
272                                 case '>':
273                                         state &= STATE_INTERNAL;
274                                         return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
275
276                                 case '[': // <!DOCTYPE xxx [dtd]>
277                                         if (!isInternal()) {
278                                                 state = STATE_INTERNAL;
279                                                 return getToken(XML_DECL);
280                                         }
281                         }
282                 }
283         }
284
285         private IToken nextCommentToken() {
286                 state &= STATE_INTERNAL;
287
288                 loop: while (true) {
289                         switch (read()) {
290                                 case ICharacterScanner.EOF:
291                                         break loop;
292
293                                 case '-': // -  -->
294                                         switch (read()) {
295                                                 case ICharacterScanner.EOF:
296                                                         break loop;
297
298                                                 case '-': // --  -->
299                                                         switch (read()) {
300                                                                 case ICharacterScanner.EOF:
301                                                                 case '>':
302                                                                         break loop;
303                                                         }
304
305                                                         unread();
306                                                         break loop;
307                                         }
308                         }
309                 }
310
311                 return getToken(isInternal() ? DTD_INTERNAL_COMMENT : XML_COMMENT);
312         }
313
314         private IToken nextPIToken() {
315                 state &= STATE_INTERNAL;
316
317                 loop: while (true) {
318                         switch (read()) {
319                                 case ICharacterScanner.EOF:
320                                         break loop;
321
322                                 case '?': // ?  ?>
323                                         switch (read()) {
324                                                 case ICharacterScanner.EOF:
325                                                 case '>':
326                                                         break loop;
327                                         }
328
329                                         unread();
330                         }
331                 }
332
333                 return getToken(isInternal() ? DTD_INTERNAL_PI : XML_PI);
334         }
335
336         private IToken nextCDATAToken() {
337                 state = STATE_DEFAULT;
338
339 loop:
340         while (true) {
341                         switch (read()) {
342                                 case ICharacterScanner.EOF:
343                                         break loop;
344
345                                 case ']': // ]  ]]>
346                                         switch (read()) {
347                                                 case ICharacterScanner.EOF:
348                                                         break loop;
349
350                                                 case ']': // ]]  ]]>
351                                                         switch (read()) {
352                                                                 case ICharacterScanner.EOF:
353                                                                 case '>': // ]]>
354                                                                         break loop;
355                                                         }
356
357                                                         unread();
358                                                         unread();
359                                                         continue loop;
360                                         }
361                         }
362                 }
363
364                 return getToken(XML_CDATA);
365         }
366
367         private IToken nextConditionalToken() {
368                 state = STATE_DEFAULT;
369
370                 int level = 1;
371
372 loop:
373         while (true) {
374                         switch (read()) {
375                                 case ICharacterScanner.EOF:
376                                         break loop;
377
378                                 case '<': // -  -->
379                                         switch (read()) {
380                                                 case ICharacterScanner.EOF:
381                                                         break loop;
382
383                                                 case '!': // --  -->
384                                                         switch (read()) {
385                                                                 case ICharacterScanner.EOF:
386                                                                         break loop;
387
388                                                                 case '[':
389                                                                         ++level;
390                                                                         continue loop;
391                                                         }
392
393                                                         unread();
394                                                         continue loop;
395                                         }
396
397                                         unread();
398                                         continue loop;
399
400                                 case ']': // -  -->
401                                         switch (read()) {
402                                                 case ICharacterScanner.EOF:
403                                                         break loop;
404
405                                                 case ']': // --  -->
406                                                         switch (read()) {
407                                                                 case ICharacterScanner.EOF:
408                                                                 case '>':
409                                                                         if (--level == 0) {
410                                                                                 break loop;
411                                                                         }
412
413                                                                         continue loop;
414                                                         }
415
416                                                         unread();
417                                                         unread();
418                                                         continue loop;
419                                         }
420                         }
421                 }
422
423                 return getToken(DTD_CONDITIONAL);
424         }
425
426         private IToken getToken(String type) {
427                 length = position - offset;
428
429                 if (length == 0) {
430                         return Token.EOF;
431                 }
432
433                 if (type == null) {
434                         return Token.UNDEFINED;
435                 }
436
437                 IToken token = (IToken) tokens.get(type);
438                 if (token == null) {
439                         token = new Token(type);
440                         tokens.put(type, token);
441                 }
442
443                 return token;
444         }
445
446         private boolean isInternal() {
447                 return (state & STATE_INTERNAL) != 0;
448         }
449
450         private int read() {
451                 if (position >= end) {
452                         return ICharacterScanner.EOF;
453                 }
454
455                 try {
456                         return document.getChar(position++);
457                 } catch (BadLocationException e) {
458                         --position;
459                         return ICharacterScanner.EOF;
460                 }
461         }
462
463         private void unread() {
464                 --position;
465         }
466
467         /*
468          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
469          */
470         public int getTokenOffset() {
471           Assert.isTrue(offset>=0, Integer.toString(offset));
472                 return offset;
473         }
474
475         /*
476          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
477          */
478         public int getTokenLength() {
479                 return length;
480         }
481
482         /*
483          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int, int)
484          */
485         public void setRange(IDocument document, int offset, int length) {
486                 this.document = document;
487                 this.end = offset + length;
488
489                 this.offset = offset;
490                 this.position = offset;
491                 this.length = 0;
492
493                 this.state = STATE_DEFAULT;
494         }
495
496         /*
497          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
498          */
499         public void setPartialRange(
500                 IDocument document, int offset, int length,
501                 String contentType, int partitionOffset
502         ) {
503                 this.document = document;
504                 this.end = offset + length;
505
506                 // NB! Undocumented value: -1
507                 if (partitionOffset >= 0) {
508                         offset = partitionOffset;
509                 }
510
511                 this.offset = offset;
512                 this.position = offset;
513                 this.length = 0;
514
515                 if (contentType == XML_ATTRIBUTE) {
516                         state = STATE_TAG;
517                         return;
518                 }
519
520                 if (contentType == XML_TAG) {
521                         state = isContinuationPartition() ? STATE_TAG : STATE_DEFAULT;
522                         return;
523                 }
524
525                 if (contentType == XML_DECL) {
526                         state = isContinuationPartition() ? STATE_DECL : STATE_DEFAULT;
527                         return;
528                 }
529
530                 if (contentType == DTD_INTERNAL ||
531                         contentType == DTD_INTERNAL_PI ||
532                         contentType == DTD_INTERNAL_DECL ||
533                         contentType == DTD_INTERNAL_COMMENT
534                 ) {
535                         state = STATE_INTERNAL;
536                         return;
537                 }
538
539                 state = STATE_DEFAULT;
540         }
541
542         private boolean isContinuationPartition() {
543                 try {
544                         String type = document.getContentType(offset - 1);
545
546                         if (type != IDocument.DEFAULT_CONTENT_TYPE) {
547                                 return true;
548                         }
549                 } catch (BadLocationException e) {}
550
551                 return false;
552         }
553 }