0f41873aa835ec601d650e97d9393831cddcedb4
[phpeclipse.git] / net.sourceforge.phpeclipse.ui / src / net / sourceforge / phpeclipse / xml / ui / internal / text / XMLPartitionScanner.java
1 /*
2  * Copyright (c) 2002-2004 Widespace, OU and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     Igor Malinin - initial contribution
10  *
11  * $Id: XMLPartitionScanner.java,v 1.5 2006-10-21 23:14:13 pombredanne Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.ui.internal.text;
15
16 import java.util.HashMap;
17 import java.util.Map;
18
19 //incastrix
20 //import org.eclipse.jface.text.Assert;
21 import org.eclipse.core.runtime.Assert;
22 import org.eclipse.jface.text.BadLocationException;
23 import org.eclipse.jface.text.IDocument;
24 import org.eclipse.jface.text.rules.ICharacterScanner;
25 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
26 import org.eclipse.jface.text.rules.IToken;
27 import org.eclipse.jface.text.rules.Token;
28
29 /**
30  * 
31  * 
32  * @author Igor Malinin
33  */
34 public class XMLPartitionScanner implements IPartitionTokenScanner {
35         public static final String XML_PI = "__xml_processing_instruction";
36
37         public static final String XML_COMMENT = "__xml_comment";
38
39         public static final String XML_DECL = "__xml_declaration";
40
41         public static final String XML_TAG = "__xml_tag";
42
43         public static final String XML_ATTRIBUTE = "__xml_attribute";
44
45         public static final String XML_CDATA = "__xml_cdata";
46
47         public static final String DTD_INTERNAL = "__dtd_internal";
48
49         public static final String DTD_INTERNAL_PI = "__dtd_internal_pi";
50
51         public static final String DTD_INTERNAL_COMMENT = "__dtd_internal_comment";
52
53         public static final String DTD_INTERNAL_DECL = "__dtd_internal_declaration";
54
55         public static final String DTD_CONDITIONAL = "__dtd_conditional";
56
57         public static final int STATE_DEFAULT = 0;
58
59         public static final int STATE_TAG = 1;
60
61         public static final int STATE_DECL = 2;
62
63         public static final int STATE_CDATA = 4;
64
65         public static final int STATE_INTERNAL = 8;
66
67         protected IDocument document;
68
69         protected int end;
70
71         protected int offset;
72
73         protected int length;
74
75         protected int position;
76
77         protected int state;
78
79         protected boolean parsedtd;
80
81         protected Map tokens = new HashMap();
82
83         public XMLPartitionScanner(boolean parsedtd) {
84                 this.parsedtd = parsedtd;
85         }
86
87         /*
88          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
89          */
90         public IToken nextToken() {
91                 offset += length;
92
93                 switch (state) {
94                 case STATE_TAG:
95                         return nextTagToken();
96
97                 case STATE_DECL:
98                         return nextDeclToken();
99
100                 case STATE_CDATA:
101                         return nextCDATAToken();
102                 }
103
104                 switch (read()) {
105                 case ICharacterScanner.EOF:
106                         state = STATE_DEFAULT;
107                         return getToken(null);
108
109                 case '<':
110                         switch (read()) {
111                         case ICharacterScanner.EOF:
112                                 if (parsedtd || isInternal()) {
113                                         break;
114                                 }
115
116                                 state = STATE_DEFAULT;
117                                 return getToken(XML_TAG);
118
119                         case '?': // <? <?PI
120                                 return nextPIToken();
121
122                         case '!': // <! <!DEFINITION or <![CDATA[ or <!--COMMENT
123                                 switch (read()) {
124                                 case ICharacterScanner.EOF:
125                                         state = STATE_DEFAULT;
126                                         return getToken(XML_TAG);
127
128                                 case '-': // <!- <!--COMMENT
129                                         switch (read()) {
130                                         case ICharacterScanner.EOF:
131                                                 return nextDeclToken();
132
133                                         case '-': // <!--
134                                                 return nextCommentToken();
135                                         }
136
137                                 case '[': // <![ <![CDATA[ or <![%cond;[
138                                         if (parsedtd) {
139                                                 return nextConditionalToken();
140                                         }
141
142                                         if (!isInternal()) {
143                                                 return nextCDATAToken();
144                                         }
145                                 }
146
147                                 return nextDeclToken();
148                         }
149
150                         if (parsedtd || isInternal()) {
151                                 break;
152                         }
153
154                         unread();
155
156                         return nextTagToken();
157
158                 case ']':
159                         if (isInternal()) {
160                                 unread();
161
162                                 state = STATE_DECL;
163                                 length = 0;
164                                 return nextToken();
165                         }
166                         break;
167                 default:
168                         unread();
169                 }
170
171                 loop: while (true) {
172                         switch (read()) {
173                         case ICharacterScanner.EOF:
174                                 state = STATE_DEFAULT;
175                                 return getToken(null);
176
177                         case '<':
178                                 if (parsedtd || isInternal()) {
179                                         switch (read()) {
180                                         case ICharacterScanner.EOF:
181                                                 state = STATE_DEFAULT;
182                                                 return getToken(null);
183
184                                         case '!':
185                                         case '?':
186                                                 unread();
187                                                 break;
188
189                                         default:
190                                                 continue loop;
191                                         }
192                                 }
193
194                                 unread();
195
196                                 state &= STATE_INTERNAL;
197                                 return getToken(isInternal() ? DTD_INTERNAL : null);
198
199                         case ']':
200                                 if (isInternal()) {
201                                         unread();
202
203                                         state = STATE_DECL;
204                                         if (position == offset) {
205                                                 // nothing between
206                                                 length = 0;
207                                                 return nextToken();
208                                         }
209
210                                         return getToken(DTD_INTERNAL);
211                                 }
212                         }
213                 }
214         }
215
216         private IToken nextTagToken() {
217                 int quot = read();
218
219                 switch (quot) {
220                 case ICharacterScanner.EOF:
221                 case '>':
222                         state = STATE_DEFAULT;
223                         return getToken(XML_TAG);
224
225                 case '"':
226                 case '\'':
227                         while (true) {
228                                 int ch = read();
229
230                                 if (ch == quot) {
231                                         state = STATE_TAG;
232                                         return getToken(XML_ATTRIBUTE);
233                                 }
234
235                                 switch (ch) {
236                                 case '<':
237                                         unread();
238
239                                 case ICharacterScanner.EOF:
240                                         state = STATE_DEFAULT;
241                                         return getToken(XML_ATTRIBUTE);
242                                 }
243                         }
244                 default:
245                         unread();
246                 }
247
248                 while (true) {
249                         switch (read()) {
250                         case '<':
251                                 unread();
252
253                         case ICharacterScanner.EOF:
254                         case '>':
255                                 state = STATE_DEFAULT;
256                                 return getToken(XML_TAG);
257
258                         case '"':
259                         case '\'':
260                                 unread();
261
262                                 state = STATE_TAG;
263                                 return getToken(XML_TAG);
264                         }
265                 }
266         }
267
268         private IToken nextDeclToken() {
269                 loop: while (true) {
270                         switch (read()) {
271                         case ICharacterScanner.EOF:
272                                 state = STATE_DEFAULT;
273                                 return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
274
275                         case '<':
276                                 if (parsedtd || isInternal()) {
277                                         switch (read()) {
278                                         case ICharacterScanner.EOF:
279                                                 state = STATE_DEFAULT;
280                                                 return getToken(isInternal() ? DTD_INTERNAL : null);
281
282                                         case '!':
283                                         case '?':
284                                                 unread();
285                                                 break;
286
287                                         default:
288                                                 continue loop;
289                                         }
290                                 }
291
292                                 unread();
293
294                         case '>':
295                                 state &= STATE_INTERNAL;
296                                 return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
297
298                         case '[': // <!DOCTYPE xxx [dtd]>
299                                 if (!isInternal()) {
300                                         state = STATE_INTERNAL;
301                                         return getToken(XML_DECL);
302                                 }
303                         }
304                 }
305         }
306
307         private IToken nextCommentToken() {
308                 state &= STATE_INTERNAL;
309
310                 loop: while (true) {
311                         switch (read()) {
312                         case ICharacterScanner.EOF:
313                                 break loop;
314
315                         case '-': // - -->
316                                 switch (read()) {
317                                 case ICharacterScanner.EOF:
318                                         break loop;
319
320                                 case '-': // -- -->
321                                         switch (read()) {
322                                         case ICharacterScanner.EOF:
323                                         case '>':
324                                                 break loop;
325                                         }
326
327                                         unread();
328                                         continue loop;
329                                 }
330                         }
331                 }
332
333                 return getToken(isInternal() ? DTD_INTERNAL_COMMENT : XML_COMMENT);
334         }
335
336         private IToken nextPIToken() {
337                 state &= STATE_INTERNAL;
338
339                 loop: while (true) {
340                         switch (read()) {
341                         case ICharacterScanner.EOF:
342                                 break loop;
343
344                         case '?': // ? ?>
345                                 switch (read()) {
346                                 case ICharacterScanner.EOF:
347                                 case '>':
348                                         break loop;
349                                 }
350
351                                 unread();
352                         }
353                 }
354
355                 return getToken(isInternal() ? DTD_INTERNAL_PI : XML_PI);
356         }
357
358         private IToken nextCDATAToken() {
359                 state = STATE_DEFAULT;
360
361                 loop: while (true) {
362                         switch (read()) {
363                         case ICharacterScanner.EOF:
364                                 break loop;
365
366                         case ']': // ] ]]>
367                                 switch (read()) {
368                                 case ICharacterScanner.EOF:
369                                         break loop;
370
371                                 case ']': // ]] ]]>
372                                         switch (read()) {
373                                         case ICharacterScanner.EOF:
374                                         case '>': // ]]>
375                                                 break loop;
376                                         }
377
378                                         unread();
379                                         unread();
380                                         continue loop;
381                                 }
382                         }
383                 }
384
385                 return getToken(XML_CDATA);
386         }
387
388         private IToken nextConditionalToken() {
389                 state = STATE_DEFAULT;
390
391                 int level = 1;
392
393                 loop: while (true) {
394                         switch (read()) {
395                         case ICharacterScanner.EOF:
396                                 break loop;
397
398                         case '<': // - -->
399                                 switch (read()) {
400                                 case ICharacterScanner.EOF:
401                                         break loop;
402
403                                 case '!': // -- -->
404                                         switch (read()) {
405                                         case ICharacterScanner.EOF:
406                                                 break loop;
407
408                                         case '[':
409                                                 ++level;
410                                                 continue loop;
411                                         }
412
413                                         unread();
414                                         continue loop;
415                                 }
416
417                                 unread();
418                                 continue loop;
419
420                         case ']': // - -->
421                                 switch (read()) {
422                                 case ICharacterScanner.EOF:
423                                         break loop;
424
425                                 case ']': // -- -->
426                                         switch (read()) {
427                                         case ICharacterScanner.EOF:
428                                         case '>':
429                                                 if (--level == 0) {
430                                                         break loop;
431                                                 }
432
433                                                 continue loop;
434                                         }
435
436                                         unread();
437                                         unread();
438                                         continue loop;
439                                 }
440                         }
441                 }
442
443                 return getToken(DTD_CONDITIONAL);
444         }
445
446         private IToken getToken(String type) {
447                 length = position - offset;
448
449                 if (length == 0) {
450                         return Token.EOF;
451                 }
452
453                 if (type == null) {
454                         return Token.UNDEFINED;
455                 }
456
457                 IToken token = (IToken) tokens.get(type);
458                 if (token == null) {
459                         token = new Token(type);
460                         tokens.put(type, token);
461                 }
462
463                 return token;
464         }
465
466         private boolean isInternal() {
467                 return (state & STATE_INTERNAL) != 0;
468         }
469
470         private int read() {
471                 if (position >= end) {
472                         return ICharacterScanner.EOF;
473                 }
474
475                 try {
476                         return document.getChar(position++);
477                 } catch (BadLocationException e) {
478                         --position;
479                         return ICharacterScanner.EOF;
480                 }
481         }
482
483         private void unread() {
484                 --position;
485         }
486
487         /*
488          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
489          */
490         public int getTokenOffset() {
491                 Assert.isTrue(offset >= 0, Integer.toString(offset));
492                 return offset;
493         }
494
495         /*
496          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
497          */
498         public int getTokenLength() {
499                 return length;
500         }
501
502         /*
503          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int,
504          *      int)
505          */
506         public void setRange(IDocument document, int offset, int length) {
507                 this.document = document;
508                 this.end = offset + length;
509
510                 this.offset = offset;
511                 this.position = offset;
512                 this.length = 0;
513
514                 this.state = STATE_DEFAULT;
515         }
516
517         /*
518          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
519          */
520         // public void setPartialRange(IDocument document, int offset, int length,
521         // String contentType, int partitionOffset) {
522         // state = STATE_DEFAULT;
523         // if (partitionOffset > -1) {
524         // int delta = offset - partitionOffset;
525         // if (delta > 0) {
526         // setRange(document, partitionOffset, length + delta);
527         // return;
528         // }
529         // }
530         // setRange(document, partitionOffset, length);
531         // }
532         /*
533          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
534          */
535         public void setPartialRange(IDocument document, int offset, int length,
536                         String contentType, int partitionOffset) {
537                 // boolean flag = false;
538                 this.document = document;
539                 this.end = offset + length;
540
541                 // NB! Undocumented value: -1
542                 if (partitionOffset >= 0) {
543                         offset = partitionOffset;
544                         // flag = true;
545                 }
546
547                 this.offset = offset;
548                 this.position = offset;
549                 this.length = 0;
550
551                 // if (flag) {
552                 // state = STATE_DEFAULT;
553                 // return;
554                 // }
555                 if (contentType == XML_ATTRIBUTE) {
556                         state = STATE_TAG;
557                         return;
558                 }
559
560                 if (contentType == XML_TAG) {
561                         state = isContinuationPartition() ? STATE_TAG : STATE_DEFAULT;
562                         return;
563                 }
564
565                 if (contentType == XML_DECL) {
566                         state = isContinuationPartition() ? STATE_DECL : STATE_DEFAULT;
567                         return;
568                 }
569
570                 if (contentType == DTD_INTERNAL || contentType == DTD_INTERNAL_PI
571                                 || contentType == DTD_INTERNAL_DECL
572                                 || contentType == DTD_INTERNAL_COMMENT) {
573                         state = STATE_INTERNAL;
574                         return;
575                 }
576
577                 state = STATE_DEFAULT;
578         }
579
580         private boolean isContinuationPartition() {
581                 try {
582                         String type = document.getContentType(offset - 1);
583
584                         if (type != IDocument.DEFAULT_CONTENT_TYPE) {
585                                 return true;
586                         }
587                 } catch (BadLocationException e) {
588                 }
589
590                 return false;
591         }
592 }