View Javadoc
Minimize
Table

1   /*
2    * Copyright 2003-2007 the original author or authors.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  
18  package org.codehaus.groovy.antlr;
19  
20  import java.io.IOException;
21  import java.io.Reader;
22  
23  import antlr.CharScanner;
24  import antlr.Token;
25  import antlr.TokenStreamException;
26  
27  /**
28   * Translates GLS-defined unicode escapes into characters. Throws an exception
29   * in the event of an invalid unicode escape being detected.
30   *
31   * <p>No attempt has been made to optimize this class for speed or
32   * space.</p>
33   *
34   * @version $Revision: 21730 $
35   */
36  public class UnicodeEscapingReader extends Reader {
37  
38      private final Reader reader;
39      private CharScanner lexer;
40      private boolean hasNextChar = false;
41      private int nextChar;
42      private final SourceBuffer sourceBuffer;
43      private int previousLine;
44      private int numUnicodeEscapesFound = 0;
45      private int numUnicodeEscapesFoundOnCurrentLine = 0;
46  
47      private static class DummyLexer extends CharScanner {
48          final private Token t = new Token();
49          public Token nextToken() throws TokenStreamException {
50              return t;
51          }
52          @Override
53          public int getColumn() {
54              return 0;
55          }
56          @Override
57          public int getLine() {
58              return 0;
59          }
60      }
61      
62      /**
63       * Constructor.
64       * @param reader The reader that this reader will filter over.
65       */
66      public UnicodeEscapingReader(Reader reader,SourceBuffer sourceBuffer) {
67          this.reader = reader;
68          this.sourceBuffer = sourceBuffer;
69          this.lexer = new DummyLexer();
70      }
71  
72      /**
73       * Sets the lexer that is using this reader. Must be called before the
74       * lexer is used.
75       */
76      public void setLexer(CharScanner lexer) {
77          this.lexer = lexer;
78      }
79  
80      /**
81       * Reads characters from the underlying reader.
82       * @see java.io.Reader#read(char[],int,int)
83       */
84      public int read(char cbuf[], int off, int len) throws IOException {
85          int c = 0;
86          int count = 0;
87          while (count < len && (c = read())!= -1) {
88              cbuf[off + count] = (char) c;
89              count++;
90          }
91          return (count == 0 && c == -1) ? -1 : count;
92      }
93  
94      /**
95       * Gets the next character from the underlying reader,
96       * translating escapes as required.
97       * @see java.io.Reader#close()
98       */
99      public int read() throws IOException {
100         if (hasNextChar) {
101             hasNextChar = false;
102             write(nextChar);
103             return nextChar;
104         }
105 
106         if (previousLine != lexer.getLine()) {
107             // new line, so reset unicode escapes
108             numUnicodeEscapesFoundOnCurrentLine = 0;
109             previousLine = lexer.getLine();
110         }
111         
112         int c = reader.read();
113         if (c != '\\') {
114             write(c);
115             return c;
116         }
117 
118         // Have one backslash, continue if next char is 'u'
119         c = reader.read();
120         if (c != 'u') {
121             hasNextChar = true;
122             nextChar = c;
123             write('\\');
124             return '\\';
125         }
126 
127         // Swallow multiple 'u's
128         int numberOfUChars = 0;
129         do {
130             numberOfUChars++;
131             c = reader.read();
132         } while (c == 'u');
133 
134         // Get first hex digit
135         checkHexDigit(c);
136         StringBuffer charNum = new StringBuffer();
137         charNum.append((char) c);
138 
139         // Must now be three more hex digits
140         for (int i = 0; i < 3; i++) {
141             c = reader.read();
142             checkHexDigit(c);
143             charNum.append((char) c);
144         }
145         int rv = Integer.parseInt(charNum.toString(), 16);
146         write(rv);
147         
148         numUnicodeEscapesFound += 4 + numberOfUChars;
149         numUnicodeEscapesFoundOnCurrentLine += 4 + numberOfUChars;
150 
151         return rv;
152     }
153     private void write(int c) {
154         if (sourceBuffer != null) {sourceBuffer.write(c);}
155     }
156     /**
157      * Checks that the given character is indeed a hex digit.
158      */
159     private void checkHexDigit(int c) throws IOException {
160         if (c >= '0' && c <= '9') {
161             return;
162         }
163         if (c >= 'a' && c <= 'f') {
164             return;
165         }
166         if (c >= 'A' && c <= 'F') {
167             return;
168         }
169         // Causes the invalid escape to be skipped
170         hasNextChar = true;
171         nextChar = c;
172         throw new IOException("Did not find four digit hex character code."
173                 + " line: " + lexer.getLine() + " col:" + lexer.getColumn());
174     }
175 
176     public int getUnescapedUnicodeColumnCount() {
177         return numUnicodeEscapesFoundOnCurrentLine;
178     }
179 
180     public int getUnescapedUnicodeOffsetCount() {
181         return numUnicodeEscapesFound;
182     }
183 
184     /**
185      * Closes this reader by calling close on the underlying reader.
186      * @see java.io.Reader#close()
187      */
188     public void close() throws IOException {
189         reader.close();
190     }
191 }