1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package net.sf.ediknight.codec.edifact.parser;
21
22 import java.io.ByteArrayInputStream;
23 import java.io.File;
24 import java.io.FileInputStream;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.InputStreamReader;
28 import java.io.Reader;
29 import java.io.UnsupportedEncodingException;
30 import java.util.Arrays;
31
32 import net.sf.ediknight.ParseException;
33 import net.sf.ediknight.Recognizer;
34 import net.sf.ediknight.edi.EDIParseException;
35 import net.sf.ediknight.edi.EDIParser;
36 import net.sf.ediknight.edi.SyntaxHandler;
37 import net.sf.ediknight.edi.UnexpectedEndOfDocumentException;
38 import net.sf.ediknight.codec.edifact.xml.ISO9735XMLReader;
39 import net.sf.ediknight.io.JailedInputStream;
40
41 import org.xml.sax.XMLReader;
42
43
44 /***
45 * This is the EDIFACT parser.
46 */
47 public final class ISO9735Parser
48 implements EDIParser<ISO9735Format> {
49
50 /*** */
51 private static final int BUFLEN = 1024;
52
53 /*** */
54 private static final int EOF = -1;
55
56 /*** */
57 private static final int COMPONENT_SEPARATOR = 0;
58
59 /*** */
60 private static final int DATA_SEPARATOR = 1;
61
62 /*** Comma or full stop. */
63 private static final int DECIMAL_NOTATION = 2;
64
65 /*** If not used, insert space character. */
66 private static final int RELEASE_INDICATOR = 3;
67
68 /*** */
69 private static final int SEGMENT_TERMINATOR = 5;
70
71 /*** */
72 private static final int DATA = 6;
73
74 /*** */
75 private static final int WHITESPACE = 7;
76
77 /*** */
78 private static final int UNKNOWN = 8;
79
80 /*** */
81 private Recognizer<ISO9735Format> recognizer;
82
83 /*** */
84 private ISO9735Format format;
85
86 /*** */
87 private Reader reader;
88
89 /*** */
90 private StringBuffer token = new StringBuffer();
91
92 /*** */
93 private int ch;
94
95 /*** */
96 private int tc;
97
98 /*** */
99 private SyntaxHandler handler;
100
101
102 /***
103 * @return the format
104 */
105 public ISO9735Format getFormat() {
106 return format;
107 }
108
109
110 /***
111 * {@inheritDoc}
112 */
113 public SyntaxHandler getSyntaxHandler() {
114 return handler;
115 }
116
117
118 /***
119 * {@inheritDoc}
120 */
121 public void setSyntaxHandler(SyntaxHandler handler) {
122 this.handler = handler;
123 }
124
125
126 /***
127 * {@inheritDoc}
128 */
129 public void parse(File file)
130 throws IOException, ParseException {
131 if (file == null) {
132 throw new NullPointerException();
133 }
134 InputStream stream = new FileInputStream(file);
135 try {
136 parse(stream);
137 } finally {
138 stream.close();
139 }
140 }
141
142
143 /***
144 * {@inheritDoc}
145 */
146 public void parse(InputStream stream)
147 throws IOException, ParseException {
148 if (stream == null) {
149 throw new NullPointerException();
150 }
151 if (!(stream instanceof JailedInputStream)) {
152 stream = new JailedInputStream(stream);
153 }
154 Recognizer<ISO9735Format> rec = getRecognizer();
155 stream.mark(BUFLEN);
156 format = rec.inspect(stream).getFormat();
157 stream.reset();
158 ((JailedInputStream) stream).release();
159 Reader r = new InputStreamReader(
160 stream,
161 format.getEncoding());
162 parse(r);
163 }
164
165
166 /***
167 * Parse an EDIFACT message.
168 *
169 * @param reader a reader
170 * @throws IOException if an I/O error occurs while parsing
171 * @throws ParseException if the document is invalid
172 */
173 void parse(Reader r)
174 throws IOException, ParseException {
175 this.reader = r;
176 if (handler == null) {
177 handler = new DummySyntaxHandler();
178 }
179 ch = reader.read();
180 nextToken();
181 while (tc != EOF) {
182 String ts = token.toString();
183 if (ts.equals("UNA")) {
184 checkServiceStringAdvice();
185 } else {
186 parseSegment();
187 }
188 }
189 handler.finish();
190 }
191
192
193 /***
194 * {@inheritDoc}
195 * @see net.sf.ediknight.Parser#getRecognizer()
196 */
197 public Recognizer<ISO9735Format> getRecognizer() {
198 if (recognizer == null) {
199 recognizer = new ISO9735Recognizer(this);
200 }
201 return recognizer;
202 }
203
204
205 /***
206 * {@inheritDoc}
207 */
208 public boolean isEncodingSupported(String encoding) {
209 try {
210 ByteArrayInputStream dummy =
211 new ByteArrayInputStream(new byte[0]);
212 new InputStreamReader(dummy, encoding);
213 } catch (UnsupportedEncodingException ex) {
214 return false;
215 }
216 return true;
217 }
218
219
220 /***
221 * {@inheritDoc}
222 */
223 public XMLReader getXMLReader() {
224 ISO9735XMLReader xmlReader =
225 new ISO9735XMLReader(this);
226 return xmlReader;
227 }
228
229
230 /***
231 * @param format the format to set
232 */
233 void setFormat(ISO9735Format format) {
234 this.format = format;
235 }
236
237
238 /***
239 * Parse the service advice string.
240 *
241 * @throws IOException if an I/O error occurs
242 */
243 private void checkServiceStringAdvice()
244 throws IOException, ParseException {
245 char[] serviceCharacters = new char[7];
246 serviceCharacters[0] = (char) ch;
247 int len = serviceCharacters.length - 1;
248 if (reader.read(serviceCharacters, 1, len) != len) {
249 throw new UnexpectedEndOfDocumentException(
250 "Premature end of document", 0, 0);
251 }
252 if (serviceCharacters[6] == 'U') {
253 serviceCharacters[6] = 0;
254 }
255 if (!Arrays.equals(
256 serviceCharacters,
257 format.getServiceCharacters())) {
258 throw new ParseException(
259 "UNA service characters don't match");
260 }
261 if (serviceCharacters[6] == 0) {
262 ch = 'U';
263 } else {
264 ch = reader.read();
265 }
266 nextToken();
267 while (tc == WHITESPACE) {
268 nextToken();
269 }
270 }
271
272
273 /***
274 * Parse a segment.
275 *
276 * @throws IOException if an I/O error occurs
277 * @throws EDIParseException if there is an EDI format error
278 */
279 private void parseSegment()
280 throws IOException, EDIParseException {
281 handler.nextSegment(token.toString());
282 nextToken();
283 if (tc == EOF) {
284 throw new UnexpectedEndOfDocumentException(
285 "Premature end of document", 0, 0);
286 }
287 while (tc != EOF && tc != SEGMENT_TERMINATOR) {
288 if (tc == DATA_SEPARATOR) {
289 handler.nextCompositeElement();
290 } else if (tc == COMPONENT_SEPARATOR) {
291 handler.nextSimpleElement();
292 } else if (tc == WHITESPACE || tc == DATA) {
293 handler.characters(token.toString());
294 } else {
295 throw new EDIParseException(
296 "Invalid token: "
297 + token.toString(),
298 0, 0);
299 }
300 nextToken();
301 }
302 nextToken();
303 while (tc == WHITESPACE) {
304 nextToken();
305 }
306 }
307
308
309 /***
310 * Fetch the next token.
311 *
312 * @throws IOException if an I/O error occurs
313 */
314 private void nextToken()
315 throws IOException {
316 token.setLength(0);
317 if (tc == EOF) {
318 return;
319 }
320 if (ch == -1) {
321 tc = EOF;
322 return;
323 }
324 tc = serviceToken();
325 switch (tc) {
326 case COMPONENT_SEPARATOR:
327 case DATA_SEPARATOR:
328 case SEGMENT_TERMINATOR:
329 token.append((char) ch);
330 ch = reader.read();
331 return;
332 case RELEASE_INDICATOR:
333 case DECIMAL_NOTATION:
334 default:
335 if (Character.isWhitespace((char) ch)) {
336 tc = WHITESPACE;
337 while (ch != -1
338 && Character.isWhitespace((char) ch)) {
339 token.append((char) ch);
340 ch = reader.read();
341 }
342 } else {
343 tc = DATA;
344 int st = serviceToken();
345 boolean release = false;
346 while (ch != -1
347 && (release
348 || (st != COMPONENT_SEPARATOR
349 && st != DATA_SEPARATOR
350 && st != SEGMENT_TERMINATOR))) {
351 if (st == RELEASE_INDICATOR && !release) {
352 release = true;
353 } else {
354 token.append((char) ch);
355 release = false;
356 }
357 ch = reader.read();
358 if (Character.isWhitespace((char) ch)) {
359 break;
360 }
361 st = serviceToken();
362 }
363 }
364 }
365 }
366
367
368 /***
369 * Determines the type of token.
370 *
371 * @return the token type
372 */
373 private int serviceToken() {
374 if (ch == -1) {
375 return EOF;
376 }
377 char[] serviceCharacters =
378 format.getServiceCharacters();
379 for (int k = 0; k < serviceCharacters.length; ++k) {
380 if (ch == serviceCharacters[k]) {
381 return k;
382 }
383 }
384 return UNKNOWN;
385 }
386
387 }
388