View Javadoc

1   /*
2    * EDI-Knight Integration and Transformation Platform
3    * Copyright (C) 2006-2007 Holger Joest <hjoest@users.sourceforge.net>
4    *
5    * This program is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation; either version 2 of the License, or
8    * (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with this program; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   */
19  
20  package net.sf.ediknight.codec.edifact.parser;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.File;
24  import java.io.FileInputStream;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.InputStreamReader;
28  import java.io.Reader;
29  import java.io.UnsupportedEncodingException;
30  import java.util.Arrays;
31  
32  import net.sf.ediknight.ParseException;
33  import net.sf.ediknight.Recognizer;
34  import net.sf.ediknight.edi.EDIParseException;
35  import net.sf.ediknight.edi.EDIParser;
36  import net.sf.ediknight.edi.SyntaxHandler;
37  import net.sf.ediknight.edi.UnexpectedEndOfDocumentException;
38  import net.sf.ediknight.codec.edifact.xml.ISO9735XMLReader;
39  import net.sf.ediknight.io.JailedInputStream;
40  
41  import org.xml.sax.XMLReader;
42  
43  
44  /***
45   * This is the EDIFACT parser.
46   */
47  public final class ISO9735Parser
48  implements EDIParser<ISO9735Format> {
49  
50      /*** */
51      private static final int BUFLEN = 1024;
52  
53      /*** */
54      private static final int EOF = -1;
55  
56      /*** */
57      private static final int COMPONENT_SEPARATOR = 0;
58  
59      /*** */
60      private static final int DATA_SEPARATOR = 1;
61  
62      /*** Comma or full stop. */
63      private static final int DECIMAL_NOTATION = 2;
64  
65      /*** If not used, insert space character. */
66      private static final int RELEASE_INDICATOR = 3;
67  
68      /*** */
69      private static final int SEGMENT_TERMINATOR = 5;
70  
71      /*** */
72      private static final int DATA = 6;
73  
74      /*** */
75      private static final int WHITESPACE = 7;
76  
77      /*** */
78      private static final int UNKNOWN = 8;
79  
80      /*** */
81      private Recognizer<ISO9735Format> recognizer;
82  
83      /*** */
84      private ISO9735Format format;
85  
86      /*** */
87      private Reader reader;
88  
89      /*** */
90      private StringBuffer token = new StringBuffer();
91  
92      /*** */
93      private int ch;
94  
95      /*** */
96      private int tc;
97  
98      /*** */
99      private SyntaxHandler handler;
100 
101 
102     /***
103      * @return the format
104      */
105     public ISO9735Format getFormat() {
106         return format;
107     }
108 
109 
110     /***
111      * {@inheritDoc}
112      */
113     public SyntaxHandler getSyntaxHandler() {
114         return handler;
115     }
116 
117 
118     /***
119      * {@inheritDoc}
120      */
121     public void setSyntaxHandler(SyntaxHandler handler) {
122         this.handler = handler;
123     }
124 
125 
126     /***
127      * {@inheritDoc}
128      */
129     public void parse(File file)
130     throws IOException, ParseException {
131         if (file == null) {
132             throw new NullPointerException();
133         }
134         InputStream stream = new FileInputStream(file);
135         try {
136             parse(stream);
137         } finally {
138             stream.close();
139         }
140     }
141 
142 
143     /***
144      * {@inheritDoc}
145      */
146     public void parse(InputStream stream)
147     throws IOException, ParseException {
148         if (stream == null) {
149             throw new NullPointerException();
150         }
151         if (!(stream instanceof JailedInputStream)) {
152             stream = new JailedInputStream(stream);
153         }
154         Recognizer<ISO9735Format> rec = getRecognizer();
155         stream.mark(BUFLEN);
156         format = rec.inspect(stream).getFormat();
157         stream.reset();
158         ((JailedInputStream) stream).release();
159         Reader r = new InputStreamReader(
160                 stream,
161                 format.getEncoding());
162         parse(r);
163     }
164 
165 
166     /***
167      * Parse an EDIFACT message.
168      *
169      * @param reader a reader
170      * @throws IOException if an I/O error occurs while parsing
171      * @throws ParseException if the document is invalid
172      */
173     void parse(Reader r)
174     throws IOException, ParseException {
175         this.reader = r;
176         if (handler == null) {
177             handler = new DummySyntaxHandler();
178         }
179         ch = reader.read();
180         nextToken();
181         while (tc != EOF) {
182             String ts = token.toString();
183             if (ts.equals("UNA")) {
184                 checkServiceStringAdvice();
185             } else {
186                 parseSegment();
187             }
188         }
189         handler.finish();
190     }
191 
192 
193     /***
194      * {@inheritDoc}
195      * @see net.sf.ediknight.Parser#getRecognizer()
196      */
197     public Recognizer<ISO9735Format> getRecognizer() {
198         if (recognizer == null) {
199             recognizer = new ISO9735Recognizer(this);
200         }
201         return recognizer;
202     }
203 
204 
205     /***
206      * {@inheritDoc}
207      */
208     public boolean isEncodingSupported(String encoding) {
209         try {
210             ByteArrayInputStream dummy =
211                 new ByteArrayInputStream(new byte[0]);
212             new InputStreamReader(dummy, encoding);
213         } catch (UnsupportedEncodingException ex) {
214             return false;
215         }
216         return true;
217     }
218 
219 
220     /***
221      * {@inheritDoc}
222      */
223     public XMLReader getXMLReader() {
224         ISO9735XMLReader xmlReader =
225             new ISO9735XMLReader(this);
226         return xmlReader;
227     }
228 
229 
230     /***
231      * @param format the format to set
232      */
233     void setFormat(ISO9735Format format) {
234         this.format = format;
235     }
236 
237 
238     /***
239      * Parse the service advice string.
240      *
241      * @throws IOException if an I/O error occurs
242      */
243     private void checkServiceStringAdvice()
244     throws IOException, ParseException {
245         char[] serviceCharacters = new char[7];
246         serviceCharacters[0] = (char) ch;
247         int len = serviceCharacters.length - 1;
248         if (reader.read(serviceCharacters, 1, len) != len) {
249             throw new UnexpectedEndOfDocumentException(
250                     "Premature end of document", 0, 0);
251         }
252         if (serviceCharacters[6] == 'U') {
253             serviceCharacters[6] = 0;
254         }
255         if (!Arrays.equals(
256                 serviceCharacters,
257                 format.getServiceCharacters())) {
258             throw new ParseException(
259                     "UNA service characters don't match");
260         }
261         if (serviceCharacters[6] == 0) {
262             ch = 'U';
263         } else {
264             ch = reader.read();
265         }
266         nextToken();
267         while (tc == WHITESPACE) {
268             nextToken();
269         }
270     }
271 
272 
273     /***
274      * Parse a segment.
275      *
276      * @throws IOException if an I/O error occurs
277      * @throws EDIParseException if there is an EDI format error
278      */
279     private void parseSegment()
280     throws IOException, EDIParseException {
281         handler.nextSegment(token.toString());
282         nextToken();
283         if (tc == EOF) {
284             throw new UnexpectedEndOfDocumentException(
285                     "Premature end of document", 0, 0);
286         }
287         while (tc != EOF && tc != SEGMENT_TERMINATOR) {
288             if (tc == DATA_SEPARATOR) {
289                 handler.nextCompositeElement();
290             } else if (tc == COMPONENT_SEPARATOR) {
291                 handler.nextSimpleElement();
292             } else if (tc == WHITESPACE || tc == DATA) {
293                 handler.characters(token.toString());
294             } else {
295                 throw new EDIParseException(
296                         "Invalid token: "
297                         + token.toString(),
298                         0, 0);
299             }
300             nextToken();
301         }
302         nextToken();
303         while (tc == WHITESPACE) {
304             nextToken();
305         }
306     }
307 
308 
309     /***
310      * Fetch the next token.
311      *
312      * @throws IOException if an I/O error occurs
313      */
314     private void nextToken()
315     throws IOException {
316         token.setLength(0);
317         if (tc == EOF) {
318             return;
319         }
320         if (ch == -1) {
321             tc = EOF;
322             return;
323         }
324         tc = serviceToken();
325         switch (tc) {
326         case COMPONENT_SEPARATOR:
327         case DATA_SEPARATOR:
328         case SEGMENT_TERMINATOR:
329             token.append((char) ch);
330             ch = reader.read();
331             return;
332         case RELEASE_INDICATOR:
333         case DECIMAL_NOTATION:
334         default:
335             if (Character.isWhitespace((char) ch)) {
336                 tc = WHITESPACE;
337                 while (ch != -1
338                         && Character.isWhitespace((char) ch)) {
339                     token.append((char) ch);
340                     ch = reader.read();
341                 }
342             } else {
343                 tc = DATA;
344                 int st = serviceToken();
345                 boolean release = false;
346                 while (ch != -1
347                         && (release
348                                 || (st != COMPONENT_SEPARATOR
349                                         && st != DATA_SEPARATOR
350                                         && st != SEGMENT_TERMINATOR))) {
351                     if (st == RELEASE_INDICATOR && !release) {
352                         release = true;
353                     } else {
354                         token.append((char) ch);
355                         release = false;
356                     }
357                     ch = reader.read();
358                     if (Character.isWhitespace((char) ch)) {
359                         break;
360                     }
361                     st = serviceToken();
362                 }
363             }
364         }
365     }
366 
367 
368     /***
369      * Determines the type of token.
370      *
371      * @return the token type
372      */
373     private int serviceToken() {
374         if (ch == -1) {
375             return EOF;
376         }
377         char[] serviceCharacters =
378             format.getServiceCharacters();
379         for (int k = 0; k < serviceCharacters.length; ++k) {
380             if (ch == serviceCharacters[k]) {
381                 return k;
382             }
383         }
384         return UNKNOWN;
385     }
386 
387 }
388