View Javadoc

1   /*
2    * EDI-Knight Integration and Transformation Platform
3    * Copyright (C) 2006-2007 Holger Joest <hjoest@users.sourceforge.net>
4    *
5    * This program is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation; either version 2 of the License, or
8    * (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with this program; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   */
19  
20  package net.sf.ediknight.codec.x12.parser;
21  
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
29  
30  import net.sf.ediknight.Inspection;
31  import net.sf.ediknight.Recognizer;
32  import net.sf.ediknight.edi.SyntaxHandler;
33  
34  
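/**
 * Recognizer for ANSI X12 interchanges. It looks at the beginning of an
 * input stream to guess the character encoding, the service characters
 * and the directory version, and reports the result together with the
 * associated parser.
 */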
38  final class ANSIX12Recognizer
39  implements Recognizer<ANSIX12Format> {
40  
41      /*** */
42      private static final char[] DEFAULT_SERVICE_CHARACTERS =
43          new char[] {
44          '*', ':', '\n'
45      };
46  
47      /*** Recognizer hints. */
48      private Map<String, Object> hints =
49          new HashMap<String, Object>();
50  
51      private double likeliness;
52  
53      /*** The associated parser. */
54      private ANSIX12Parser parser;
55  
56  
57      /***
58       * @param parser the parser
59       */
60      ANSIX12Recognizer(ANSIX12Parser parser) {
61          this.parser = parser;
62      }
63  
64  
65      /***
66       * {@inheritDoc}
67       * @see net.sf.ediknight.Recognizer#inspect(java.io.File)
68       */
69      public Inspection<ANSIX12Format> inspect(File file) {
70          try {
71              return null;
72          } catch (Exception ex) {
73              return new Inspection<ANSIX12Format>();
74          }
75      }
76  
77  
78      /***
79       * {@inheritDoc}
80       * @see net.sf.ediknight.Recognizer#inspect(
81       *      java.io.InputStream)
82       */
83      public Inspection<ANSIX12Format> inspect(InputStream stream) {
84          try {
85              return analyze(stream, 1024);
86          } catch (Exception ex) {
87              return new Inspection<ANSIX12Format>();
88          }
89      }
90  
91  
92      /***
93       * {@inheritDoc}
94       * @see net.sf.ediknight.Recognizer#addHint(
95       *      java.lang.String, java.lang.Object)
96       */
97      public void addHint(String hint, Object value) {
98          hints.put(hint, value);
99      }
100 
101 
102     /***
103      * @param istream an input stream
104      * @param lookAhead the number of bytes to look ahead
105      * @return the inspection result
106      * @throws IOException if an I/O error occurs
107      */
108     private Inspection<ANSIX12Format> analyze(
109             InputStream istream,
110             int lookAhead)
111     throws IOException {
112         istream.mark(lookAhead);
113         String encoding =
114             (String) hints.get(Recognizer.ENCODING);
115         if (encoding == null) {
116             encoding = analyzeEncoding(istream);
117         }
118         istream.reset();
119         char[] serviceCharacters =
120             analyzeServiceCharacters(istream, encoding);
121         String directoryVersion =
122             (String) hints.get(Recognizer.DIRECTORY);
123         if (directoryVersion == null) {
124             istream.reset();
125             directoryVersion =
126                 analyzeDirectory(
127                         istream, encoding, serviceCharacters);
128         }
129         ANSIX12Format format =
130             new ANSIX12Format(
131                     encoding,
132                     directoryVersion,
133                     serviceCharacters);
134         parser.setFormat(format);
135         return new Inspection<ANSIX12Format>(
136                 parser, format, likeliness);
137     }
138 
139 
140     /***
141      * @param istream an input stream
142      * @return the guessed encoding
143      * @throws IOException if an I/O error occurs
144      */
145     private String analyzeEncoding(InputStream istream)
146     throws IOException {
147         int b = istream.read();
148         if (b == 0x00) {
149             /* BOM:  00 00 FE FF   UTF-32BE */
150             b = istream.read();
151             if (b == 0x00) {
152                 likeliness = 1d;
153                 return "UTF-32BE";
154             } else if (b == 73) {
155                 likeliness = 1d;
156                 return "UTF-16BE";
157             }
158         } else if (b == 0xef) {
159             /* BOM:  EF BB BF      UTF-8 */
160             likeliness = 1d;
161             return "UTF-8";
162         } else if (b == 0xfe) {
163             /* BOM:  FE FF         UTF-16BE */
164             likeliness = 1d;
165             return "UTF-16BE";
166         } else if (b == 0xff) {
167             /* BOM:  FF FE 00 00   UTF-32, little-endian
168                      FF FE         UTF-16, little-endian */
169             istream.skip(1);
170             b = istream.read();
171             if (b == 0x00) {
172                 likeliness = 1d;
173                 return "UTF-32LE";
174             }
175             likeliness = 1d;
176             return "UTF-16LE";
177         } else if (b == 73) {
178             b = istream.read();
179             if (b == 0x00) {
180                 likeliness = 1d;
181                 return "UTF-16LE";
182             }
183             likeliness = 0.999d;
184             return "ISO-8859-1";
185         } else if (b == 132) {
186             /* Simplified chinese */
187             likeliness = 1d;
188             return "GB18030";
189         } else if (b == 228) {
190             /* looks like EBCDIC */
191             likeliness = 1d;
192             return "CP500";
193         }
194         // try per default
195         likeliness = 0.1d;
196         return "ISO-8859-1";
197     }
198 
199 
200     /***
201      * @param istream an input stream
202      * @param encoding the character encoding
203      * @return the service characters
204      * @throws IOException if an I/O error occurs
205      */
206     private char[] analyzeServiceCharacters(
207             InputStream istream,
208             String encoding)
209     throws IOException {
210         Reader reader = new InputStreamReader(istream, encoding);
211         int ch = reader.read();
212         if (ch != 'I') {
213             return DEFAULT_SERVICE_CHARACTERS;
214         }
215         ch = reader.read();
216         if (ch != 'S') {
217             return DEFAULT_SERVICE_CHARACTERS;
218         }
219         ch = reader.read();
220         if (ch != 'A') {
221             return DEFAULT_SERVICE_CHARACTERS;
222         }
223         char[] serviceCharacters = new char[3];
224         ch = reader.read();
225         if (ch < 1) {
226             return DEFAULT_SERVICE_CHARACTERS;
227         }
228         char dataSeparator = (char) ch;
229         int count = 0;
230         ch = reader.read();
231         while (ch > -1 && count < 14) {
232             if (ch == dataSeparator) {
233                 ++count;
234             }
235             ch = reader.read();
236         }
237         ch = reader.read();
238         if (ch < 1) {
239             return DEFAULT_SERVICE_CHARACTERS;
240         }
241         serviceCharacters[0] = dataSeparator;
242         if (reader.read(serviceCharacters, 1, 2) != 2) {
243             return DEFAULT_SERVICE_CHARACTERS;
244         }
245         return serviceCharacters;
246     }
247 
248 
249     /***
250      * @param stream an input stream
251      * @param encoding the previously determined encoding
252      * @param serviceCharacters the service characters
253      * @return the guessed directory
254      * @throws IOException if an I/O error occurs
255      */
256     private String analyzeDirectory(
257             InputStream stream,
258             String encoding,
259             char[] serviceCharacters)
260     throws IOException {
261         Reader reader = new InputStreamReader(stream, encoding);
262         WhichVersion version = new WhichVersion();
263         ANSIX12Format format =
264             new ANSIX12Format(encoding, null, serviceCharacters);
265         SyntaxHandler save = parser.getSyntaxHandler();
266         try {
267             parser.setSyntaxHandler(version);
268             parser.setFormat(format);
269             parser.parse(reader);
270             return version.toString();
271         } catch (Exception ex) {
272             return version.toString();
273         } finally {
274             parser.setSyntaxHandler(save);
275         }
276     }
277 
278 
279     private static class WhichVersion
280     implements SyntaxHandler {
281 
282         private int segmentCount;
283 
284         private int elementCount;
285 
286         private boolean inGsSegment;
287 
288         private boolean inGs08;
289 
290         private String version0;
291 
292         private String version1;
293 
294 
295         WhichVersion() {
296         }
297 
298         public void characters(String value) {
299             if (value.length() == 6) {
300                 for (int k = 0; k < 6; ++k) {
301                     if (!Character.isDigit(value.charAt(k))) {
302                         return;
303                     }
304                 }
305                 if (inGsSegment && inGs08) {
306                     version0 = value;
307                 } else {
308                     version1 = value;
309                 }
310             }
311         }
312 
313         public void finish() {
314         }
315 
316         public void nextCompositeElement() {
317             inGs08 = false;
318             if (inGsSegment) {
319                 if (elementCount++ == 7) {
320                     inGs08 = true;
321                 }
322             }
323         }
324 
325         public void nextSegment(String segmentId) {
326             if ("GS".equals(segmentId)) {
327                 inGsSegment = true;
328             } else if (inGsSegment) {
329                 throw new PrematureStop();
330             }
331             if (segmentCount++ > 5) {
332                 throw new PrematureStop();
333             }
334         }
335 
336         public void nextSimpleElement() {
337         }
338 
339         @Override
340         public String toString() {
341             if (version0 != null) {
342                 return version0;
343             }
344             if (version1 != null) {
345                 return version1;
346             }
347             return "004010";
348         }
349 
350     }
351 
352     private static class PrematureStop
353     extends RuntimeException {
354 
355         private static final long serialVersionUID = 3268184132387374766L;
356 
357     }
358 
359 }
360