001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.ftp.parser;
019
020import java.text.ParseException;
021import java.util.List;
022
023import org.apache.commons.net.ftp.FTPClientConfig;
024import org.apache.commons.net.ftp.FTPFile;
025
026/**
027 * Implementation of FTPFileEntryParser and FTPFileListParser for IBM zOS/MVS
028 * Systems.
029 *
030 * @author <a href="henrik.sorensen@balcab.ch">Henrik Sorensen</a>
031 * @author <a href="jnadler@srcginc.com">Jeff Nadler</a>
032 * @author <a href="wnoto@openfinance.com">William Noto</a>
033 *
034 * @version $Id: MVSFTPEntryParser.java 1230358 2012-01-12 01:51:02Z sebb $
035 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for
036 *      usage instructions)
037 */
038public class MVSFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {
039
040    static final int UNKNOWN_LIST_TYPE = -1;
041    static final int FILE_LIST_TYPE = 0;
042    static final int MEMBER_LIST_TYPE = 1;
043    static final int UNIX_LIST_TYPE = 2;
044    static final int JES_LEVEL_1_LIST_TYPE = 3;
045    static final int JES_LEVEL_2_LIST_TYPE = 4;
046
047    private int isType = UNKNOWN_LIST_TYPE;
048
049    /**
050     * Fallback parser for Unix-style listings
051     */
052    private UnixFTPEntryParser unixFTPEntryParser;
053
054    /**
055     * Dates are ignored for file lists, but are used for member lists where
056     * possible
057     */
058    static final String DEFAULT_DATE_FORMAT = "yyyy/MM/dd HH:mm"; // 2001/09/18
059                                                                    // 13:52
060
061    /**
062     * Matches these entries: Volume Unit Referred Ext Used Recfm Lrecl BlkSz
063     * Dsorg Dsname B10142 3390 2006/03/20 2 31 F 80 80 PS MDI.OKL.WORK
064     *
065     */
066    static final String FILE_LIST_REGEX = "\\S+\\s+" + // volume
067                                                                // ignored
068            "\\S+\\s+" + // unit - ignored
069            "\\S+\\s+" + // access date - ignored
070            "\\S+\\s+" + // extents -ignored
071            "\\S+\\s+" + // used - ignored
072            "[FV]\\S*\\s+" + // recfm - must start with F or V
073            "\\S+\\s+" + // logical record length -ignored
074            "\\S+\\s+" + // block size - ignored
075            "(PS|PO|PO-E)\\s+" + // Dataset organisation. Many exist
076            // but only support: PS, PO, PO-E
077            "(\\S+)\\s*"; // Dataset Name (file name)
078
079    /**
080     * Matches these entries: Name VV.MM Created Changed Size Init Mod Id
081     * TBSHELF 01.03 2002/09/12 2002/10/11 09:37 11 11 0 KIL001
082     */
083    static final String MEMBER_LIST_REGEX = "(\\S+)\\s+" + // name
084            "\\S+\\s+" + // version, modification (ignored)
085            "\\S+\\s+" + // create date (ignored)
086            "(\\S+)\\s+" + // modification date
087            "(\\S+)\\s+" + // modification time
088            "\\S+\\s+" + // size in lines (ignored)
089            "\\S+\\s+" + // size in lines at creation(ignored)
090            "\\S+\\s+" + // lines modified (ignored)
091            "\\S+\\s*"; // id of user who modified (ignored)
092
093    /**
094     * Matches these entries, note: no header: IBMUSER1 JOB01906 OUTPUT 3 Spool
095     * Files 012345678901234567890123456789012345678901234 1 2 3 4
096     */
097    static final String JES_LEVEL_1_LIST_REGEX = "(\\S+)\\s+" + // job
098                                                                        // name
099                                                                        // ignored
100            "(\\S+)\\s+" + // job number
101            "(\\S+)\\s+" + // job status (OUTPUT,INPUT,ACTIVE)
102            "(\\S+)\\s+" + // number of spool files
103            "(\\S+)\\s+" + // Text "Spool" ignored
104            "(\\S+)\\s*" // Text "Files" ignored
105    ;
106
107    /**
108     * JES INTERFACE LEVEL 2 parser Matches these entries: JOBNAME JOBID OWNER
109     * STATUS CLASS IBMUSER1 JOB01906 IBMUSER OUTPUT A RC=0000 3 spool files
110     * IBMUSER TSU01830 IBMUSER OUTPUT TSU ABEND=522 3 spool files
111     * 012345678901234567890123456789012345678901234 1 2 3 4
112     * 012345678901234567890123456789012345678901234567890
113     */
114
115    static final String JES_LEVEL_2_LIST_REGEX = "(\\S+)\\s+" + // job
116                                                                        // name
117                                                                        // ignored
118            "(\\S+)\\s+" + // job number
119            "(\\S+)\\s+" + // owner ignored
120            "(\\S+)\\s+" + // job status (OUTPUT,INPUT,ACTIVE) ignored
121            "(\\S+)\\s+" + // job class ignored
122            "(\\S+).*" // rest ignored
123    ;
124
125    /*
126     * ---------------------------------------------------------------------
127     * Very brief and incomplete description of the zOS/MVS-filesystem. (Note:
128     * "zOS" is the operating system on the mainframe, and is the new name for
129     * MVS)
130     *
131     * The filesystem on the mainframe does not have hierarchal structure as for
132     * example the unix filesystem. For a more comprehensive description, please
133     * refer to the IBM manuals
134     *
135     * @LINK:
136     * http://publibfp.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/dgt2d440/CONTENTS
137     *
138     *
139     * Dataset names =============
140     *
141     * A dataset name consist of a number of qualifiers separated by '.', each
142     * qualifier can be at most 8 characters, and the total length of a dataset
143     * can be max 44 characters including the dots.
144     *
145     *
146     * Dataset organisation ====================
147     *
148     * A dataset represents a piece of storage allocated on one or more disks.
149     * The structure of the storage is described with the field dataset
150     * organinsation (DSORG). There are a number of dataset organisations, but
151     * only two are usable for FTP transfer.
152     *
153     * DSORG: PS: sequential, or flat file PO: partitioned dataset PO-E:
154     * extended partitioned dataset
155     *
156     * The PS file is just a flat file, as you would find it on the unix file
157     * system.
158     *
159     * The PO and PO-E files, can be compared to a single level directory
160     * structure. A PO file consist of a number of dataset members, or files if
161     * you will. It is possible to CD into the file, and to retrieve the
162     * individual members.
163     *
164     *
165     * Dataset record format =====================
166     *
167     * The physical layout of the dataset is described on the dataset itself.
168     * There are a number of record formats (RECFM), but just a few is relavant
169     * for the FTP transfer.
170     *
171     * Any one beginning with either F or V can safely used by FTP transfer. All
172     * others should only be used with great care, so this version will just
173     * ignore the other record formats. F means a fixed number of records per
174     * allocated storage, and V means a variable number of records.
175     *
176     *
177     * Other notes ===========
178     *
179     * The file system supports automatically backup and retrieval of datasets.
180     * If a file is backed up, the ftp LIST command will return: ARCIVE Not
181     * Direct Access Device KJ.IOP998.ERROR.PL.UNITTEST
182     *
183     *
184     * Implementation notes ====================
185     *
186     * Only datasets that have dsorg PS, PO or PO-E and have recfm beginning
187     * with F or V, is fully parsed.
188     *
189     * The following fields in FTPFile is used: FTPFile.Rawlisting: Always set.
190     * FTPFile.Type: DIRECTORY_TYPE or FILE_TYPE or UNKNOWN FTPFile.Name: name
191     * FTPFile.Timestamp: change time or null
192     *
193     *
194     *
195     * Additional information ======================
196     *
197     * The MVS ftp server supports a number of features via the FTP interface.
198     * The features are controlled with the FTP command quote site filetype=<SEQ|JES|DB2>
199     * SEQ is the default and used for normal file transfer JES is used to
200     * interact with the Job Entry Subsystem (JES) similar to a job scheduler
201     * DB2 is used to interact with a DB2 subsystem
202     *
203     * This parser supports SEQ and JES.
204     *
205     *
206     *
207     *
208     *
209     *
210     */
211
212    /**
213     * The sole constructor for a MVSFTPEntryParser object.
214     *
215     */
216    public MVSFTPEntryParser() {
217        super(""); // note the regex is set in preParse.
218        super.configure(null); // configure parser with default configurations
219    }
220
221    /**
222     * Parses a line of an z/OS - MVS FTP server file listing and converts it
223     * into a usable format in the form of an <code> FTPFile </code> instance.
224     * If the file listing line doesn't describe a file, then
225     * <code> null </code> is returned. Otherwise a <code> FTPFile </code>
226     * instance representing the file is returned.
227     *
228     * @param entry
229     *            A line of text from the file listing
230     * @return An FTPFile instance corresponding to the supplied entry
231     */
232    public FTPFile parseFTPEntry(String entry) {
233        boolean isParsed = false;
234        FTPFile f = new FTPFile();
235
236        if (isType == FILE_LIST_TYPE) {
237            isParsed = parseFileList(f, entry);
238        } else if (isType == MEMBER_LIST_TYPE) {
239            isParsed = parseMemberList(f, entry);
240            if (!isParsed) {
241                isParsed = parseSimpleEntry(f, entry);
242            }
243        } else if (isType == UNIX_LIST_TYPE) {
244            isParsed = parseUnixList(f, entry);
245        } else if (isType == JES_LEVEL_1_LIST_TYPE) {
246            isParsed = parseJeslevel1List(f, entry);
247        } else if (isType == JES_LEVEL_2_LIST_TYPE) {
248            isParsed = parseJeslevel2List(f, entry);
249        }
250
251        if (!isParsed) {
252            f = null;
253        }
254
255        return f;
256    }
257
258    /**
259     * Parse entries representing a dataset list. Only datasets with DSORG PS or
260     * PO or PO-E and with RECFM F* or V* will be parsed.
261     *
262     * Format of ZOS/MVS file list: 1 2 3 4 5 6 7 8 9 10 Volume Unit Referred
263     * Ext Used Recfm Lrecl BlkSz Dsorg Dsname B10142 3390 2006/03/20 2 31 F 80
264     * 80 PS MDI.OKL.WORK ARCIVE Not Direct Access Device
265     * KJ.IOP998.ERROR.PL.UNITTEST B1N231 3390 2006/03/20 1 15 VB 256 27998 PO
266     * PLU B1N231 3390 2006/03/20 1 15 VB 256 27998 PO-E PLB
267     *
268     * ----------------------------------- Group within Regex [1] Volume [2]
269     * Unit [3] Referred [4] Ext: number of extents [5] Used [6] Recfm: Record
270     * format [7] Lrecl: Logical record length [8] BlkSz: Block size [9] Dsorg:
271     * Dataset organisation. Many exists but only support: PS, PO, PO-E [10]
272     * Dsname: Dataset name
273     *
274     * Note: When volume is ARCIVE, it means the dataset is stored somewhere in
275     * a tape archive. These entries is currently not supported by this parser.
276     * A null value is returned.
277     *
278     * @param file
279     *            will be updated with Name, Type, Timestamp if parsed.
280     * @param entry zosDirectoryEntry
281     * @return true: entry was parsed, false: entry was not parsed.
282     */
283    private boolean parseFileList(FTPFile file, String entry) {
284        if (matches(entry)) {
285            file.setRawListing(entry);
286            String name = group(2);
287            String dsorg = group(1);
288            file.setName(name);
289
290            // DSORG
291            if ("PS".equals(dsorg)) {
292                file.setType(FTPFile.FILE_TYPE);
293            }
294            else if ("PO".equals(dsorg) || "PO-E".equals(dsorg)) {
295                // regex already ruled out anything other than PO or PO-E
296                file.setType(FTPFile.DIRECTORY_TYPE);
297            }
298            else {
299                return false;
300            }
301
302            return true;
303        }
304
305        return false;
306    }
307
308    /**
309     * Parse entries within a partitioned dataset.
310     *
311     * Format of a memberlist within a PDS: 1 2 3 4 5 6 7 8 9 Name VV.MM Created
312     * Changed Size Init Mod Id TBSHELF 01.03 2002/09/12 2002/10/11 09:37 11 11
313     * 0 KIL001 TBTOOL 01.12 2002/09/12 2004/11/26 19:54 51 28 0 KIL001
314     *
315     * ------------------------------------------- [1] Name [2] VV.MM: Version .
316     * modification [3] Created: yyyy / MM / dd [4,5] Changed: yyyy / MM / dd
317     * HH:mm [6] Size: number of lines [7] Init: number of lines when first
318     * created [8] Mod: number of modified lines a last save [9] Id: User id for
319     * last update
320     *
321     *
322     * @param file
323     *            will be updated with Name, Type and Timestamp if parsed.
324     * @param entry zosDirectoryEntry
325     * @return true: entry was parsed, false: entry was not parsed.
326     */
327    private boolean parseMemberList(FTPFile file, String entry) {
328        if (matches(entry)) {
329            file.setRawListing(entry);
330            String name = group(1);
331            String datestr = group(2) + " " + group(3);
332            file.setName(name);
333            file.setType(FTPFile.FILE_TYPE);
334            try {
335                file.setTimestamp(super.parseTimestamp(datestr));
336            } catch (ParseException e) {
337                e.printStackTrace();
338                // just ignore parsing errors.
339                // TODO check this is ok
340                return false; // this is a parsing failure too.
341            }
342            return true;
343        }
344
345        return false;
346    }
347
348    /**
349     * Assigns the name to the first word of the entry. Only to be used from a
350     * safe context, for example from a memberlist, where the regex for some
351     * reason fails. Then just assign the name field of FTPFile.
352     *
353     * @param file
354     * @param entry
355     * @return
356     */
357    private boolean parseSimpleEntry(FTPFile file, String entry) {
358        if (entry != null && entry.length() > 0) {
359            file.setRawListing(entry);
360            String name = entry.split(" ")[0];
361            file.setName(name);
362            file.setType(FTPFile.FILE_TYPE);
363            return true;
364        }
365        return false;
366    }
367
368    /**
369     * Parse the entry as a standard unix file. Using the UnixFTPEntryParser.
370     *
371     * @param file
372     * @param entry
373     * @return true: entry is parsed, false: entry could not be parsed.
374     */
375    private boolean parseUnixList(FTPFile file, String entry) {
376        file = unixFTPEntryParser.parseFTPEntry(entry);
377        if (file == null) {
378            return false;
379        }
380        return true;
381    }
382
383    /**
384     * Matches these entries, note: no header: [1] [2] [3] [4] [5] IBMUSER1
385     * JOB01906 OUTPUT 3 Spool Files
386     * 012345678901234567890123456789012345678901234 1 2 3 4
387     * ------------------------------------------- Group in regex [1] Job name
388     * [2] Job number [3] Job status (INPUT,ACTIVE,OUTPUT) [4] Number of sysout
389     * files [5] The string "Spool Files"
390     *
391     *
392     * @param file
393     *            will be updated with Name, Type and Timestamp if parsed.
394     * @param entry zosDirectoryEntry
395     * @return true: entry was parsed, false: entry was not parsed.
396     */
397    private boolean parseJeslevel1List(FTPFile file, String entry) {
398        if (matches(entry)) {
399            if (group(3).equalsIgnoreCase("OUTPUT")) {
400                file.setRawListing(entry);
401                String name = group(2); /* Job Number, used by GET */
402                file.setName(name);
403                file.setType(FTPFile.FILE_TYPE);
404                return true;
405            }
406        }
407
408        return false;
409    }
410
411    /**
412     * Matches these entries, note: no header: [1] [2] [3] [4] [5] JOBNAME JOBID
413     * OWNER STATUS CLASS IBMUSER1 JOB01906 IBMUSER OUTPUT A RC=0000 3 spool
414     * files IBMUSER TSU01830 IBMUSER OUTPUT TSU ABEND=522 3 spool files
415     * 012345678901234567890123456789012345678901234 1 2 3 4
416     * ------------------------------------------- Group in regex [1] Job name
417     * [2] Job number [3] Owner [4] Job status (INPUT,ACTIVE,OUTPUT) [5] Job
418     * Class [6] The rest
419     *
420     *
421     * @param file
422     *            will be updated with Name, Type and Timestamp if parsed.
423     * @param entry zosDirectoryEntry
424     * @return true: entry was parsed, false: entry was not parsed.
425     */
426    private boolean parseJeslevel2List(FTPFile file, String entry) {
427        if (matches(entry)) {
428            if (group(4).equalsIgnoreCase("OUTPUT")) {
429                file.setRawListing(entry);
430                String name = group(2); /* Job Number, used by GET */
431                file.setName(name);
432                file.setType(FTPFile.FILE_TYPE);
433                return true;
434            }
435        }
436
437        return false;
438    }
439
440    /**
441     * preParse is called as part of the interface. Per definition is is called
442     * before the parsing takes place. Three kind of lists is recognize:
443     * z/OS-MVS File lists z/OS-MVS Member lists unix file lists
444     * @since 2.0
445     */
446    @Override
447    public List<String> preParse(List<String> orig) {
448        // simply remove the header line. Composite logic will take care of the
449        // two different types of
450        // list in short order.
451        if (orig != null && orig.size() > 0) {
452            String header = orig.get(0);
453            if (header.indexOf("Volume") >= 0 && header.indexOf("Dsname") >= 0) {
454                setType(FILE_LIST_TYPE);
455                super.setRegex(FILE_LIST_REGEX);
456            } else if (header.indexOf("Name") >= 0 && header.indexOf("Id") >= 0) {
457                setType(MEMBER_LIST_TYPE);
458                super.setRegex(MEMBER_LIST_REGEX);
459            } else if (header.indexOf("total") == 0) {
460                setType(UNIX_LIST_TYPE);
461                unixFTPEntryParser = new UnixFTPEntryParser();
462            } else if (header.indexOf("Spool Files") >= 30) {
463                setType(JES_LEVEL_1_LIST_TYPE);
464                super.setRegex(JES_LEVEL_1_LIST_REGEX);
465            } else if (header.indexOf("JOBNAME") == 0
466                    && header.indexOf("JOBID") > 8) {// header contains JOBNAME JOBID OWNER // STATUS CLASS
467                setType(JES_LEVEL_2_LIST_TYPE);
468                super.setRegex(JES_LEVEL_2_LIST_REGEX);
469            } else {
470                setType(UNKNOWN_LIST_TYPE);
471            }
472
473            if (isType != JES_LEVEL_1_LIST_TYPE) { // remove header is necessary
474                orig.remove(0);
475            }
476        }
477
478        return orig;
479    }
480
481    /**
482     * Explicitly set the type of listing being processed.
483     * @param type The listing type.
484     */
485    void setType(int type) {
486        isType = type;
487    }
488
489    /*
490     * @return
491     */
492    @Override
493    protected FTPClientConfig getDefaultConfiguration() {
494        return new FTPClientConfig(FTPClientConfig.SYST_MVS,
495                DEFAULT_DATE_FORMAT, null, null, null, null);
496    }
497
498}