/* GZIPInputStream.java - Input filter for reading gzip file
   Copyright (C) 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version.
*/


package java.util.zip;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

/**
 * This filter stream is used to decompress a "GZIP" format stream.
 * The "GZIP" format is described in RFC 1952.
 *
 * @author John Leuner
 * @author Tom Tromey
 * @since JDK 1.1
 */
public class GZIPInputStream
  extends InflaterInputStream
{
  /**
   * The magic number found at the start of a GZIP stream.
   */
  public static final int GZIP_MAGIC = 0x8b1f;

  /**
   * The mask for bit 0 of the flag byte.
   */
  static final int FTEXT = 0x1;

  /**
   * The mask for bit 1 of the flag byte.
   */
  static final int FHCRC = 0x2;

  /**
   * The mask for bit 2 of the flag byte.
   */
  static final int FEXTRA = 0x4;

  /**
   * The mask for bit 3 of the flag byte.
   */
  static final int FNAME = 0x8;

  /**
   * The mask for bit 4 of the flag byte.
   */
  static final int FCOMMENT = 0x10;

  /**
   * The mask for the reserved bits (5, 6 and 7) of the flag byte.
   */
  private static final int FRESERVED = 0xe0;

  /**
   * The size in bytes of the GZIP footer (CRC-32 followed by the
   * uncompressed size, four bytes each).
   */
  private static final int FOOTER_SIZE = 8;

  /**
   * The message used when the input ends inside the GZIP header.
   */
  private static final String HEADER_EOF = "Early EOF in GZIP header";

  /**
   * The CRC-32 checksum value for uncompressed data.
   */
  protected CRC32 crc;

  /**
   * Indicates whether or not the end of the stream has been reached.
   */
  protected boolean eos;

  /**
   * Indicates whether or not the GZIP header has been read in.
   */
  private boolean readGZIPHeader;

  /**
   * Creates a GZIPInputStream with the default buffer size.
   *
   * @param in The stream to read compressed data from
   *           (in GZIP format).
   *
   * @throws IOException if an error occurs during an I/O operation.
   */
  public GZIPInputStream(InputStream in)
    throws IOException
  {
    this(in, 4096);
  }

  /**
   * Creates a GZIPInputStream with the specified buffer size.
   *
   * @param in The stream to read compressed data from
   *           (in GZIP format).
   * @param size The size of the buffer to use.
   *
   * @throws IOException if an error occurs during an I/O operation.
   * @throws IllegalArgumentException if <code>size</code>
   * is less than or equal to 0.
   */
  public GZIPInputStream(InputStream in, int size)
    throws IOException
  {
    // The inflater is created in "nowrap" mode: the GZIP header and
    // footer are parsed by this class, not by the inflater.
    super(in, new Inflater(true), size);
    crc = new CRC32();
    readHeader();
  }

  /**
   * Closes the input stream.
   *
   * @throws IOException if an error occurs during an I/O operation.
   */
  public void close()
    throws IOException
  {
    // Nothing GZIP-specific to release; delegate to the superclass.
    super.close();
  }

  /**
   * Reads in GZIP-compressed data and stores it in uncompressed form
   * into an array of bytes.  The method will block until either
   * enough input data becomes available or the compressed stream
   * reaches its end.
   *
   * @param buf the buffer into which the uncompressed data will
   *            be stored.
   * @param offset the offset indicating where in <code>buf</code>
   *               the uncompressed data should be placed.
   * @param len the number of uncompressed bytes to be read.
   *
   * @return the value returned by the superclass read, or -1 once the
   *         end of the GZIP stream has been reached.
   *
   * @throws IOException if an I/O error occurs, or if the header or
   *         footer of the GZIP stream is malformed.
   */
  public int read(byte[] buf, int offset, int len) throws IOException
  {
    // We first have to slurp in the GZIP header, then we feed all the
    // rest of the data to the superclass.
    //
    // As we do that we continually update the CRC32.  Once the data is
    // finished, we check the CRC32.
    //
    // This means we don't need our own buffer, as everything is done
    // in the superclass.
    if (!readGZIPHeader)
      readHeader();

    if (eos)
      return -1;

    int numRead = super.read(buf, offset, len);
    if (numRead > 0)
      crc.update(buf, offset, numRead);

    // Once the inflater is done the footer follows; verify it now so a
    // corrupt stream is reported together with its last data bytes.
    if (inf.finished())
      readFooter();
    return numRead;
  }

  /**
   * Reads in and validates the GZIP header.  If the underlying stream
   * is already at its end, <code>eos</code> is set and the header is
   * left marked as unread.
   *
   * @throws EOFException if the input ends inside the header.
   * @throws IOException if the header is malformed or an I/O error
   *         occurs.
   */
  private void readHeader() throws IOException
  {
    // Checksum over every header byte preceding the optional CRC16.
    CRC32 headCRC = new CRC32();

    /* 1. Check the two magic bytes */
    int magic = in.read();
    if (magic < 0)
      {
        eos = true;
        return;
      }
    int magic2 = in.read();
    if ((magic + (magic2 << 8)) != GZIP_MAGIC)
      throw new IOException("Error in GZIP header, bad magic code");
    headCRC.update(magic);
    headCRC.update(magic2);

    /* 2. Check the compression type (must be 8) */
    int CM = in.read();
    if (CM != Deflater.DEFLATED)
      throw new IOException("Error in GZIP header, data not in deflate format");
    headCRC.update(CM);

    /* 3. Check the flags */
    int flags = readHeaderByte(headCRC, HEADER_EOF);

    /*  This flag byte is divided into individual bits as follows:

        bit 0   FTEXT
        bit 1   FHCRC
        bit 2   FEXTRA
        bit 3   FNAME
        bit 4   FCOMMENT
        bit 5   reserved
        bit 6   reserved
        bit 7   reserved
    */

    /* 3.1 Check the reserved bits are zero */
    if ((flags & FRESERVED) != 0)
      throw new IOException("Reserved flag bits in GZIP header != 0");

    /* 4.-6. Skip the modification time, extra flags, and OS type */
    for (int i = 0; i < 6; i++)
      readHeaderByte(headCRC, HEADER_EOF);

    /* 7. Read extra field */
    if ((flags & FEXTRA) != 0)
      {
        // RFC 1952: a two byte length (XLEN), least-significant byte
        // first, immediately followed by XLEN bytes of extra data.
        int len1 = readHeaderByte(headCRC, HEADER_EOF);
        int len2 = readHeaderByte(headCRC, HEADER_EOF);
        int extraLen = len1 | (len2 << 8);
        for (int i = 0; i < extraLen; i++)
          readHeaderByte(headCRC, HEADER_EOF);
      }

    /* 8. Read file name */
    if ((flags & FNAME) != 0)
      skipZeroTerminated(headCRC, "Early EOF in GZIP file name");

    /* 9. Read comment */
    if ((flags & FCOMMENT) != 0)
      skipZeroTerminated(headCRC, "Early EOF in GZIP comment");

    /* 10. Read header CRC */
    if ((flags & FHCRC) != 0)
      {
        // The CRC16 itself is not part of the checksummed data, so it
        // is read directly rather than through readHeaderByte().
        int low = in.read();
        if (low < 0)
          throw new EOFException(HEADER_EOF);

        int high = in.read();
        if (high < 0)
          throw new EOFException(HEADER_EOF);

        // Stored least-significant byte first (RFC 1952); it must equal
        // the low 16 bits of the CRC-32 of the preceding header bytes.
        int crcval = low | (high << 8);
        if (crcval != ((int) headCRC.getValue() & 0xffff))
          throw new IOException("Header CRC value mismatch");
      }

    readGZIPHeader = true;
  }

  /**
   * Reads one header byte from the underlying stream and adds it to
   * the running header checksum.
   *
   * @param headCRC the checksum to update with the byte read.
   * @param eofMessage the message for the exception thrown when the
   *        stream ends.
   *
   * @return the byte read, in the range 0 to 255.
   *
   * @throws EOFException if the underlying stream is at its end.
   * @throws IOException if an I/O error occurs.
   */
  private int readHeaderByte(CRC32 headCRC, String eofMessage)
    throws IOException
  {
    int value = in.read();
    if (value < 0)
      throw new EOFException(eofMessage);
    headCRC.update(value);
    return value;
  }

  /**
   * Skips a zero-terminated header field (including its terminator),
   * adding every byte to the running header checksum.
   *
   * @param headCRC the checksum to update with the bytes read.
   * @param eofMessage the message for the exception thrown when the
   *        stream ends before the terminator.
   *
   * @throws EOFException if the stream ends before the terminating zero.
   * @throws IOException if an I/O error occurs.
   */
  private void skipZeroTerminated(CRC32 headCRC, String eofMessage)
    throws IOException
  {
    int value;
    do
      value = readHeaderByte(headCRC, eofMessage);
    while (value != 0);
  }

  /**
   * Reads the eight byte GZIP footer and checks the stored CRC-32 and
   * byte count against the data that was decompressed.  Sets
   * <code>eos</code> on success.
   *
   * @throws EOFException if the input ends inside the footer.
   * @throws IOException if the checksum or byte count does not match,
   *         or an I/O error occurs.
   */
  private void readFooter() throws IOException
  {
    byte[] footer = new byte[FOOTER_SIZE];

    // The inflater may already hold footer bytes it did not consume;
    // they sit at the tail of the superclass input buffer.
    int remaining = inf.getRemaining();
    int avail = remaining > FOOTER_SIZE ? FOOTER_SIZE : remaining;
    System.arraycopy(buf, len - remaining, footer, 0, avail);

    // Fetch whatever part of the footer is still missing from the
    // underlying stream.
    int needed = FOOTER_SIZE - avail;
    while (needed > 0)
      {
        int count = in.read(footer, FOOTER_SIZE - needed, needed);
        if (count <= 0)
          throw new EOFException("Early EOF in GZIP footer");
        needed -= count;
      }

    // Bytes 0-3: CRC-32 of the uncompressed data, low byte first.
    int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8)
      | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
    if (crcval != (int) crc.getValue())
      throw new IOException("GZIP crc sum mismatch, theirs \""
                            + Integer.toHexString(crcval)
                            + "\" and ours \""
                            + Integer.toHexString((int) crc.getValue())
                            + "\"");

    // Bytes 4-7: number of uncompressed bytes, low byte first.
    int total = (footer[4] & 0xff) | ((footer[5] & 0xff) << 8)
      | ((footer[6] & 0xff) << 16) | (footer[7] << 24);
    if (total != inf.getTotalOut())
      throw new IOException("Number of bytes mismatch");

    /* FIXME: Should we support multiple members?
     * Difficult, since there may be some bytes still in buf.
     */
    eos = true;
  }
}