blob: 77d46b17ea3cd5660a607b2a3f770732ea91e206 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2019,2021 IBM Corporation.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* IBM Corporation (Andrew Johnson) - initial API and implementation
*******************************************************************************/
package org.eclipse.mat.hprof;
import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.io.UncheckedIOException;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.util.function.Supplier;
import java.util.zip.GZIPInputStream;
/**
 * Creates an unzipped view of a Gzipped file.
 * Probably quite slow for random access, okay for streaming access.
 * Package class.
 * Do not call any methods other than
 * {@link #seek(long)}
 * {@link #getFilePointer()}
 * {@link #length()} - not necessarily accurate
 * {@link #read(byte[])}
 * {@link #read(byte[], int, int)}
 * {@link #close()}
 * <p>
 * Only the methods listed above are overridden to go through the
 * decompressing {@link SeekableStream}; every other method is inherited
 * unchanged from {@link RandomAccessFile}.
 */
class CompressedRandomAccessFile extends RandomAccessFile
{
    /** Decompressed, seekable view of the underlying gzipped file. */
    SeekableStream ss;

    /**
     * Create an unzipped view of the gzipped file, using multiple
     * gzipped readers to obtain the uncompressed data.
     * If construction fails after the file has been opened, the file
     * is closed again before the exception is propagated.
     * @param file the gzipped file to read
     * @param random hint for random access: if true a {@link GZIPInputStream2}
     *            is used for decompression, otherwise a plain {@link GZIPInputStream}
     * @param length estimate of the uncompressed length, passed on to the {@link SeekableStream}
     * @throws IOException if the file cannot be opened or its size cannot be read
     */
    public CompressedRandomAccessFile(File file, boolean random, long length) throws IOException
    {
        super(file, "r"); //$NON-NLS-1$
        boolean constructed = false;
        try
        {
            final FileChannel ch = getChannel();
            // length of file on disk - don't find length after decompression as expensive
            // and don't know it yet
            long len = ch.size();
            // Each SeekableStream input decompressor probably uses 64kB.
            long decompSize = 65536;
            int cacheSize = (int) Math.min(Math.min(len / 100000, 1000) + len / 1000000, 1000000);
            // Also limit the cache according to memory
            // Limit to 1/4 spare memory
            long required = cacheSize * decompSize * 4;
            long maxFree = checkMemSpace(required);
            if (required > maxFree)
                cacheSize = (int) (maxFree / decompSize / 4);
            ss = new SeekableStream(new Supplier<InputStream>()
            {
                @Override
                public InputStream get()
                {
                    try
                    {
                        /*
                         * Create a stream view of the channel.
                         * Important - changing position via channel
                         * must change position of input stream, so
                         * no buffering.
                         * Add mark support.
                         */
                        InputStream is = new FilterInputStream(Channels.newInputStream(ch))
                        {
                            /** Channel position saved by mark(), or -1 if it could not be read. */
                            long markPos;

                            @Override
                            public boolean markSupported()
                            {
                                return true;
                            }

                            @Override
                            public void mark(int n)
                            {
                                try
                                {
                                    markPos = ch.position();
                                }
                                catch (IOException e)
                                {
                                    // mark() cannot throw; remember the failure for reset()
                                    markPos = -1;
                                }
                            }

                            @Override
                            public void reset() throws IOException
                            {
                                // A failed mark() must surface as an IOException, not as the
                                // IllegalArgumentException that position(-1) would raise.
                                if (markPos < 0)
                                    throw new IOException("Mark position is not available"); //$NON-NLS-1$
                                ch.position(markPos);
                            }
                        };
                        InputStream is2 = new SeekableStream.UnclosableInputStream(is);
                        // GZIPInputStream2 can save positions mid stream
                        // GZIPInputStream is faster for linear access
                        return random ? new GZIPInputStream2(is2) : new GZIPInputStream(is2);
                    }
                    catch (IOException e)
                    {
                        throw new UncheckedIOException(e);
                    }
                }
            }, ch, cacheSize, length);
            constructed = true;
        }
        finally
        {
            if (!constructed)
            {
                // Do not leak the file handle opened by super(...)
                try
                {
                    super.close();
                }
                catch (IOException ignored)
                {
                    // keep the original failure as the one reported to the caller
                }
            }
        }
    }

    /**
     * Move to a position in the uncompressed data.
     * @param pos the position in the uncompressed stream
     * @throws IOException if the underlying stream cannot seek there
     */
    @Override
    public void seek(long pos) throws IOException
    {
        ss.seek(pos);
    }

    /**
     * The current position as reported by the decompressing stream.
     */
    @Override
    public long getFilePointer()
    {
        return ss.position();
    }

    /**
     * Unknown length is Long.MAX_VALUE
     */
    @Override
    public long length()
    {
        return Long.MAX_VALUE;
    }

    /**
     * Read uncompressed data.
     * @param buf the destination buffer
     * @return the value returned by the decompressing stream
     * @throws IOException on a read error
     */
    @Override
    public int read(byte buf[]) throws IOException
    {
        return ss.read(buf);
    }

    /**
     * Read uncompressed data into part of a buffer.
     * @param buf the destination buffer
     * @param off the offset in the buffer of the first byte to store
     * @param len the maximum number of bytes to read
     * @return the value returned by the decompressing stream
     * @throws IOException on a read error
     */
    @Override
    public int read(byte buf[], int off, int len) throws IOException
    {
        return ss.read(buf, off, len);
    }

    /**
     * Close the decompressing stream and then the underlying file.
     * The file is closed even if closing the stream fails.
     * @throws IOException if either close fails
     */
    @Override
    public void close() throws IOException
    {
        try
        {
            ss.close();
        }
        finally
        {
            super.close();
        }
    }

    /**
     * Estimate the length of a file, including GZip without
     * decompressing the whole file.
     * @param f the file to examine
     * @return the estimated length of the uncompressed data,
     *         or Long.MAX_VALUE if it cannot be estimated
     * @throws IOException if the file cannot be opened or read
     */
    public static long estimatedLength(File f) throws IOException
    {
        try (RandomAccessFile ra = new RandomAccessFile(f, "r")) //$NON-NLS-1$
        {
            return estimatedLength(ra);
        }
    }

    /**
     * Estimates the work to do when reading the file. For compressed files it is
     * not easy to do this based on the original size, since it is not generally
     * known. Instead for these we use the physical size and the physical read
     * position (at least for the chunked gzipped files).
     *
     * @param f The file
     * @return The work to do.
     * @throws IOException On error.
     */
    public static long estimateWork(File f) throws IOException
    {
        try (RandomAccessFile ra = new RandomAccessFile(f, "r")) //$NON-NLS-1$
        {
            if (ChunkedGZIPRandomAccessFile.isChunkedGZIPFile(ra))
            {
                return f.length();
            }
            return estimatedLength(ra);
        }
    }

    /**
     * Estimate the length of the uncompressed version of the Gzipped file.
     * Gzip only has the least significant 32-bits of the size.
     * Estimate the size as 5.0 times the compressed size, but with the
     * same least significant 32-bits.
     * A file which is not gzipped is reported with its actual length.
     * The file position of {@code ra} is restored before returning.
     * @param ra The file
     * @return the estimated uncompressed size, or Long.MAX_VALUE if unknown
     * @throws IOException if the file cannot be read
     */
    static long estimatedLength(RandomAccessFile ra) throws IOException
    {
        if (ChunkedGZIPRandomAccessFile.isChunkedGZIPFile(ra))
        {
            // Don't try to estimate the length if the file contains more than one
            // gzip "member", since it will never be right.
            return Long.MAX_VALUE;
        }
        long filel = ra.length();
        long pos = ra.getFilePointer();
        try
        {
            if (!isGZIP(ra))
            {
                return filel;
            }
            if (filel < 4)
            {
                // Truncated file: there is no 4-byte size trailer to read,
                // and seeking to a negative offset would fail.
                return Long.MAX_VALUE;
            }
            ra.seek(filel - 4);
            int r1 = ra.read();
            int r2 = ra.read();
            int r3 = ra.read();
            int r4 = ra.read();
            // Least significant 32 bits of original length (stored little-endian)
            long len32 = ((long) (r4 & 0xff) << 24) + ((r3 & 0xff) << 16) + ((r2 & 0xff) << 8) + (r1 & 0xff);
            // Estimated decompression factor
            long estimate = (long) (filel * 5.0);
            // Now insert least significant 32 bits
            long e1 = (estimate & ~0xffffffffL) + len32;
            // and choose the closest value with those bits
            long e2 = e1 + 0x100000000L;
            long e3 = e1 - 0x100000000L;
            long best = e1;
            if (Math.abs(e2 - estimate) < Math.abs(best - estimate))
                best = e2;
            if (e3 >= 0 && Math.abs(e3 - estimate) < Math.abs(best - estimate))
                best = e3;
            /*
             * Attempt to detect a chunked file: a recorded size no bigger
             * than the compressed file, or of at most 1MB, is treated as
             * unreliable and the length is reported as unknown.
             */
            if (len32 <= filel || len32 <= 1024 * 1024)
                best = Long.MAX_VALUE;
            return best;
        }
        finally
        {
            ra.seek(pos);
        }
    }

    /**
     * Test whether the file starts with the three bytes 0x1f, 0x8b, 0x08.
     * The file position of {@code ra} is restored before returning.
     * @param ra the file to examine
     * @return true if the first three bytes match
     * @throws IOException if the file cannot be read
     */
    static boolean isGZIP(RandomAccessFile ra) throws IOException
    {
        long pos = ra.getFilePointer();
        try
        {
            if (pos != 0)
                ra.seek(0);
            int b1 = ra.read();
            int b2 = ra.read();
            int b3 = ra.read();
            return b1 == 0x1f && b2 == 0x8b && b3 == 0x08;
        }
        finally
        {
            ra.seek(pos);
        }
    }

    /**
     * Check whether there is at least the requested amount of
     * memory available. If there does not appear to be enough,
     * a garbage collection is requested and the amount is measured again.
     * @param requested the number of bytes wanted
     * @return memory available: maximum heap minus the heap currently in use
     */
    static long checkMemSpace(long requested)
    {
        Runtime runtime = Runtime.getRuntime();
        long maxFree = runtime.maxMemory() - (runtime.totalMemory() - runtime.freeMemory());
        if (maxFree < requested)
        {
            runtime.gc();
            maxFree = runtime.maxMemory() - (runtime.totalMemory() - runtime.freeMemory());
        }
        return maxFree;
    }
}