1 package org.apache.lucene.store;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
22 import java.io.RandomAccessFile;
23 import java.nio.ByteBuffer;
24 import java.nio.BufferUnderflowException;
25 import java.nio.channels.ClosedChannelException; // javadoc
26 import java.nio.channels.FileChannel;
27 import java.nio.channels.FileChannel.MapMode;
29 import java.security.AccessController;
30 import java.security.PrivilegedExceptionAction;
31 import java.security.PrivilegedActionException;
32 import java.lang.reflect.Method;
34 import org.apache.lucene.util.Constants;
36 /** File-based {@link Directory} implementation that uses
37 * mmap for reading, and {@link
38 * FSDirectory.FSIndexOutput} for writing.
40 * <p><b>NOTE</b>: memory mapping uses up a portion of the
41 * virtual memory address space in your process equal to the
42 * size of the file being mapped. Before using this class,
43 * be sure you have plenty of virtual address space, e.g. by
44 * using a 64 bit JRE, or a 32 bit JRE with indexes that are
45 * guaranteed to fit within the address space.
46 * On 32 bit platforms also consult {@link #setMaxChunkSize}
47 * if you have problems with mmap failing because of fragmented
48 * address space. If you get an OutOfMemoryError, it is recommended
49 * to reduce the chunk size, until it works.
51 * <p>Due to <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038">
52 * this bug</a> in Sun's JRE, MMapDirectory's {@link IndexInput#close}
53 * is unable to close the underlying OS file handle. Only when GC
54 * finally collects the underlying objects, which could be quite
55 * some time later, will the file handle be closed.
57 * <p>This will consume additional transient disk usage: on Windows,
58 * attempts to delete or overwrite the files will result in an
59 * exception; on other platforms, which typically have a "delete on
60 * last close" semantics, while such operations will succeed, the bytes
61 * are still consuming space on disk. For many applications this
62 * limitation is not a problem (e.g. if you have plenty of disk space,
63 * and you don't rely on overwriting files on Windows) but it's still
64 * an important limitation to be aware of.
66 * <p>This class supplies the workaround mentioned in the bug report
67 * (see {@link #setUseUnmap}), which may fail on
68 * non-Sun JVMs. It forcefully unmaps the buffer on close by using
69 * an undocumented internal cleanup functionality.
70 * {@link #UNMAP_SUPPORTED} is <code>true</code>, if the workaround
71 * can be enabled (with no guarantees).
73 * <b>NOTE:</b> Accessing this class either directly or
74 * indirectly from a thread while it's interrupted can close the
75 * underlying channel immediately if at the same time the thread is
76 * blocked on IO. The channel will remain closed and subsequent access
77 * to {@link MMapDirectory} will throw a {@link ClosedChannelException}.
80 public class MMapDirectory extends FSDirectory {
81 private boolean useUnmapHack = UNMAP_SUPPORTED;
82 public static final int DEFAULT_MAX_BUFF = Constants.JRE_IS_64BIT ? (1 << 30) : (1 << 28);
83 private int chunkSizePower;
/**
 * Create a new MMapDirectory for the named location, using
 * {@link #DEFAULT_MAX_BUFF} as the initial maximum chunk size.
 *
 * @param path the path of the directory
 * @param lockFactory the lock factory to use, or null for the default
 * ({@link NativeFSLockFactory})
 * @throws IOException if thrown by the {@link FSDirectory} constructor
 */
public MMapDirectory(File path, LockFactory lockFactory) throws IOException {
  super(path, lockFactory);
  this.setMaxChunkSize(DEFAULT_MAX_BUFF);
}
/**
 * Create a new MMapDirectory for the named location and {@link NativeFSLockFactory}.
 *
 * <p>Equivalent to calling {@link #MMapDirectory(File, LockFactory)} with a
 * <code>null</code> lock factory, which selects the default.
 *
 * @param path the path of the directory
 * @throws IOException if thrown by the {@link FSDirectory} constructor
 */
public MMapDirectory(File path) throws IOException {
  // Delegate so that both constructors share one initialization path
  // (superclass setup followed by the default chunk size).
  this(path, null);
}
108 * <code>true</code>, if this platform supports unmapping mmapped files.
110 public static final boolean UNMAP_SUPPORTED;
114 Class.forName("sun.misc.Cleaner");
115 Class.forName("java.nio.DirectByteBuffer")
116 .getMethod("cleaner");
118 } catch (Exception e) {
125 * This method enables the workaround for unmapping the buffers
126 * from address space after closing {@link IndexInput}, that is
127 * mentioned in the bug report. This hack may fail on non-Sun JVMs.
128 * It forcefully unmaps the buffer on close by using
129 * an undocumented internal cleanup functionality.
130 * <p><b>NOTE:</b> Enabling this is completely unsupported
131 * by Java and may lead to JVM crashes if <code>IndexInput</code>
132 * is closed while another thread is still accessing it (SIGSEGV).
133 * @throws IllegalArgumentException if {@link #UNMAP_SUPPORTED}
134 * is <code>false</code> and the workaround cannot be enabled.
136 public void setUseUnmap(final boolean useUnmapHack) {
137 if (useUnmapHack && !UNMAP_SUPPORTED)
138 throw new IllegalArgumentException("Unmap hack not supported on this platform!");
139 this.useUnmapHack=useUnmapHack;
143 * Returns <code>true</code>, if the unmap workaround is enabled.
146 public boolean getUseUnmap() {
151 * Try to unmap the buffer, this method silently fails if no support
152 * for that in the JVM. On Windows, this leads to the fact,
153 * that mmapped files cannot be modified or deleted.
155 final void cleanMapping(final ByteBuffer buffer) throws IOException {
158 AccessController.doPrivileged(new PrivilegedExceptionAction<Object>() {
159 public Object run() throws Exception {
160 final Method getCleanerMethod = buffer.getClass()
161 .getMethod("cleaner");
162 getCleanerMethod.setAccessible(true);
163 final Object cleaner = getCleanerMethod.invoke(buffer);
164 if (cleaner != null) {
165 cleaner.getClass().getMethod("clean")
171 } catch (PrivilegedActionException e) {
172 final IOException ioe = new IOException("unable to unmap the mapped buffer");
173 ioe.initCause(e.getCause());
180 * Sets the maximum chunk size (default is {@link Integer#MAX_VALUE} for
181 * 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping.
182 * Especially on 32 bit platform, the address space can be very fragmented,
183 * so large index files cannot be mapped.
184 * Using a lower chunk size makes the directory implementation a little
185 * bit slower (as the correct chunk may be resolved on lots of seeks)
186 * but the chance is higher that mmap does not fail. On 64 bit
187 * Java platforms, this parameter should always be {@code 1 << 30},
188 * as the address space is big enough.
189 * <b>Please note:</b> This method always rounds down the chunk size
192 public final void setMaxChunkSize(final int maxChunkSize) {
193 if (maxChunkSize <= 0)
194 throw new IllegalArgumentException("Maximum chunk size for mmap must be >0");
195 //System.out.println("Requested chunk size: "+maxChunkSize);
196 this.chunkSizePower = 31 - Integer.numberOfLeadingZeros(maxChunkSize);
197 assert this.chunkSizePower >= 0 && this.chunkSizePower <= 30;
198 //System.out.println("Got chunk size: "+getMaxChunkSize());
202 * Returns the current mmap chunk size.
203 * @see #setMaxChunkSize
205 public final int getMaxChunkSize() {
206 return 1 << chunkSizePower;
209 /** Creates an IndexInput for the file with the given name. */
211 public IndexInput openInput(String name, int bufferSize) throws IOException {
213 File f = new File(getDirectory(), name);
214 RandomAccessFile raf = new RandomAccessFile(f, "r");
216 return new MMapIndexInput(raf, chunkSizePower);
222 // Because Java's ByteBuffer uses an int to address the
223 // values, it's necessary to access a file >
224 // Integer.MAX_VALUE in size using multiple byte buffers.
225 private final class MMapIndexInput extends IndexInput {
227 private ByteBuffer[] buffers;
229 private final long length, chunkSizeMask, chunkSize;
230 private final int chunkSizePower;
232 private int curBufIndex;
234 private ByteBuffer curBuf; // redundant for speed: buffers[curBufIndex]
236 private boolean isClone = false;
/**
 * Maps the whole file read-only as a sequence of buffers of at most
 * <code>1 &lt;&lt; chunkSizePower</code> bytes each and positions the input
 * at offset 0.
 *
 * @param raf the file to map; it is not closed by this constructor
 * @param chunkSizePower base-2 logarithm of the chunk size, in the range 0..30
 * @throws IllegalArgumentException if <code>chunkSizePower</code> is out of
 *         range, or the file would need too many buffers at that chunk size
 * @throws IOException if reading the file length or mapping fails
 */
MMapIndexInput(RandomAccessFile raf, int chunkSizePower) throws IOException {
  // Validate first so the derived sizes below are never computed from a bad shift.
  if (chunkSizePower < 0 || chunkSizePower > 30) {
    throw new IllegalArgumentException("Invalid chunkSizePower used for ByteBuffer size: " + chunkSizePower);
  }

  this.length = raf.length();
  this.chunkSizePower = chunkSizePower;
  this.chunkSize = 1L << chunkSizePower;
  this.chunkSizeMask = chunkSize - 1L;

  if ((length >>> chunkSizePower) >= Integer.MAX_VALUE) {
    throw new IllegalArgumentException("RandomAccessFile too big for chunk size: " + raf.toString());
  }

  // we always allocate one more buffer, the last one may be a 0 byte one
  final int nrBuffers = (int) (length >>> chunkSizePower) + 1;
  this.buffers = new ByteBuffer[nrBuffers];

  long bufferStart = 0L;
  final FileChannel rafc = raf.getChannel();
  for (int bufNr = 0; bufNr < nrBuffers; bufNr++) {
    // A full chunk, or whatever is left of the file for the last buffer.
    final int bufSize = (int) Math.min(chunkSize, length - bufferStart);
    this.buffers[bufNr] = rafc.map(MapMode.READ_ONLY, bufferStart, bufSize);
    bufferStart += bufSize;
  }

  // Initializes curBufIndex and curBuf so reads can start immediately.
  seek(0L);
}
271 public byte readByte() throws IOException {
274 } catch (BufferUnderflowException e) {
277 if (curBufIndex >= buffers.length)
278 throw new IOException("read past EOF");
279 curBuf = buffers[curBufIndex];
281 } while (!curBuf.hasRemaining());
287 public void readBytes(byte[] b, int offset, int len) throws IOException {
289 curBuf.get(b, offset, len);
290 } catch (BufferUnderflowException e) {
291 int curAvail = curBuf.remaining();
292 while (len > curAvail) {
293 curBuf.get(b, offset, curAvail);
297 if (curBufIndex >= buffers.length)
298 throw new IOException("read past EOF");
299 curBuf = buffers[curBufIndex];
301 curAvail = curBuf.remaining();
303 curBuf.get(b, offset, len);
308 public int readInt() throws IOException {
310 return curBuf.getInt();
311 } catch (BufferUnderflowException e) {
312 return super.readInt();
317 public long readLong() throws IOException {
319 return curBuf.getLong();
320 } catch (BufferUnderflowException e) {
321 return super.readLong();
326 public long getFilePointer() {
327 return (((long) curBufIndex) << chunkSizePower) + curBuf.position();
331 public void seek(long pos) throws IOException {
332 // we use >> here to preserve negative, so we will catch AIOOBE:
333 final int bi = (int) (pos >> chunkSizePower);
335 final ByteBuffer b = buffers[bi];
336 b.position((int) (pos & chunkSizeMask));
337 // write values, on exception all is unchanged
338 this.curBufIndex = bi;
340 } catch (ArrayIndexOutOfBoundsException aioobe) {
342 throw new IllegalArgumentException("Seeking to negative position");
343 throw new IOException("seek past EOF");
344 } catch (IllegalArgumentException iae) {
346 throw new IllegalArgumentException("Seeking to negative position");
347 throw new IOException("seek past EOF");
352 public long length() {
357 public Object clone() {
359 throw new AlreadyClosedException("MMapIndexInput already closed");
360 final MMapIndexInput clone = (MMapIndexInput)super.clone();
361 clone.isClone = true;
362 clone.buffers = new ByteBuffer[buffers.length];
363 // Since most clones will use only one buffer, duplicate() could also be
364 // done lazy in clones, e.g. when adapting curBuf.
365 for (int bufNr = 0; bufNr < buffers.length; bufNr++) {
366 clone.buffers[bufNr] = buffers[bufNr].duplicate();
369 clone.seek(getFilePointer());
370 } catch(IOException ioe) {
371 throw new RuntimeException("Should never happen", ioe);
377 public void close() throws IOException {
379 if (isClone || buffers == null) return;
380 for (int bufNr = 0; bufNr < buffers.length; bufNr++) {
381 // unmap the buffer (if enabled) and at least unset it for GC
383 cleanMapping(buffers[bufNr]);
385 buffers[bufNr] = null;