1 package org.apache.lucene.index;
3 import java.util.regex.Pattern;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * This class contains useful constants representing filenames and extensions
 * used by Lucene, as well as convenience methods for querying whether a file
 * name matches an extension ({@link #matchesExtension(String, String)
 * matchesExtension}), as well as generating file names from a segment name,
 * generation and extension (
 * {@link #fileNameFromGeneration(String, String, long) fileNameFromGeneration},
 * {@link #segmentFileName(String, String) segmentFileName}).
 */
33 public final class IndexFileNames {
35 /** Name of the index segment file */
36 public static final String SEGMENTS = "segments";
38 /** Name of the generation reference file name */
39 public static final String SEGMENTS_GEN = "segments.gen";
41 /** Name of the index deletable file (only used in
42 * pre-lockless indices) */
43 public static final String DELETABLE = "deletable";
45 /** Extension of norms file */
46 public static final String NORMS_EXTENSION = "nrm";
48 /** Extension of freq postings file */
49 public static final String FREQ_EXTENSION = "frq";
51 /** Extension of prox postings file */
52 public static final String PROX_EXTENSION = "prx";
54 /** Extension of terms file */
55 public static final String TERMS_EXTENSION = "tis";
57 /** Extension of terms index file */
58 public static final String TERMS_INDEX_EXTENSION = "tii";
60 /** Extension of stored fields index file */
61 public static final String FIELDS_INDEX_EXTENSION = "fdx";
63 /** Extension of stored fields file */
64 public static final String FIELDS_EXTENSION = "fdt";
66 /** Extension of vectors fields file */
67 public static final String VECTORS_FIELDS_EXTENSION = "tvf";
69 /** Extension of vectors documents file */
70 public static final String VECTORS_DOCUMENTS_EXTENSION = "tvd";
72 /** Extension of vectors index file */
73 public static final String VECTORS_INDEX_EXTENSION = "tvx";
75 /** Extension of compound file */
76 public static final String COMPOUND_FILE_EXTENSION = "cfs";
78 /** Extension of compound file for doc store files*/
79 public static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";
81 /** Extension of deletes */
82 public static final String DELETES_EXTENSION = "del";
84 /** Extension of field infos */
85 public static final String FIELD_INFOS_EXTENSION = "fnm";
87 /** Extension of plain norms */
88 public static final String PLAIN_NORMS_EXTENSION = "f";
90 /** Extension of separate norms */
91 public static final String SEPARATE_NORMS_EXTENSION = "s";
93 /** Extension of gen file */
94 public static final String GEN_EXTENSION = "gen";
97 * This array contains all filename extensions used by
98 * Lucene's index files, with two exceptions, namely the
99 * extension made up from <code>.f</code> + a number and
100 * from <code>.s</code> + a number. Also note that
101 * Lucene's <code>segments_N</code> files do not have any
102 * filename extension.
104 public static final String INDEX_EXTENSIONS[] = new String[] {
105 COMPOUND_FILE_EXTENSION,
106 FIELD_INFOS_EXTENSION,
107 FIELDS_INDEX_EXTENSION,
109 TERMS_INDEX_EXTENSION,
114 VECTORS_INDEX_EXTENSION,
115 VECTORS_DOCUMENTS_EXTENSION,
116 VECTORS_FIELDS_EXTENSION,
119 COMPOUND_FILE_STORE_EXTENSION,
122 /** File extensions that are added to a compound file
123 * (same as above, minus "del", "gen", "cfs"). */
124 public static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] {
125 FIELD_INFOS_EXTENSION,
126 FIELDS_INDEX_EXTENSION,
128 TERMS_INDEX_EXTENSION,
132 VECTORS_INDEX_EXTENSION,
133 VECTORS_DOCUMENTS_EXTENSION,
134 VECTORS_FIELDS_EXTENSION,
138 public static final String[] STORE_INDEX_EXTENSIONS = new String[] {
139 VECTORS_INDEX_EXTENSION,
140 VECTORS_FIELDS_EXTENSION,
141 VECTORS_DOCUMENTS_EXTENSION,
142 FIELDS_INDEX_EXTENSION,
146 public static final String[] NON_STORE_INDEX_EXTENSIONS = new String[] {
147 FIELD_INFOS_EXTENSION,
151 TERMS_INDEX_EXTENSION,
155 /** File extensions of old-style index files */
156 public static final String COMPOUND_EXTENSIONS[] = new String[] {
157 FIELD_INFOS_EXTENSION,
160 FIELDS_INDEX_EXTENSION,
162 TERMS_INDEX_EXTENSION,
166 /** File extensions for term vector support */
167 public static final String VECTOR_EXTENSIONS[] = new String[] {
168 VECTORS_INDEX_EXTENSION,
169 VECTORS_DOCUMENTS_EXTENSION,
170 VECTORS_FIELDS_EXTENSION
174 * Computes the full file name from base, extension and generation. If the
175 * generation is -1, the file name is null. If it's 0, the file name is
176 * <base>.<ext>. If it's > 0, the file name is
177 * <base>_<gen>.<ext>.<br>
178 * <b>NOTE:</b> .<ext> is added to the name only if <code>ext</code> is
179 * not an empty string.
181 * @param base main part of the file name
182 * @param ext extension of the filename
183 * @param gen generation
185 public static final String fileNameFromGeneration(String base, String ext, long gen) {
186 if (gen == SegmentInfo.NO) {
188 } else if (gen == SegmentInfo.WITHOUT_GEN) {
189 return segmentFileName(base, ext);
191 // The '6' part in the length is: 1 for '.', 1 for '_' and 4 as estimate
192 // to the gen length as string (hopefully an upper limit so SB won't
193 // expand in the middle.
194 StringBuilder res = new StringBuilder(base.length() + 6 + ext.length())
195 .append(base).append('_').append(Long.toString(gen, Character.MAX_RADIX));
196 if (ext.length() > 0) {
197 res.append('.').append(ext);
199 return res.toString();
204 * Returns true if the provided filename is one of the doc store files (ends
205 * with an extension in {@link #STORE_INDEX_EXTENSIONS}).
207 public static final boolean isDocStoreFile(String fileName) {
208 if (fileName.endsWith(COMPOUND_FILE_STORE_EXTENSION))
210 for (String ext : STORE_INDEX_EXTENSIONS) {
211 if (fileName.endsWith(ext))
218 * Returns the file name that matches the given segment name and extension.
219 * This method takes care to return the full file name in the form
220 * <segmentName>.<ext>, therefore you don't need to prefix the
221 * extension with a '.'.<br>
222 * <b>NOTE:</b> .<ext> is added to the result file name only if
223 * <code>ext</code> is not empty.
225 public static final String segmentFileName(String segmentName, String ext) {
226 if (ext.length() > 0) {
227 return new StringBuilder(segmentName.length() + 1 + ext.length()).append(
228 segmentName).append('.').append(ext).toString();
235 * Returns true if the given filename ends with the given extension. One
236 * should provide a <i>pure</i> extension, without '.'.
238 public static final boolean matchesExtension(String filename, String ext) {
239 // It doesn't make a difference whether we allocate a StringBuilder ourself
240 // or not, since there's only 1 '+' operator.
241 return filename.endsWith("." + ext);
245 * Strips the segment file name out of the given one. If you used
246 * {@link #segmentFileName} or {@link #fileNameFromGeneration} to create your
247 * files, then this method simply removes whatever comes before the first '.',
248 * or the second '_' (excluding both), in case of deleted docs.
250 * @return the filename with the segment name removed, or the given filename
251 * if it does not contain a '.' and '_'.
253 public static final String stripSegmentName(String filename) {
254 // If it is a .del file, there's an '_' after the first character
255 int idx = filename.indexOf('_', 1);
257 // If it's not, strip everything that's before the '.'
258 idx = filename.indexOf('.');
261 filename = filename.substring(idx);
267 * Returns true if the given filename ends with the separate norms file
268 * pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}.
270 public static boolean isSeparateNormsFile(String filename) {
271 int idx = filename.lastIndexOf('.');
272 if (idx == -1) return false;
273 String ext = filename.substring(idx + 1);
274 return Pattern.matches(SEPARATE_NORMS_EXTENSION + "[0-9]+", ext);