1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.IOException;
22 import java.util.SortedSet;
23
24 import org.apache.hadoop.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.KeyValue;
26 import org.apache.hadoop.hbase.client.Scan;
27
28 /**
29 * Scanner that returns the next KeyValue.
30 */
31 @InterfaceAudience.Private
32 public interface KeyValueScanner {
33 /**
34 * Look at the next KeyValue in this scanner, but do not iterate scanner.
35 * @return the next KeyValue
36 */
37 KeyValue peek();
38
39 /**
40 * Return the next KeyValue in this scanner, iterating the scanner
41 * @return the next KeyValue
42 */
43 KeyValue next() throws IOException;
44
45 /**
46 * Seek the scanner at or after the specified KeyValue.
47 * @param key seek value
48 * @return true if scanner has values left, false if end of scanner
49 */
50 boolean seek(KeyValue key) throws IOException;
51
52 /**
53 * Reseek the scanner at or after the specified KeyValue.
54 * This method is guaranteed to seek at or after the required key only if the
55 * key comes after the current position of the scanner. Should not be used
56 * to seek to a key which may come before the current position.
57 * @param key seek value (should be non-null)
58 * @return true if scanner has values left, false if end of scanner
59 */
60 boolean reseek(KeyValue key) throws IOException;
61
62 /**
63 * Get the sequence id associated with this KeyValueScanner. This is required
64 * for comparing multiple files to find out which one has the latest data.
65 * The default implementation for this would be to return 0. A file having
66 * lower sequence id will be considered to be the older one.
67 */
68 long getSequenceID();
69
70 /**
71 * Close the KeyValue scanner.
72 */
73 void close();
74
75 /**
76 * Allows to filter out scanners (both StoreFile and memstore) that we don't
77 * want to use based on criteria such as Bloom filters and timestamp ranges.
78 * @param scan the scan that we are selecting scanners for
79 * @param columns the set of columns in the current column family, or null if
80 * not specified by the scan
81 * @param oldestUnexpiredTS the oldest timestamp we are interested in for
82 * this query, based on TTL
83 * @return true if the scanner should be included in the query
84 */
85 boolean shouldUseScanner(
86 Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS
87 );
88
89 // "Lazy scanner" optimizations
90
91 /**
92 * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only
93 * does a seek operation after checking that it is really necessary for the
94 * row/column combination specified by the kv parameter. This function was
95 * added to avoid unnecessary disk seeks by checking row-column Bloom filters
96 * before a seek on multi-column get/scan queries, and to optimize by looking
97 * up more recent files first.
98 * @param forward do a forward-only "reseek" instead of a random-access seek
99 * @param useBloom whether to enable multi-column Bloom filter optimization
100 */
101 boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
102 throws IOException;
103
104 /**
105 * We optimize our store scanners by checking the most recent store file
106 * first, so we sometimes pretend we have done a seek but delay it until the
107 * store scanner bubbles up to the top of the key-value heap. This method is
108 * then used to ensure the top store file scanner has done a seek operation.
109 */
110 boolean realSeekDone();
111
112 /**
113 * Does the real seek operation in case it was skipped by
114 * seekToRowCol(KeyValue, boolean) (TODO: Whats this?). Note that this function should
115 * be never called on scanners that always do real seek operations (i.e. most
116 * of the scanners). The easiest way to achieve this is to call
117 * {@link #realSeekDone()} first.
118 */
119 void enforceSeek() throws IOException;
120
121 /**
122 * @return true if this is a file scanner. Otherwise a memory scanner is
123 * assumed.
124 */
125 boolean isFileScanner();
126 }