1 /*
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.util.ArrayList;
22 import java.util.List;
23 import java.util.NavigableSet;
24
25 import org.apache.hadoop.classification.InterfaceAudience;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
29 import org.apache.hadoop.hbase.util.Bytes;
30
31 /**
32 * This class is used for the tracking and enforcement of columns and numbers
33 * of versions during the course of a Get or Scan operation, when explicit
34 * column qualifiers have been asked for in the query.
35 *
36 * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
37 * for both scans and gets. The main difference is 'next' and 'done' collapse
38 * for the scan case (since we see all columns in order), and we only reset
39 * between rows.
40 *
41 * <p>
42 * This class is utilized by {@link ScanQueryMatcher} mainly through two methods:
43 * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
44 * conditions of the query.
45 * <ul><li>{@link #getNextRowOrNextColumn} is called whenever ScanQueryMatcher
46 * believes that the current column should be skipped (by timestamp, filter etc.)
47 * <p>
48 * These two methods returns a
49 * {@link org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode}
50 * to define what action should be taken.
51 * <p>
52 * This class is NOT thread-safe as queries are never multi-threaded
53 */
54 @InterfaceAudience.Private
55 public class ExplicitColumnTracker implements ColumnTracker {
56
57 private final int maxVersions;
58 private final int minVersions;
59
60 /**
61 * Contains the list of columns that the ExplicitColumnTracker is tracking.
62 * Each ColumnCount instance also tracks how many versions of the requested
63 * column have been returned.
64 */
65 private final List<ColumnCount> columns;
66 private int index;
67 private ColumnCount column;
68 /** Keeps track of the latest timestamp included for current column.
69 * Used to eliminate duplicates. */
70 private long latestTSOfCurrentColumn;
71 private long oldestStamp;
72
73 /**
74 * Default constructor.
75 * @param columns columns specified user in query
76 * @param minVersions minimum number of versions to keep
77 * @param maxVersions maximum versions to return per column
78 * @param oldestUnexpiredTS the oldest timestamp we are interested in,
79 * based on TTL
80 */
81 public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
82 int maxVersions, long oldestUnexpiredTS) {
83 this.maxVersions = maxVersions;
84 this.minVersions = minVersions;
85 this.oldestStamp = oldestUnexpiredTS;
86 this.columns = new ArrayList<ColumnCount>(columns.size());
87 for(byte [] column : columns) {
88 this.columns.add(new ColumnCount(column));
89 }
90 reset();
91 }
92
93 /**
94 * Done when there are no more columns to match against.
95 */
96 public boolean done() {
97 return this.index >= this.columns.size();
98 }
99
100 public ColumnCount getColumnHint() {
101 return this.column;
102 }
103
104 /**
105 * {@inheritDoc}
106 */
107 @Override
108 public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
109 int length, long timestamp, byte type, boolean ignoreCount) {
110 // delete markers should never be passed to an
111 // *Explicit*ColumnTracker
112 assert !KeyValue.isDelete(type);
113 do {
114 // No more columns left, we are done with this query
115 if(done()) {
116 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
117 }
118
119 // No more columns to match against, done with storefile
120 if(this.column == null) {
121 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
122 }
123
124 // Compare specific column to current column
125 int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
126 column.getLength(), bytes, offset, length);
127
128 // Column Matches. If it is not a duplicate key, increment the version count
129 // and include.
130 if(ret == 0) {
131 if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
132
133 //If column matches, check if it is a duplicate timestamp
134 if (sameAsPreviousTS(timestamp)) {
135 //If duplicate, skip this Key
136 return ScanQueryMatcher.MatchCode.SKIP;
137 }
138 int count = this.column.increment();
139 if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
140 // Done with versions for this column
141 ++this.index;
142 resetTS();
143 if (done()) {
144 // We have served all the requested columns.
145 this.column = null;
146 return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
147 } else {
148 // We are done with current column; advance to next column
149 // of interest.
150 this.column = this.columns.get(this.index);
151 return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
152 }
153 } else {
154 setTS(timestamp);
155 }
156 return ScanQueryMatcher.MatchCode.INCLUDE;
157 }
158
159 resetTS();
160
161 if (ret > 0) {
162 // The current KV is smaller than the column the ExplicitColumnTracker
163 // is interested in, so seek to that column of interest.
164 return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
165 }
166
167 // The current KV is bigger than the column the ExplicitColumnTracker
168 // is interested in. That means there is no more data for the column
169 // of interest. Advance the ExplicitColumnTracker state to next
170 // column of interest, and check again.
171 if (ret <= -1) {
172 ++this.index;
173 if (done()) {
174 // No more to match, do not include, done with this row.
175 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
176 }
177 // This is the recursive case.
178 this.column = this.columns.get(this.index);
179 }
180 } while(true);
181 }
182
183 // Called between every row.
184 public void reset() {
185 this.index = 0;
186 this.column = this.columns.get(this.index);
187 for(ColumnCount col : this.columns) {
188 col.setCount(0);
189 }
190 resetTS();
191 }
192
193 private void resetTS() {
194 latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
195 }
196
197 private void setTS(long timestamp) {
198 latestTSOfCurrentColumn = timestamp;
199 }
200
201 private boolean sameAsPreviousTS(long timestamp) {
202 return timestamp == latestTSOfCurrentColumn;
203 }
204
205 private boolean isExpired(long timestamp) {
206 return timestamp < oldestStamp;
207 }
208
209 /**
210 * This method is used to inform the column tracker that we are done with
211 * this column. We may get this information from external filters or
212 * timestamp range and we then need to indicate this information to
213 * tracker. It is required only in case of ExplicitColumnTracker.
214 * @param bytes
215 * @param offset
216 * @param length
217 */
218 public void doneWithColumn(byte [] bytes, int offset, int length) {
219 while (this.column != null) {
220 int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(),
221 column.getLength(), bytes, offset, length);
222 resetTS();
223 if (compare <= 0) {
224 ++this.index;
225 if (done()) {
226 // Will not hit any more columns in this storefile
227 this.column = null;
228 } else {
229 this.column = this.columns.get(this.index);
230 }
231 if (compare <= -1)
232 continue;
233 }
234 return;
235 }
236 }
237
238 public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
239 int qualLength) {
240 doneWithColumn(bytes, offset,qualLength);
241
242 if (getColumnHint() == null) {
243 return MatchCode.SEEK_NEXT_ROW;
244 } else {
245 return MatchCode.SEEK_NEXT_COL;
246 }
247 }
248
249 public boolean isDone(long timestamp) {
250 return minVersions <= 0 && isExpired(timestamp);
251 }
252 }