1 /*
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 package org.apache.hadoop.hbase.client;
21
22 import org.apache.hadoop.classification.InterfaceAudience;
23 import org.apache.hadoop.classification.InterfaceStability;
24 import org.apache.hadoop.hbase.HConstants;
25 import org.apache.hadoop.hbase.filter.Filter;
26 import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
27 import org.apache.hadoop.hbase.io.TimeRange;
28 import org.apache.hadoop.hbase.util.Bytes;
29
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.HashMap;
33 import java.util.List;
34 import java.util.Map;
35 import java.util.NavigableSet;
36 import java.util.TreeMap;
37 import java.util.TreeSet;
38
39 /**
40 * Used to perform Scan operations.
41 * <p>
42 * All operations are identical to {@link Get} with the exception of
43 * instantiation. Rather than specifying a single row, an optional startRow
44 * and stopRow may be defined. If rows are not specified, the Scanner will
45 * iterate over all rows.
46 * <p>
47 * To scan everything for each row, instantiate a Scan object.
48 * <p>
49 * To modify scanner caching for just this scan, use {@link #setCaching(int) setCaching}.
50 * If caching is NOT set, we will use the caching value of the hosting {@link HTable}. See
51 * {@link HTable#setScannerCaching(int)}. In addition to row caching, it is possible to specify a
52 * maximum result size, using {@link #setMaxResultSize(long)}. When both are used,
53 * single server requests are limited by either number of rows or maximum result size, whichever
54 * limit comes first.
55 * <p>
56 * To further define the scope of what to get when scanning, perform additional
57 * methods as outlined below.
58 * <p>
59 * To get all columns from specific families, execute {@link #addFamily(byte[]) addFamily}
60 * for each family to retrieve.
61 * <p>
62 * To get specific columns, execute {@link #addColumn(byte[], byte[]) addColumn}
63 * for each column to retrieve.
64 * <p>
65 * To only retrieve columns within a specific range of version timestamps,
66 * execute {@link #setTimeRange(long, long) setTimeRange}.
67 * <p>
68 * To only retrieve columns with a specific timestamp, execute
69 * {@link #setTimeStamp(long) setTimestamp}.
70 * <p>
71 * To limit the number of versions of each column to be returned, execute
72 * {@link #setMaxVersions(int) setMaxVersions}.
73 * <p>
74 * To limit the maximum number of values returned for each call to next(),
75 * execute {@link #setBatch(int) setBatch}.
76 * <p>
77 * To add a filter, execute {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
78 * <p>
79 * Expert: To explicitly disable server-side block caching for this scan,
80 * execute {@link #setCacheBlocks(boolean)}.
81 */
82 @InterfaceAudience.Public
83 @InterfaceStability.Stable
84 public class Scan extends OperationWithAttributes {
85 private static final String RAW_ATTR = "_raw_";
86 private static final String ISOLATION_LEVEL = "_isolationlevel_";
87
88 private byte [] startRow = HConstants.EMPTY_START_ROW;
89 private byte [] stopRow = HConstants.EMPTY_END_ROW;
90 private int maxVersions = 1;
91 private int batch = -1;
92
93 private int storeLimit = -1;
94 private int storeOffset = 0;
95 private boolean getScan;
96
97 // If application wants to collect scan metrics, it needs to
98 // call scan.setAttribute(SCAN_ATTRIBUTES_ENABLE, Bytes.toBytes(Boolean.TRUE))
99 static public final String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";
100 static public final String SCAN_ATTRIBUTES_METRICS_DATA = "scan.attributes.metrics.data";
101
102 // If an application wants to use multiple scans over different tables each scan must
103 // define this attribute with the appropriate table name by calling
104 // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
105 static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";
106
107 /*
108 * -1 means no caching
109 */
110 private int caching = -1;
111 private long maxResultSize = -1;
112 private boolean cacheBlocks = true;
113 private Filter filter = null;
114 private TimeRange tr = new TimeRange();
115 private Map<byte [], NavigableSet<byte []>> familyMap =
116 new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
117 private Boolean loadColumnFamiliesOnDemand = null;
118
119 /**
120 * Create a Scan operation across all rows.
121 */
122 public Scan() {}
123
/**
 * Creates a Scan starting at the specified row with the given filter applied.
 * @param startRow row to start scanner at or after
 * @param filter filter to use for this scan; may be null
 */
public Scan(byte[] startRow, Filter filter) {
  this(startRow);
  this.filter = filter;
}
128
/**
 * Creates a Scan that begins at the given row and has no stop row set.
 * <p>
 * If the specified row does not exist, the Scanner will start from the
 * next closest row after the specified row.
 * @param startRow row to start scanner at or after
 */
public Scan(byte[] startRow) {
  this.startRow = startRow;
}
139
/**
 * Creates a Scan over the half-open row range [startRow, stopRow).
 * @param startRow row to start scanner at or after (inclusive)
 * @param stopRow row to stop scanner before (exclusive)
 */
public Scan(byte[] startRow, byte[] stopRow) {
  this.startRow = startRow;
  this.stopRow = stopRow;
  // Only a non-empty start row that equals the stop row marks this as a Get;
  // two empty rows do not.
  this.getScan = isStartRowAndEqualsStopRow();
}
151
152 /**
153 * Creates a new instance of this class while copying all values.
154 *
155 * @param scan The scan instance to copy from.
156 * @throws IOException When copying the values fails.
157 */
158 public Scan(Scan scan) throws IOException {
159 startRow = scan.getStartRow();
160 stopRow = scan.getStopRow();
161 maxVersions = scan.getMaxVersions();
162 batch = scan.getBatch();
163 storeLimit = scan.getMaxResultsPerColumnFamily();
164 storeOffset = scan.getRowOffsetPerColumnFamily();
165 caching = scan.getCaching();
166 maxResultSize = scan.getMaxResultSize();
167 cacheBlocks = scan.getCacheBlocks();
168 getScan = scan.isGetScan();
169 filter = scan.getFilter(); // clone?
170 loadColumnFamiliesOnDemand = scan.getLoadColumnFamiliesOnDemandValue();
171 TimeRange ctr = scan.getTimeRange();
172 tr = new TimeRange(ctr.getMin(), ctr.getMax());
173 Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
174 for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
175 byte [] fam = entry.getKey();
176 NavigableSet<byte[]> cols = entry.getValue();
177 if (cols != null && cols.size() > 0) {
178 for (byte[] col : cols) {
179 addColumn(fam, col);
180 }
181 } else {
182 addFamily(fam);
183 }
184 }
185 for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
186 setAttribute(attr.getKey(), attr.getValue());
187 }
188 }
189
/**
 * Builds a Scan that mirrors the given Get: the Get's row is used as both start
 * and stop row, and the scan is flagged as a get-scan.
 * <p>
 * NOTE(review): the time range and family map are assigned straight from the Get's
 * getters; if those return the Get's internal objects, later changes made through
 * this Scan would also be visible to the Get -- confirm against {@link Get}.
 * @param get get to model scan after
 */
public Scan(Get get) {
  byte[] row = get.getRow();
  this.startRow = row;
  this.stopRow = row;
  this.filter = get.getFilter();
  this.cacheBlocks = get.getCacheBlocks();
  this.maxVersions = get.getMaxVersions();
  this.storeLimit = get.getMaxResultsPerColumnFamily();
  this.storeOffset = get.getRowOffsetPerColumnFamily();
  this.tr = get.getTimeRange();
  this.familyMap = get.getFamilyMap();
  this.getScan = true;
}
206
207 public boolean isGetScan() {
208 return this.getScan || isStartRowAndEqualsStopRow();
209 }
210
211 private boolean isStartRowAndEqualsStopRow() {
212 return this.startRow != null && this.startRow.length > 0 &&
213 Bytes.equals(this.startRow, this.stopRow);
214 }
215 /**
216 * Get all columns from the specified family.
217 * <p>
218 * Overrides previous calls to addColumn for this family.
219 * @param family family name
220 * @return this
221 */
222 public Scan addFamily(byte [] family) {
223 familyMap.remove(family);
224 familyMap.put(family, null);
225 return this;
226 }
227
228 /**
229 * Get the column from the specified family with the specified qualifier.
230 * <p>
231 * Overrides previous calls to addFamily for this family.
232 * @param family family name
233 * @param qualifier column qualifier
234 * @return this
235 */
236 public Scan addColumn(byte [] family, byte [] qualifier) {
237 NavigableSet<byte []> set = familyMap.get(family);
238 if(set == null) {
239 set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
240 }
241 if (qualifier == null) {
242 qualifier = HConstants.EMPTY_BYTE_ARRAY;
243 }
244 set.add(qualifier);
245 familyMap.put(family, set);
246 return this;
247 }
248
249 /**
250 * Get versions of columns only within the specified timestamp range,
251 * [minStamp, maxStamp). Note, default maximum versions to return is 1. If
252 * your time range spans more than one version and you want all versions
253 * returned, up the number of versions beyond the defaut.
254 * @param minStamp minimum timestamp value, inclusive
255 * @param maxStamp maximum timestamp value, exclusive
256 * @throws IOException if invalid time range
257 * @see #setMaxVersions()
258 * @see #setMaxVersions(int)
259 * @return this
260 */
261 public Scan setTimeRange(long minStamp, long maxStamp)
262 throws IOException {
263 tr = new TimeRange(minStamp, maxStamp);
264 return this;
265 }
266
267 /**
268 * Get versions of columns with the specified timestamp. Note, default maximum
269 * versions to return is 1. If your time range spans more than one version
270 * and you want all versions returned, up the number of versions beyond the
271 * defaut.
272 * @param timestamp version timestamp
273 * @see #setMaxVersions()
274 * @see #setMaxVersions(int)
275 * @return this
276 */
277 public Scan setTimeStamp(long timestamp) {
278 try {
279 tr = new TimeRange(timestamp, timestamp+1);
280 } catch(IOException e) {
281 // Will never happen
282 }
283 return this;
284 }
285
286 /**
287 * Set the start row of the scan.
288 * @param startRow row to start scan on (inclusive)
289 * Note: In order to make startRow exclusive add a trailing 0 byte
290 * @return this
291 */
292 public Scan setStartRow(byte [] startRow) {
293 this.startRow = startRow;
294 return this;
295 }
296
297 /**
298 * Set the stop row.
299 * @param stopRow row to end at (exclusive)
300 * Note: In order to make stopRow inclusive add a trailing 0 byte
301 * @return this
302 */
303 public Scan setStopRow(byte [] stopRow) {
304 this.stopRow = stopRow;
305 return this;
306 }
307
308 /**
309 * Get all available versions.
310 * @return this
311 */
312 public Scan setMaxVersions() {
313 this.maxVersions = Integer.MAX_VALUE;
314 return this;
315 }
316
317 /**
318 * Get up to the specified number of versions of each column.
319 * @param maxVersions maximum versions for each column
320 * @return this
321 */
322 public Scan setMaxVersions(int maxVersions) {
323 this.maxVersions = maxVersions;
324 return this;
325 }
326
327 /**
328 * Set the maximum number of values to return for each call to next()
329 * @param batch the maximum number of values
330 */
331 public void setBatch(int batch) {
332 if (this.hasFilter() && this.filter.hasFilterRow()) {
333 throw new IncompatibleFilterException(
334 "Cannot set batch on a scan using a filter" +
335 " that returns true for filter.hasFilterRow");
336 }
337 this.batch = batch;
338 }
339
340 /**
341 * Set the maximum number of values to return per row per Column Family
342 * @param limit the maximum number of values returned / row / CF
343 */
344 public void setMaxResultsPerColumnFamily(int limit) {
345 this.storeLimit = limit;
346 }
347
348 /**
349 * Set offset for the row per Column Family.
350 * @param offset is the number of kvs that will be skipped.
351 */
352 public void setRowOffsetPerColumnFamily(int offset) {
353 this.storeOffset = offset;
354 }
355
356 /**
357 * Set the number of rows for caching that will be passed to scanners.
358 * If not set, the default setting from {@link HTable#getScannerCaching()} will apply.
359 * Higher caching values will enable faster scanners but will use more memory.
360 * @param caching the number of rows for caching
361 */
362 public void setCaching(int caching) {
363 this.caching = caching;
364 }
365
366 /**
367 * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)}
368 */
369 public long getMaxResultSize() {
370 return maxResultSize;
371 }
372
373 /**
374 * Set the maximum result size. The default is -1; this means that no specific
375 * maximum result size will be set for this scan, and the global configured
376 * value will be used instead. (Defaults to unlimited).
377 *
378 * @param maxResultSize The maximum result size in bytes.
379 */
380 public void setMaxResultSize(long maxResultSize) {
381 this.maxResultSize = maxResultSize;
382 }
383
384 /**
385 * Apply the specified server-side filter when performing the Scan.
386 * @param filter filter to run on the server
387 * @return this
388 */
389 public Scan setFilter(Filter filter) {
390 this.filter = filter;
391 return this;
392 }
393
394 /**
395 * Setting the familyMap
396 * @param familyMap map of family to qualifier
397 * @return this
398 */
399 public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
400 this.familyMap = familyMap;
401 return this;
402 }
403
404 /**
405 * Getting the familyMap
406 * @return familyMap
407 */
408 public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
409 return this.familyMap;
410 }
411
412 /**
413 * @return the number of families in familyMap
414 */
415 public int numFamilies() {
416 if(hasFamilies()) {
417 return this.familyMap.size();
418 }
419 return 0;
420 }
421
422 /**
423 * @return true if familyMap is non empty, false otherwise
424 */
425 public boolean hasFamilies() {
426 return !this.familyMap.isEmpty();
427 }
428
429 /**
430 * @return the keys of the familyMap
431 */
432 public byte[][] getFamilies() {
433 if(hasFamilies()) {
434 return this.familyMap.keySet().toArray(new byte[0][0]);
435 }
436 return null;
437 }
438
439 /**
440 * @return the startrow
441 */
442 public byte [] getStartRow() {
443 return this.startRow;
444 }
445
446 /**
447 * @return the stoprow
448 */
449 public byte [] getStopRow() {
450 return this.stopRow;
451 }
452
453 /**
454 * @return the max number of versions to fetch
455 */
456 public int getMaxVersions() {
457 return this.maxVersions;
458 }
459
460 /**
461 * @return maximum number of values to return for a single call to next()
462 */
463 public int getBatch() {
464 return this.batch;
465 }
466
467 /**
468 * @return maximum number of values to return per row per CF
469 */
470 public int getMaxResultsPerColumnFamily() {
471 return this.storeLimit;
472 }
473
474 /**
475 * Method for retrieving the scan's offset per row per column
476 * family (#kvs to be skipped)
477 * @return row offset
478 */
479 public int getRowOffsetPerColumnFamily() {
480 return this.storeOffset;
481 }
482
483 /**
484 * @return caching the number of rows fetched when calling next on a scanner
485 */
486 public int getCaching() {
487 return this.caching;
488 }
489
490 /**
491 * @return TimeRange
492 */
493 public TimeRange getTimeRange() {
494 return this.tr;
495 }
496
497 /**
498 * @return RowFilter
499 */
500 public Filter getFilter() {
501 return filter;
502 }
503
504 /**
505 * @return true is a filter has been specified, false if not
506 */
507 public boolean hasFilter() {
508 return filter != null;
509 }
510
511 /**
512 * Set whether blocks should be cached for this Scan.
513 * <p>
514 * This is true by default. When true, default settings of the table and
515 * family are used (this will never override caching blocks if the block
516 * cache is disabled for that family or entirely).
517 *
518 * @param cacheBlocks if false, default settings are overridden and blocks
519 * will not be cached
520 */
521 public void setCacheBlocks(boolean cacheBlocks) {
522 this.cacheBlocks = cacheBlocks;
523 }
524
525 /**
526 * Get whether blocks should be cached for this Scan.
527 * @return true if default caching should be used, false if blocks should not
528 * be cached
529 */
530 public boolean getCacheBlocks() {
531 return cacheBlocks;
532 }
533
534 /**
535 * Set the value indicating whether loading CFs on demand should be allowed (cluster
536 * default is false). On-demand CF loading doesn't load column families until necessary, e.g.
537 * if you filter on one column, the other column family data will be loaded only for the rows
538 * that are included in result, not all rows like in normal case.
539 * With column-specific filters, like SingleColumnValueFilter w/filterIfMissing == true,
540 * this can deliver huge perf gains when there's a cf with lots of data; however, it can
541 * also lead to some inconsistent results, as follows:
542 * - if someone does a concurrent update to both column families in question you may get a row
543 * that never existed, e.g. for { rowKey = 5, { cat_videos => 1 }, { video => "my cat" } }
544 * someone puts rowKey 5 with { cat_videos => 0 }, { video => "my dog" }, concurrent scan
545 * filtering on "cat_videos == 1" can get { rowKey = 5, { cat_videos => 1 },
546 * { video => "my dog" } }.
547 * - if there's a concurrent split and you have more than 2 column families, some rows may be
548 * missing some column families.
549 */
550 public void setLoadColumnFamiliesOnDemand(boolean value) {
551 this.loadColumnFamiliesOnDemand = value;
552 }
553
554 /**
555 * Get the raw loadColumnFamiliesOnDemand setting; if it's not set, can be null.
556 */
557 public Boolean getLoadColumnFamiliesOnDemandValue() {
558 return this.loadColumnFamiliesOnDemand;
559 }
560
561 /**
562 * Get the logical value indicating whether on-demand CF loading should be allowed.
563 */
564 public boolean doLoadColumnFamiliesOnDemand() {
565 return (this.loadColumnFamiliesOnDemand != null)
566 && this.loadColumnFamiliesOnDemand.booleanValue();
567 }
568
569 /**
570 * Compile the table and column family (i.e. schema) information
571 * into a String. Useful for parsing and aggregation by debugging,
572 * logging, and administration tools.
573 * @return Map
574 */
575 @Override
576 public Map<String, Object> getFingerprint() {
577 Map<String, Object> map = new HashMap<String, Object>();
578 List<String> families = new ArrayList<String>();
579 if(this.familyMap.size() == 0) {
580 map.put("families", "ALL");
581 return map;
582 } else {
583 map.put("families", families);
584 }
585 for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
586 this.familyMap.entrySet()) {
587 families.add(Bytes.toStringBinary(entry.getKey()));
588 }
589 return map;
590 }
591
592 /**
593 * Compile the details beyond the scope of getFingerprint (row, columns,
594 * timestamps, etc.) into a Map along with the fingerprinted information.
595 * Useful for debugging, logging, and administration tools.
596 * @param maxCols a limit on the number of columns output prior to truncation
597 * @return Map
598 */
599 @Override
600 public Map<String, Object> toMap(int maxCols) {
601 // start with the fingerpring map and build on top of it
602 Map<String, Object> map = getFingerprint();
603 // map from families to column list replaces fingerprint's list of families
604 Map<String, List<String>> familyColumns =
605 new HashMap<String, List<String>>();
606 map.put("families", familyColumns);
607 // add scalar information first
608 map.put("startRow", Bytes.toStringBinary(this.startRow));
609 map.put("stopRow", Bytes.toStringBinary(this.stopRow));
610 map.put("maxVersions", this.maxVersions);
611 map.put("batch", this.batch);
612 map.put("caching", this.caching);
613 map.put("maxResultSize", this.maxResultSize);
614 map.put("cacheBlocks", this.cacheBlocks);
615 map.put("loadColumnFamiliesOnDemand", this.loadColumnFamiliesOnDemand);
616 List<Long> timeRange = new ArrayList<Long>();
617 timeRange.add(this.tr.getMin());
618 timeRange.add(this.tr.getMax());
619 map.put("timeRange", timeRange);
620 int colCount = 0;
621 // iterate through affected families and list out up to maxCols columns
622 for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
623 this.familyMap.entrySet()) {
624 List<String> columns = new ArrayList<String>();
625 familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
626 if(entry.getValue() == null) {
627 colCount++;
628 --maxCols;
629 columns.add("ALL");
630 } else {
631 colCount += entry.getValue().size();
632 if (maxCols <= 0) {
633 continue;
634 }
635 for (byte [] column : entry.getValue()) {
636 if (--maxCols <= 0) {
637 continue;
638 }
639 columns.add(Bytes.toStringBinary(column));
640 }
641 }
642 }
643 map.put("totalColumns", colCount);
644 if (this.filter != null) {
645 map.put("filter", this.filter.toString());
646 }
647 // add the id if set
648 if (getId() != null) {
649 map.put("id", getId());
650 }
651 return map;
652 }
653
654 /**
655 * Enable/disable "raw" mode for this scan.
656 * If "raw" is enabled the scan will return all
657 * delete marker and deleted rows that have not
658 * been collected, yet.
659 * This is mostly useful for Scan on column families
660 * that have KEEP_DELETED_ROWS enabled.
661 * It is an error to specify any column when "raw" is set.
662 * @param raw True/False to enable/disable "raw" mode.
663 */
664 public void setRaw(boolean raw) {
665 setAttribute(RAW_ATTR, Bytes.toBytes(raw));
666 }
667
668 /**
669 * @return True if this Scan is in "raw" mode.
670 */
671 public boolean isRaw() {
672 byte[] attr = getAttribute(RAW_ATTR);
673 return attr == null ? false : Bytes.toBoolean(attr);
674 }
675
676 /*
677 * Set the isolation level for this scan. If the
678 * isolation level is set to READ_UNCOMMITTED, then
679 * this scan will return data from committed and
680 * uncommitted transactions. If the isolation level
681 * is set to READ_COMMITTED, then this scan will return
682 * data from committed transactions only. If a isolation
683 * level is not explicitly set on a Scan, then it
684 * is assumed to be READ_COMMITTED.
685 * @param level IsolationLevel for this scan
686 */
687 public void setIsolationLevel(IsolationLevel level) {
688 setAttribute(ISOLATION_LEVEL, level.toBytes());
689 }
690 /*
691 * @return The isolation level of this scan.
692 * If no isolation level was set for this scan object,
693 * then it returns READ_COMMITTED.
694 * @return The IsolationLevel for this scan
695 */
696 public IsolationLevel getIsolationLevel() {
697 byte[] attr = getAttribute(ISOLATION_LEVEL);
698 return attr == null ? IsolationLevel.READ_COMMITTED :
699 IsolationLevel.fromBytes(attr);
700 }
701 }