/**
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.coprocessor;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
import org.apache.hadoop.hbase.util.Pair;

/**
 * Defines the aggregation functions that are to be supported in this
 * Coprocessor. Each method takes a Scan object and a column interpreter.
 * The Scan object should have a column family (otherwise an exception will be
 * thrown) and an optional column qualifier. In the current implementation,
 * {@link AggregateImplementation}, only one column family and column qualifier
 * combination is served. If more than one is supplied, only the first one is
 * picked. Refer to {@link AggregationClient} for some general conditions on
 * input parameters.
 */
public interface AggregateProtocol extends CoprocessorProtocol {
  /** Version number of this protocol. */
  long VERSION = 1L;

  /**
   * Gives the maximum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of a null column qualifier, the maximum value for
   * the entire column family is returned.
   *
   * @param ci the column interpreter applied to the scanned column
   * @param scan the Scan defining the row range, column family and optional
   *          column qualifier
   * @return the maximum value as described above
   * @throws IOException if the aggregation request fails
   */
  <T, S> T getMax(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;

  /**
   * Gives the minimum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of a null column qualifier, the minimum value for
   * the entire column family is returned.
   *
   * @param ci the column interpreter applied to the scanned column
   * @param scan the Scan defining the row range, column family and optional
   *          column qualifier
   * @return the minimum value as described above
   * @throws IOException if the aggregation request fails
   */
  <T, S> T getMin(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;

  /**
   * Gives the sum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of a null column qualifier, the sum for the entire
   * column family is returned.
   *
   * @param ci the column interpreter applied to the scanned column
   * @param scan the Scan defining the row range, column family and optional
   *          column qualifier
   * @return the sum of values as defined by the column interpreter
   * @throws IOException if the aggregation request fails
   */
  <T, S> S getSum(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;

  /**
   * Gives the row count for the given column family and column qualifier, in
   * the given row range as defined in the Scan object.
   *
   * @param ci the column interpreter applied to the scanned column
   * @param scan the Scan defining the row range, column family and optional
   *          column qualifier
   * @return the row count as described above
   * @throws IOException if the aggregation request fails
   */
  <T, S> long getRowNum(ColumnInterpreter<T, S> ci, Scan scan)
      throws IOException;

  /**
   * Gives a Pair with the first object as the sum and the second object as the
   * row count, computed for a given combination of column qualifier and column
   * family in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column
   * qualifier (if provided). In case of a null column qualifier, an aggregate
   * sum over the entire column family is returned.
   * <p>
   * The average itself is not computed here: it is computed in
   * {@link AggregationClient#avg(byte[], ColumnInterpreter, Scan)} by
   * processing results from all regions, so it is "ok" to pass the sum and a
   * Long row count.
   *
   * @param ci the column interpreter applied to the scanned column
   * @param scan the Scan defining the row range, column family and optional
   *          column qualifier
   * @return a Pair of (sum, row count) from which the average can be derived
   * @throws IOException if the aggregation request fails
   */
  <T, S> Pair<S, Long> getAvg(ColumnInterpreter<T, S> ci, Scan scan)
      throws IOException;

  /**
   * Gives a Pair with the first object a List containing the sum and the sum
   * of squares, and the second object as the row count. It is computed for a
   * given combination of column qualifier and column family in the given row
   * range as defined in the Scan object. In its current implementation, it
   * takes one column family and one column qualifier (if provided).
   * <p>
   * The idea is to get the value of the variance first: the average of the
   * squares less the square of the average. The standard deviation is then the
   * square root of the variance.
   *
   * @param ci the column interpreter applied to the scanned column
   * @param scan the Scan defining the row range, column family and optional
   *          column qualifier
   * @return a Pair of (List containing sum and sum of squares, row count)
   *         from which the standard deviation can be derived
   * @throws IOException if the aggregation request fails
   */
  <T, S> Pair<List<S>, Long> getStd(ColumnInterpreter<T, S> ci, Scan scan)
      throws IOException;

  /**
   * Gives a List containing the sum of values and the sum of weights.
   * It is computed for the combination of column family and column
   * qualifier(s) in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and two column
   * qualifiers. The first qualifier is for the values column and the second
   * qualifier (optional) is for the weight column.
   *
   * @param ci the column interpreter applied to the scanned column(s)
   * @param scan the Scan defining the row range, column family and column
   *          qualifier(s)
   * @return a List containing the sum of values and the sum of weights
   * @throws IOException if the aggregation request fails
   */
  <T, S> List<S> getMedian(ColumnInterpreter<T, S> ci, Scan scan)
      throws IOException;

}