1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  package org.apache.hadoop.hbase.filter;
19  
20  import com.google.protobuf.ByteString;
21  import com.google.protobuf.InvalidProtocolBufferException;
22  import org.apache.hadoop.classification.InterfaceAudience;
23  import org.apache.hadoop.classification.InterfaceStability;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.exceptions.DeserializationException;
26  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
27  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
28  import org.apache.hadoop.hbase.util.Bytes;
29  import org.apache.hadoop.hbase.util.Pair;
30  
31  import java.util.ArrayList;
32  import java.util.Arrays;
33  import java.util.List;
34  
35  
36  
37  
38  
39  
40  
41  
42  
43  
44  
45  
46  
47  
48  
49  
50  
51  
52  
53  
54  
55  
56  
57  
58  
59  
60  
61  
62  
63  @InterfaceAudience.Public
64  @InterfaceStability.Evolving
65  public class FuzzyRowFilter extends FilterBase {
66    private List<Pair<byte[], byte[]>> fuzzyKeysData;
67    private boolean done = false;
68  
69    public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
70      this.fuzzyKeysData = fuzzyKeysData;
71    }
72  
73    
74    @Override
75    public ReturnCode filterKeyValue(KeyValue kv) {
76      byte[] rowKey = kv.getRow();
77      
78      SatisfiesCode bestOption = SatisfiesCode.NO_NEXT;
79      for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
80        SatisfiesCode satisfiesCode =
81                satisfies(rowKey, fuzzyData.getFirst(), fuzzyData.getSecond());
82        if (satisfiesCode == SatisfiesCode.YES) {
83          return ReturnCode.INCLUDE;
84        }
85  
86        if (satisfiesCode == SatisfiesCode.NEXT_EXISTS) {
87          bestOption = SatisfiesCode.NEXT_EXISTS;
88        }
89      }
90  
91      if (bestOption == SatisfiesCode.NEXT_EXISTS) {
92        return ReturnCode.SEEK_NEXT_USING_HINT;
93      }
94  
95      
96      done = true;
97      return ReturnCode.NEXT_ROW;
98    }
99  
100   @Override
101   public KeyValue getNextKeyHint(KeyValue currentKV) {
102     byte[] rowKey = currentKV.getRow();
103     byte[] nextRowKey = null;
104     
105     for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
106       byte[] nextRowKeyCandidate = getNextForFuzzyRule(rowKey,
107               fuzzyData.getFirst(), fuzzyData.getSecond());
108       if (nextRowKeyCandidate == null) {
109         continue;
110       }
111       if (nextRowKey == null || Bytes.compareTo(nextRowKeyCandidate, nextRowKey) < 0) {
112         nextRowKey = nextRowKeyCandidate;
113       }
114     }
115 
116     if (nextRowKey == null) {
117       
118       
119       throw new IllegalStateException("No next row key that satisfies fuzzy exists when" +
120                                          " getNextKeyHint() is invoked." +
121                                          " Filter: " + this.toString() +
122                                          " currentKV: " + currentKV.toString());
123     }
124 
125     return KeyValue.createFirstOnRow(nextRowKey);
126   }
127 
128   @Override
129   public boolean filterAllRemaining() {
130     return done;
131   }
132 
133   
134 
135 
136   public byte [] toByteArray() {
137     FilterProtos.FuzzyRowFilter.Builder builder =
138       FilterProtos.FuzzyRowFilter.newBuilder();
139     for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
140       BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
141       bbpBuilder.setFirst(ByteString.copyFrom(fuzzyData.getFirst()));
142       bbpBuilder.setSecond(ByteString.copyFrom(fuzzyData.getSecond()));
143       builder.addFuzzyKeysData(bbpBuilder);
144     }
145     return builder.build().toByteArray();
146   }
147 
148   
149 
150 
151 
152 
153 
154   public static FuzzyRowFilter parseFrom(final byte [] pbBytes)
155   throws DeserializationException {
156     FilterProtos.FuzzyRowFilter proto;
157     try {
158       proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
159     } catch (InvalidProtocolBufferException e) {
160       throw new DeserializationException(e);
161     }
162     int count = proto.getFuzzyKeysDataCount();
163     ArrayList<Pair<byte[], byte[]>> fuzzyKeysData= new ArrayList<Pair<byte[], byte[]>>(count);
164     for (int i = 0; i < count; ++i) {
165       BytesBytesPair current = proto.getFuzzyKeysData(i);
166       byte[] keyBytes = current.getFirst().toByteArray();
167       byte[] keyMeta = current.getSecond().toByteArray();
168       fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
169     }
170     return new FuzzyRowFilter(fuzzyKeysData);
171   }
172 
173   @Override
174   public String toString() {
175     final StringBuilder sb = new StringBuilder();
176     sb.append("FuzzyRowFilter");
177     sb.append("{fuzzyKeysData=");
178     for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
179       sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
180       sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
181     }
182     sb.append("}, ");
183     return sb.toString();
184   }
185 
186   
187 
188   static enum SatisfiesCode {
189     
190     YES,
191     
192     NEXT_EXISTS,
193     
194     NO_NEXT
195   }
196 
197   static SatisfiesCode satisfies(byte[] row,
198                                          byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
199     return satisfies(row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
200   }
201 
202   private static SatisfiesCode satisfies(byte[] row, int offset, int length,
203                                          byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
204     if (row == null) {
205       
206       return SatisfiesCode.YES;
207     }
208 
209     boolean nextRowKeyCandidateExists = false;
210 
211     for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
212       
213       boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
214       boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
215       if (fixedByteIncorrect) {
216         
217         if (nextRowKeyCandidateExists) {
218           return SatisfiesCode.NEXT_EXISTS;
219         }
220 
221         
222         
223         
224         boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
225         return  rowByteLessThanFixed ? SatisfiesCode.NEXT_EXISTS : SatisfiesCode.NO_NEXT;
226       }
227 
228       
229       
230       
231       
232       
233       
234       if (fuzzyKeyMeta[i] == 1 && !isMax(fuzzyKeyBytes[i])) {
235         nextRowKeyCandidateExists = true;
236       }
237     }
238 
239     return SatisfiesCode.YES;
240   }
241 
242   private static boolean isMax(byte fuzzyKeyByte) {
243     return (fuzzyKeyByte & 0xFF) == 255;
244   }
245 
246   static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
247     return getNextForFuzzyRule(row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
248   }
249 
250   
251 
252 
253 
254   private static byte[] getNextForFuzzyRule(byte[] row, int offset, int length,
255                                             byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
256     
257     
258     
259     
260     
261 
262     
263     
264     byte[] result = Arrays.copyOf(fuzzyKeyBytes,
265                                   length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
266     int toInc = -1;
267 
268     boolean increased = false;
269     for (int i = 0; i < result.length; i++) {
270       if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
271         result[i] = row[offset + i];
272         if (!isMax(row[i])) {
273           
274           toInc = i;
275         }
276       } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == 0) {
277         if ((row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF)) {
278           
279           
280           increased = true;
281           break;
282         }
283         if ((row[i + offset] & 0xFF) > (fuzzyKeyBytes[i] & 0xFF)) {
284           
285           
286           
287           break;
288         }
289       }
290     }
291 
292     if (!increased) {
293       if (toInc < 0) {
294         return null;
295       }
296       result[toInc]++;
297 
298       
299       
300       for (int i = toInc + 1; i < result.length; i++) {
301         if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
302           result[i] = 0;
303         }
304       }
305     }
306 
307     return result;
308   }
309 
310   
311 
312 
313 
314 
315   boolean areSerializedFieldsEqual(Filter o) {
316     if (o == this) return true;
317     if (!(o instanceof FuzzyRowFilter)) return false;
318 
319     FuzzyRowFilter other = (FuzzyRowFilter)o;
320     if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
321     for (int i = 0; i < fuzzyKeysData.size(); ++i) {
322       Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
323       Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
324       if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst())
325         && Bytes.equals(thisData.getSecond(), otherData.getSecond()))) {
326         return false;
327       }
328     }
329     return true;
330   }
331 
332 }