1 /**
2 * Copyright 2010 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20 package org.apache.hadoop.hbase.filter;
21
22 import org.apache.hadoop.hbase.HConstants;
23 import org.apache.hadoop.hbase.util.Bytes;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27
28 import java.io.DataInput;
29 import java.io.DataOutput;
30 import java.io.IOException;
31 import java.nio.charset.Charset;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.util.Arrays;
34 import java.util.regex.Pattern;
35
36 /**
37 * This comparator is for use with {@link CompareFilter} implementations, such
38 * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
39 * filtering based on the value of a given column. Use it to test if a given
40 * regular expression matches a cell value in the column.
41 * <p>
42 * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
43 * <p>
44 * For example:
45 * <p>
46 * <pre>
47 * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
48 * new RegexStringComparator(
49 * // v4 IP address
50 * "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
51 * "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
52 * "|" +
53 * // v6 IP address
54 * "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
55 * "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
56 * </pre>
57 */
58 public class RegexStringComparator extends WritableByteArrayComparable {
59
60 private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
61
62 private Charset charset = Charset.forName(HConstants.UTF8_ENCODING);
63
64 private Pattern pattern;
65
66 /** Nullary constructor for Writable, do not use */
67 public RegexStringComparator() { }
68
69 /**
70 * Constructor
71 * @param expr a valid regular expression
72 */
73 public RegexStringComparator(String expr) {
74 super(Bytes.toBytes(expr));
75 this.pattern = Pattern.compile(expr, Pattern.DOTALL);
76 }
77
78 /**
79 * Specifies the {@link Charset} to use to convert the row key to a String.
80 * <p>
81 * The row key needs to be converted to a String in order to be matched
82 * against the regular expression. This method controls which charset is
83 * used to do this conversion.
84 * <p>
85 * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
86 * is recommended.
87 * @param charset The charset to use.
88 */
89 public void setCharset(final Charset charset) {
90 this.charset = charset;
91 }
92
93 @Override
94 public int compareTo(byte[] value, int offset, int length) {
95 // Use find() for subsequence match instead of matches() (full sequence
96 // match) to adhere to the principle of least surprise.
97 String tmp;
98 if (length < value.length / 2) {
99 // See HBASE-9428. Make a copy of the relevant part of the byte[],
100 // or the JDK will copy the entire byte[] during String decode
101 tmp = new String(Arrays.copyOfRange(value, offset, offset + length), charset);
102 } else {
103 tmp = new String(value, offset, length, charset);
104 }
105 return pattern.matcher(tmp).find() ? 0 : 1;
106 }
107
108 @Override
109 public void readFields(DataInput in) throws IOException {
110 final String expr = in.readUTF();
111 this.value = Bytes.toBytes(expr);
112 this.pattern = Pattern.compile(expr);
113 final String charset = in.readUTF();
114 if (charset.length() > 0) {
115 try {
116 this.charset = Charset.forName(charset);
117 } catch (IllegalCharsetNameException e) {
118 LOG.error("invalid charset", e);
119 }
120 }
121 }
122
123 @Override
124 public void write(DataOutput out) throws IOException {
125 out.writeUTF(pattern.toString());
126 out.writeUTF(charset.name());
127 }
128
129 }