1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package org.apache.hadoop.hbase.util.vint;
20
21 import java.io.IOException;
22 import java.io.OutputStream;
23
24 import org.apache.hadoop.classification.InterfaceAudience;
25
26 /**
27 * UFInt is an abbreviation for Unsigned Fixed-width Integer.
28 *
 * This class converts between non-negative longs and the 1-8 bytes that represent them. All input
 * values must be non-negative (zero is allowed). Max values stored in N bytes are 2^(8N) - 1:
 *
 * N=1: 2^8 - 1 => 255
 * N=2: 2^16 - 1 => 65,535
 * N=3: 2^24 - 1 => 16,777,215
 * N=4: 2^32 - 1 => 4,294,967,295 (larger than Integer.MAX_VALUE, hence the long-based API)
36 *
37 * This was created to get most of the memory savings of a variable length integer when encoding
38 * an array of input integers, but to fix the number of bytes for each integer to the number needed
39 * to store the maximum integer in the array. This enables a binary search to be performed on the
40 * array of encoded integers.
41 *
42 * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if
43 * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the
44 * numbers will also require 2 bytes.
45 *
46 * warnings:
47 * * no input validation for max performance
48 * * no negatives
49 */
50 @InterfaceAudience.Private
public class UFIntTool {

  /** Number of bits in a {@code long}; turns a leading-zero count into a bit length. */
  private static final int NUM_BITS_IN_LONG = Long.SIZE;

  /** Widest encoding that can carry information: a {@code long} has this many bytes. */
  private static final int MAX_NUM_BYTES = Long.SIZE / Byte.SIZE;

  /**
   * Returns the largest non-negative value that fits in the given number of bytes.
   *
   * @param numBytes width of the encoding in bytes
   * @return {@code 2^(8 * numBytes) - 1} for widths below 8, and {@link Long#MAX_VALUE} for
   *         widths of 8 or more, since that is the largest non-negative {@code long}
   */
  public static long maxValueForNumBytes(int numBytes) {
    if (numBytes >= MAX_NUM_BYTES) {
      // Java masks a long shift distance to its low 6 bits, so 1L << 64 is 1L << 0 and the
      // plain formula below would yield 0 for an 8 byte width.
      return Long.MAX_VALUE;
    }
    return (1L << (numBytes * 8)) - 1;
  }

  /**
   * Returns the minimum number of bytes needed to hold the given value.
   *
   * @param value the value to measure; expected to be non-negative
   * @return a width between 1 and 8; zero needs 1 byte
   */
  public static int numBytes(final long value) {
    if (value == 0) {
      // numberOfLeadingZeros(0) is 64, which would make the formula below return 0
      return 1;
    }
    // bit length of the value, rounded up to a whole number of bytes
    return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8;
  }

  /**
   * Encodes the value into a newly allocated big-endian array of the requested width.
   *
   * @param outputWidth number of bytes to produce; a width of 0 yields an empty array
   * @param value the value to encode; bits that do not fit in the width are dropped
   * @return a new array of length {@code outputWidth}
   */
  public static byte[] getBytes(int outputWidth, final long value) {
    byte[] bytes = new byte[outputWidth];
    writeBytes(outputWidth, value, bytes, 0);
    return bytes;
  }

  /**
   * Writes the value big-endian into {@code bytes[offset .. offset + outputWidth - 1]}.
   *
   * A width of 0 writes nothing. Widths above 8 are padded with leading zero bytes.
   *
   * @param outputWidth number of bytes to write
   * @param value the value to encode; bits that do not fit in the width are dropped
   * @param bytes destination array
   * @param offset index of the most significant output byte
   */
  public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) {
    long remaining = value;
    // Fill from the least significant byte backwards. Consuming the value 8 bits at a time
    // avoids computing a shift distance, which would wrap around for widths above 8.
    for (int i = offset + outputWidth - 1; i >= offset; --i) {
      bytes[i] = (byte) remaining;
      remaining >>>= 8;
    }
  }

  /**
   * Writes the value big-endian to the stream using exactly {@code outputWidth} bytes.
   *
   * A width of 0 writes nothing. Widths above 8 are padded with leading zero bytes.
   *
   * @param outputWidth number of bytes to write
   * @param value the value to encode; bits that do not fit in the width are dropped
   * @param os destination stream
   * @throws IOException if the underlying stream fails to accept a byte
   */
  public static void writeBytes(int outputWidth, final long value, OutputStream os)
      throws IOException {
    for (int i = outputWidth - 1; i >= 0; --i) {
      // positions beyond the 8 bytes of a long carry no data
      long shifted = i < MAX_NUM_BYTES ? value >>> (8 * i) : 0L;
      os.write((byte) shifted);
    }
  }

  /**
   * Decodes an entire array as one big-endian unsigned value.
   *
   * @param bytes the encoded bytes; an empty array decodes to 0
   * @return the decoded value
   */
  public static long fromBytes(final byte[] bytes) {
    return fromBytes(bytes, 0, bytes.length);
  }

  /**
   * Decodes {@code width} bytes starting at {@code offset} as one big-endian unsigned value.
   *
   * @param bytes array holding the encoded value
   * @param offset index of the most significant byte
   * @param width number of bytes to read; a width of 0 decodes to 0 without touching the array
   * @return the decoded value
   */
  public static long fromBytes(final byte[] bytes, final int offset, final int width) {
    long value = 0;
    final int end = offset + width;
    for (int i = offset; i < end; ++i) {
      // mask to undo sign extension so each byte contributes only its own 8 bits
      value = (value << 8) | (bytes[i] & 0xff);
    }
    return value;
  }

}