1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  package org.apache.hadoop.hbase.io.hfile;
18  
19  import java.util.ArrayList;
20  import java.util.Arrays;
21  import java.util.Collections;
22  import java.util.Random;
23  
24  /**
25   * A class that generates random numbers that follow some distribution.
26   * <p>
27   * Copied from
28   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
29   * Remove after tfile is committed and use the tfile version of this class
30   * instead.</p>
31   */
32  public class RandomDistribution {
33    /**
34     * Interface for discrete (integer) random distributions.
35     */
36    public interface DiscreteRNG {
37      /**
38       * Get the next random number
39       *
40       * @return the next random number.
41       */
42      int nextInt();
43    }
44  
45    /**
46     * P(i)=1/(max-min)
47     */
48    public static final class Flat implements DiscreteRNG {
49      private final Random random;
50      private final int min;
51      private final int max;
52  
53      /**
54       * Generate random integers from min (inclusive) to max (exclusive)
55       * following even distribution.
56       *
57       * @param random
58       *          The basic random number generator.
59       * @param min
60       *          Minimum integer
61       * @param max
62       *          maximum integer (exclusive).
63       *
64       */
65      public Flat(Random random, int min, int max) {
66        if (min >= max) {
67          throw new IllegalArgumentException("Invalid range");
68        }
69        this.random = random;
70        this.min = min;
71        this.max = max;
72      }
73  
74      /**
75       * @see DiscreteRNG#nextInt()
76       */
77      @Override
78      public int nextInt() {
79        return random.nextInt(max - min) + min;
80      }
81    }
82  
83    /**
84     * Zipf distribution. The ratio of the probabilities of integer i and j is
85     * defined as follows:
86     *
87     * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma.
88     */
89    public static final class Zipf implements DiscreteRNG {
90      private static final double DEFAULT_EPSILON = 0.001;
91      private final Random random;
92      private final ArrayList<Integer> k;
93      private final ArrayList<Double> v;
94  
95      /**
96       * Constructor
97       *
98       * @param r
99       *          The random number generator.
100      * @param min
101      *          minimum integer (inclusvie)
102      * @param max
103      *          maximum integer (exclusive)
104      * @param sigma
105      *          parameter sigma. (sigma > 1.0)
106      */
107     public Zipf(Random r, int min, int max, double sigma) {
108       this(r, min, max, sigma, DEFAULT_EPSILON);
109     }
110 
111     /**
112      * Constructor.
113      *
114      * @param r
115      *          The random number generator.
116      * @param min
117      *          minimum integer (inclusvie)
118      * @param max
119      *          maximum integer (exclusive)
120      * @param sigma
121      *          parameter sigma. (sigma > 1.0)
122      * @param epsilon
123      *          Allowable error percentage (0 < epsilon < 1.0).
124      */
125     public Zipf(Random r, int min, int max, double sigma, double epsilon) {
126       if ((max <= min) || (sigma <= 1) || (epsilon <= 0)
127           || (epsilon >= 0.5)) {
128         throw new IllegalArgumentException("Invalid arguments");
129       }
130       random = r;
131       k = new ArrayList<Integer>();
132       v = new ArrayList<Double>();
133 
134       double sum = 0;
135       int last = -1;
136       for (int i = min; i < max; ++i) {
137         sum += Math.exp(-sigma * Math.log(i - min + 1));
138         if ((last == -1) || i * (1 - epsilon) > last) {
139           k.add(i);
140           v.add(sum);
141           last = i;
142         }
143       }
144 
145       if (last != max - 1) {
146         k.add(max - 1);
147         v.add(sum);
148       }
149 
150       v.set(v.size() - 1, 1.0);
151 
152       for (int i = v.size() - 2; i >= 0; --i) {
153         v.set(i, v.get(i) / sum);
154       }
155     }
156 
157     /**
158      * @see DiscreteRNG#nextInt()
159      */
160     @Override
161     public int nextInt() {
162       double d = random.nextDouble();
163       int idx = Collections.binarySearch(v, d);
164 
165       if (idx > 0) {
166         ++idx;
167       }
168       else {
169         idx = -(idx + 1);
170       }
171 
172       if (idx >= v.size()) {
173         idx = v.size() - 1;
174       }
175 
176       if (idx == 0) {
177         return k.get(0);
178       }
179 
180       int ceiling = k.get(idx);
181       int lower = k.get(idx - 1);
182 
183       return ceiling - random.nextInt(ceiling - lower);
184     }
185   }
186 
187   /**
188    * Binomial distribution.
189    *
190    * P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n)
191    *
192    * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1)
193    */
194   public static final class Binomial implements DiscreteRNG {
195     private final Random random;
196     private final int min;
197     private final int n;
198     private final double[] v;
199 
200     private static double select(int n, int k) {
201       double ret = 1.0;
202       for (int i = k + 1; i <= n; ++i) {
203         ret *= (double) i / (i - k);
204       }
205       return ret;
206     }
207 
208     private static double power(double p, int k) {
209       return Math.exp(k * Math.log(p));
210     }
211 
212     /**
213      * Generate random integers from min (inclusive) to max (exclusive)
214      * following Binomial distribution.
215      *
216      * @param random
217      *          The basic random number generator.
218      * @param min
219      *          Minimum integer
220      * @param max
221      *          maximum integer (exclusive).
222      * @param p
223      *          parameter.
224      *
225      */
226     public Binomial(Random random, int min, int max, double p) {
227       if (min >= max) {
228         throw new IllegalArgumentException("Invalid range");
229       }
230       this.random = random;
231       this.min = min;
232       this.n = max - min - 1;
233       if (n > 0) {
234         v = new double[n + 1];
235         double sum = 0.0;
236         for (int i = 0; i <= n; ++i) {
237           sum += select(n, i) * power(p, i) * power(1 - p, n - i);
238           v[i] = sum;
239         }
240         for (int i = 0; i <= n; ++i) {
241           v[i] /= sum;
242         }
243       }
244       else {
245         v = null;
246       }
247     }
248 
249     /**
250      * @see DiscreteRNG#nextInt()
251      */
252     @Override
253     public int nextInt() {
254       if (v == null) {
255         return min;
256       }
257       double d = random.nextDouble();
258       int idx = Arrays.binarySearch(v, d);
259       if (idx > 0) {
260         ++idx;
261       } else {
262         idx = -(idx + 1);
263       }
264 
265       if (idx >= v.length) {
266         idx = v.length - 1;
267       }
268       return idx + min;
269     }
270   }
271 }