1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
package datafu.pig.stats; |
18 | |
|
19 | |
import java.io.IOException; |
20 | |
|
21 | |
import org.apache.commons.math.MathException; |
22 | |
import org.apache.commons.math.distribution.NormalDistribution; |
23 | |
import org.apache.commons.math.distribution.NormalDistributionImpl; |
24 | |
import org.apache.pig.data.DataType; |
25 | |
import org.apache.pig.data.Tuple; |
26 | |
import org.apache.pig.data.TupleFactory; |
27 | |
import org.apache.pig.impl.logicalLayer.schema.Schema; |
28 | |
|
29 | |
import com.google.common.collect.ImmutableList; |
30 | |
|
31 | |
import datafu.pig.util.SimpleEvalFunc; |
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
public class WilsonBinConf extends SimpleEvalFunc<Tuple> |
56 | |
{ |
57 | 1 | private static TupleFactory tupleFactory = TupleFactory.getInstance(); |
58 | |
private final double alpha; |
59 | |
|
60 | |
public WilsonBinConf(double alpha) |
61 | 26 | { |
62 | 26 | this.alpha = alpha; |
63 | 26 | } |
64 | |
|
65 | |
public WilsonBinConf(String alpha) |
66 | |
{ |
67 | 26 | this(Double.parseDouble(alpha)); |
68 | 26 | } |
69 | |
|
70 | |
public Tuple call(Number x, Number n) throws IOException |
71 | |
{ |
72 | 7 | if (x == null || n == null) |
73 | 0 | return null; |
74 | 7 | return binconf(x.longValue(), n.longValue()); |
75 | |
} |
76 | |
|
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
|
82 | |
public Tuple binconf(Long x, Long n) throws IOException |
83 | |
{ |
84 | 7 | NormalDistribution normalDist = new NormalDistributionImpl(); |
85 | |
|
86 | 7 | if (x == null || n == null) |
87 | 0 | return null; |
88 | 7 | if (x < 0 || n < 0) |
89 | 0 | throw new IllegalArgumentException("non-negative values expected"); |
90 | 7 | if (x > n) |
91 | 0 | throw new IllegalArgumentException("invariant violation: number of successes > number of obs"); |
92 | 7 | if (n == 0) |
93 | 0 | return tupleFactory.newTuple(ImmutableList.of(Double.valueOf(0), Double.valueOf(0))); |
94 | |
|
95 | |
try { |
96 | 7 | double zcrit = -1.0 * normalDist.inverseCumulativeProbability(alpha/2); |
97 | 7 | double z2 = zcrit * zcrit; |
98 | 7 | double p = x/(double)n; |
99 | |
|
100 | 7 | double a = p + z2/2/n; |
101 | 7 | double b = zcrit * Math.sqrt((p * (1 - p) + z2/4/n)/n); |
102 | 7 | double c = (1 + z2/n); |
103 | |
|
104 | 7 | double lower = (a - b) / c; |
105 | 7 | double upper = (a + b) / c; |
106 | |
|
107 | |
|
108 | |
|
109 | |
|
110 | |
|
111 | |
|
112 | |
|
113 | 7 | if (x == 1) |
114 | 2 | lower = -Math.log(1 - alpha)/n; |
115 | 7 | if (x == (n - 1)) |
116 | 2 | upper = 1 + Math.log(1 - alpha)/n; |
117 | |
|
118 | 7 | return tupleFactory.newTuple(ImmutableList.of(lower, upper)); |
119 | |
} |
120 | 0 | catch (MathException e) { |
121 | 0 | throw new IOException("math error", e); |
122 | |
} |
123 | |
} |
124 | |
|
125 | |
@Override |
126 | |
public Schema outputSchema(Schema input) |
127 | |
{ |
128 | 14 | return new Schema(ImmutableList.of( |
129 | |
new Schema.FieldSchema("lower", DataType.DOUBLE), |
130 | |
new Schema.FieldSchema("upper", DataType.DOUBLE))); |
131 | |
} |
132 | |
} |