1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
package datafu.pig.bags; |
18 | |
|
19 | |
import java.io.IOException; |
20 | |
|
21 | |
import org.apache.pig.data.BagFactory; |
22 | |
import org.apache.pig.data.DataBag; |
23 | |
import org.apache.pig.data.DataType; |
24 | |
import org.apache.pig.data.Tuple; |
25 | |
import org.apache.pig.data.TupleFactory; |
26 | |
import org.apache.pig.impl.logicalLayer.FrontendException; |
27 | |
import org.apache.pig.impl.logicalLayer.schema.Schema; |
28 | |
|
29 | |
import datafu.pig.util.SimpleEvalFunc; |
30 | |
|
31 | |
|
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
public class Enumerate extends SimpleEvalFunc<DataBag> |
54 | |
{ |
55 | |
private final int start; |
56 | 6960 | private Boolean reverse = false; |
57 | |
|
58 | |
public Enumerate() |
59 | 1763 | { |
60 | 1763 | this.start = 0; |
61 | 1763 | } |
62 | |
|
63 | |
public Enumerate(String start) |
64 | 5197 | { |
65 | 5197 | this.start = Integer.parseInt(start); |
66 | 5197 | } |
67 | |
|
68 | |
public Enumerate(String start, String reverse) |
69 | |
{ |
70 | 1675 | this(start); |
71 | 1675 | if (reverse != null) this.reverse = Boolean.valueOf(reverse); |
72 | 1675 | } |
73 | |
|
74 | |
public DataBag call(DataBag inputBag) throws IOException |
75 | |
{ |
76 | 4 | DataBag outputBag = BagFactory.getInstance().newDefaultBag(); |
77 | 4 | long i = start; |
78 | 4 | if (reverse) i = inputBag.size() - 1 + start; |
79 | |
|
80 | 4 | for (Tuple t : inputBag) { |
81 | 17 | Tuple t1 = TupleFactory.getInstance().newTuple(t.getAll()); |
82 | 17 | t1.append(i); |
83 | 17 | outputBag.add(t1); |
84 | 17 | if (reverse) |
85 | 5 | i--; |
86 | |
else |
87 | 12 | i++; |
88 | 17 | } |
89 | |
|
90 | 4 | return outputBag; |
91 | |
} |
92 | |
|
93 | |
@Override |
94 | |
public Schema outputSchema(Schema input) |
95 | |
{ |
96 | |
try { |
97 | 3660 | if (input.size() != 1) |
98 | |
{ |
99 | 0 | throw new RuntimeException("Expected input to have only a single field"); |
100 | |
} |
101 | |
|
102 | 3660 | Schema.FieldSchema inputFieldSchema = input.getField(0); |
103 | |
|
104 | 3660 | if (inputFieldSchema.type != DataType.BAG) |
105 | |
{ |
106 | 0 | throw new RuntimeException("Expected a BAG as input"); |
107 | |
} |
108 | |
|
109 | 3660 | Schema inputBagSchema = inputFieldSchema.schema; |
110 | |
|
111 | 3660 | if (inputBagSchema.getField(0).type != DataType.TUPLE) |
112 | |
{ |
113 | 0 | throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s", |
114 | |
DataType.findTypeName(inputBagSchema.getField(0).type))); |
115 | |
} |
116 | |
|
117 | 3660 | Schema inputTupleSchema = inputBagSchema.getField(0).schema; |
118 | |
|
119 | 3660 | Schema outputTupleSchema = inputTupleSchema.clone(); |
120 | 3660 | outputTupleSchema.add(new Schema.FieldSchema("i", DataType.LONG)); |
121 | |
|
122 | 3660 | return new Schema(new Schema.FieldSchema( |
123 | |
getSchemaName(this.getClass().getName().toLowerCase(), input), |
124 | |
outputTupleSchema, |
125 | |
DataType.BAG)); |
126 | |
} |
127 | 0 | catch (CloneNotSupportedException e) { |
128 | 0 | throw new RuntimeException(e); |
129 | |
} |
130 | 0 | catch (FrontendException e) { |
131 | 0 | throw new RuntimeException(e); |
132 | |
} |
133 | |
} |
134 | |
} |