1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
package datafu.pig.bags; |
18 | |
|
19 | |
import java.io.IOException; |
20 | |
import java.util.HashMap; |
21 | |
|
22 | |
import org.apache.pig.data.DataBag; |
23 | |
import org.apache.pig.data.DataType; |
24 | |
import org.apache.pig.impl.logicalLayer.FrontendException; |
25 | |
import org.apache.pig.impl.logicalLayer.schema.Schema; |
26 | |
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; |
27 | |
|
28 | |
import datafu.pig.util.SimpleEvalFunc; |
29 | |
|
30 | |
|
31 | |
|
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
public class AliasBagFields extends SimpleEvalFunc<DataBag> |
51 | |
{ |
52 | 2216 | private final HashMap<String, String> aliasMap = new HashMap<String, String>(); |
53 | |
|
54 | |
public AliasBagFields(String map) |
55 | 2216 | { |
56 | 2216 | if (map == null || map.length() < 2) { |
57 | 0 | throw new RuntimeException("Malformed map string"); |
58 | |
} else { |
59 | 2216 | String fieldString = map.substring(1, map.length()-1); |
60 | 6648 | for (String pair : fieldString.split(",")) { |
61 | 4432 | String[] tokens = pair.split("#"); |
62 | 4432 | if (tokens.length != 2) { |
63 | 0 | throw new RuntimeException("Malformed map string"); |
64 | |
} else { |
65 | 4432 | aliasMap.put(tokens[0].replaceAll(" ", ""), tokens[1].replaceAll(" ", "")); |
66 | |
} |
67 | |
} |
68 | |
} |
69 | 2216 | } |
70 | |
|
71 | |
public DataBag call(DataBag inputBag) throws IOException |
72 | |
{ |
73 | 1 | return inputBag; |
74 | |
} |
75 | |
|
76 | |
@Override |
77 | |
public Schema outputSchema(Schema input) |
78 | |
{ |
79 | |
try { |
80 | 1181 | if (input.size() != 1) |
81 | |
{ |
82 | 0 | throw new RuntimeException("Expected input to have only a single field"); |
83 | |
} |
84 | |
|
85 | 1181 | Schema.FieldSchema inputFieldSchema = input.getField(0); |
86 | |
|
87 | 1181 | if (inputFieldSchema.type != DataType.BAG) |
88 | |
{ |
89 | 0 | throw new RuntimeException("Expected a BAG as input"); |
90 | |
} |
91 | |
|
92 | 1181 | Schema inputBagSchema = inputFieldSchema.schema; |
93 | |
|
94 | 1181 | if (inputBagSchema.getField(0).type != DataType.TUPLE) |
95 | |
{ |
96 | 0 | throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s", |
97 | |
DataType.findTypeName(inputBagSchema.getField(0).type))); |
98 | |
} |
99 | |
|
100 | 1181 | Schema inputTupleSchema = inputBagSchema.getField(0).schema; |
101 | 1181 | Schema outputTupleSchema = inputTupleSchema.clone(); |
102 | 1181 | for (FieldSchema fieldSchema : outputTupleSchema.getFields()) { |
103 | 3543 | if (aliasMap.containsKey(fieldSchema.alias)) { |
104 | 2362 | fieldSchema.alias = aliasMap.get(fieldSchema.alias); |
105 | |
} |
106 | |
} |
107 | |
|
108 | 1181 | return new Schema(new Schema.FieldSchema( |
109 | |
getSchemaName(this.getClass().getName().toLowerCase(), input), |
110 | |
outputTupleSchema, |
111 | |
DataType.BAG)); |
112 | |
} |
113 | 0 | catch (CloneNotSupportedException e) { |
114 | 0 | throw new RuntimeException(e); |
115 | |
} |
116 | 0 | catch (FrontendException e) { |
117 | 0 | throw new RuntimeException(e); |
118 | |
} |
119 | |
} |
120 | |
} |