Coverage Report - datafu.pig.bags.AliasBagFields
 
Classes in this File Line Coverage Branch Coverage Complexity
AliasBagFields
70%
21/30
66%
12/18
7.333
 
 1  
 /*
 2  
  * Copyright 2011 LinkedIn, Inc
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 5  
  * use this file except in compliance with the License. You may obtain a copy of
 6  
  * the License at
 7  
  * 
 8  
  * http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 12  
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 13  
  * License for the specific language governing permissions and limitations under
 14  
  * the License.
 15  
  */
 16  
 
 17  
 package datafu.pig.bags;
 18  
 
 19  
 import java.io.IOException;
 20  
 import java.util.HashMap;
 21  
 
 22  
 import org.apache.pig.data.DataBag;
 23  
 import org.apache.pig.data.DataType;
 24  
 import org.apache.pig.impl.logicalLayer.FrontendException;
 25  
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 26  
 import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
 27  
 
 28  
 import datafu.pig.util.SimpleEvalFunc;
 29  
 
 30  
 /**
 31  
  * Re-alias the fields inside of a bag.  For example:
 32  
  * 
 33  
  * Example:
 34  
  * <pre>
 35  
  * {@code
 36  
  * define AliasBagFields datafu.pig.bags.AliasBagFields('[alpha#letter,numeric#decimal]');
 37  
  * 
 38  
  * -- input:
 39  
  * -- ({(a, 1),(b, 2),(c, 3),(d, 4)})
 40  
  * input = LOAD 'input' AS (B: bag {T: tuple(alpha:CHARARRAY, numeric:INT)});
 41  
  * 
 42  
  * output = FOREACH input GENERATE AliasBagFields(B);
 43  
  * 
 44  
  * output schema => (B: bag {T: tuple(letter:CHARARRAY, decimal:INT)});
 45  
  * } 
 46  
  * </pre>
 47  
  * 
 48  
  * @param map A string in Pig map format [key1#value1,key2#value2]
 49  
  */
 50  
 public class AliasBagFields extends SimpleEvalFunc<DataBag>
 51  
 {
 52  2216
   private final HashMap<String, String> aliasMap = new HashMap<String, String>();
 53  
   
 54  
   public AliasBagFields(String map)
 55  2216
   {
 56  2216
     if (map == null || map.length() < 2) {
 57  0
       throw new RuntimeException("Malformed map string");
 58  
     } else {
 59  2216
       String fieldString = map.substring(1, map.length()-1);
 60  6648
       for (String pair : fieldString.split(",")) {
 61  4432
         String[] tokens = pair.split("#");
 62  4432
         if (tokens.length != 2) {
 63  0
           throw new RuntimeException("Malformed map string");
 64  
         } else {
 65  4432
           aliasMap.put(tokens[0].replaceAll(" ", ""), tokens[1].replaceAll(" ", ""));
 66  
         }
 67  
       }
 68  
     }
 69  2216
   }
 70  
   
 71  
   public DataBag call(DataBag inputBag) throws IOException
 72  
   {
 73  1
     return inputBag;
 74  
   }
 75  
   
 76  
   @Override
 77  
   public Schema outputSchema(Schema input)
 78  
   {
 79  
     try {
 80  1181
       if (input.size() != 1)
 81  
       {
 82  0
         throw new RuntimeException("Expected input to have only a single field");
 83  
       }
 84  
       
 85  1181
       Schema.FieldSchema inputFieldSchema = input.getField(0);
 86  
 
 87  1181
       if (inputFieldSchema.type != DataType.BAG)
 88  
       {
 89  0
         throw new RuntimeException("Expected a BAG as input");
 90  
       }
 91  
       
 92  1181
       Schema inputBagSchema = inputFieldSchema.schema;
 93  
 
 94  1181
       if (inputBagSchema.getField(0).type != DataType.TUPLE)
 95  
       {
 96  0
         throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
 97  
                                                  DataType.findTypeName(inputBagSchema.getField(0).type)));
 98  
       }
 99  
       
 100  1181
       Schema inputTupleSchema = inputBagSchema.getField(0).schema;
 101  1181
       Schema outputTupleSchema = inputTupleSchema.clone();
 102  1181
       for (FieldSchema fieldSchema : outputTupleSchema.getFields()) {
 103  3543
         if (aliasMap.containsKey(fieldSchema.alias)) {
 104  2362
           fieldSchema.alias = aliasMap.get(fieldSchema.alias);
 105  
         }
 106  
       }
 107  
       
 108  1181
       return new Schema(new Schema.FieldSchema(
 109  
             getSchemaName(this.getClass().getName().toLowerCase(), input),
 110  
             outputTupleSchema, 
 111  
             DataType.BAG));
 112  
     }
 113  0
     catch (CloneNotSupportedException e) {
 114  0
       throw new RuntimeException(e);
 115  
     }
 116  0
     catch (FrontendException e) {
 117  0
       throw new RuntimeException(e);
 118  
     }
 119  
   }
 120  
 }