Coverage Report - datafu.pig.bags.Enumerate
 
Classes in this File Line Coverage Branch Coverage Complexity
Enumerate
81%
31/38
71%
10/14
4
 
 1  
 /*
 2  
  * Copyright 2010 LinkedIn, Inc
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 5  
  * use this file except in compliance with the License. You may obtain a copy of
 6  
  * the License at
 7  
  * 
 8  
  * http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 12  
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 13  
  * License for the specific language governing permissions and limitations under
 14  
  * the License.
 15  
  */
 16  
  
 17  
 package datafu.pig.bags;
 18  
 
 19  
 import java.io.IOException;
 20  
 
 21  
 import org.apache.pig.data.BagFactory;
 22  
 import org.apache.pig.data.DataBag;
 23  
 import org.apache.pig.data.DataType;
 24  
 import org.apache.pig.data.Tuple;
 25  
 import org.apache.pig.data.TupleFactory;
 26  
 import org.apache.pig.impl.logicalLayer.FrontendException;
 27  
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 28  
 
 29  
 import datafu.pig.util.SimpleEvalFunc;
 30  
 
 31  
 /**
 32  
  * Enumerates a bag, replacing each tuple (elem) with (elem, idx), where idx is a long index that starts at 0 by default. For example:
 33  
  * <pre>
 34  
  *   {(A),(B),(C),(D)} => {(A,0),(B,1),(C,2),(D,3)}
 35  
  * </pre>
 36  
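  * The first constructor parameter (optional) sets the starting index of the count. The second constructor parameter (optional) is parsed as a boolean; when true, indices count down from (bag size - 1 + start) instead of up from start.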
 37  
  * <p>
 38  
  * Example:
 39  
  * <pre>
 40  
  * {@code
 41  
  * define Enumerate datafu.pig.bags.Enumerate('1');
 42  
  *
 43  
  * -- input:
 44  
  * -- ({(100),(200),(300),(400)})
 45  
  * input = LOAD 'input' as (B: bag{T: tuple(v2:INT)});
 46  
  *
 47  
  * -- output:
 48  
  * -- ({(100,1),(200,2),(300,3),(400,4)})
 49  
  * output = FOREACH input GENERATE Enumerate(B);
 50  
  * }
 51  
  * </pre>
 52  
  */
 53  
 public class Enumerate extends SimpleEvalFunc<DataBag>
 54  
 {
 55  
   private final int start;
 56  6960
   private Boolean reverse = false;
 57  
 
 58  
   public Enumerate()
 59  1763
   {
 60  1763
     this.start = 0;
 61  1763
   }
 62  
 
 63  
   public Enumerate(String start)
 64  5197
   {
 65  5197
     this.start = Integer.parseInt(start);
 66  5197
   }
 67  
   
 68  
   public Enumerate(String start, String reverse)
 69  
   {
 70  1675
     this(start);
 71  1675
     if (reverse != null) this.reverse = Boolean.valueOf(reverse);
 72  1675
   }
 73  
 
 74  
   public DataBag call(DataBag inputBag) throws IOException
 75  
   {
 76  4
     DataBag outputBag = BagFactory.getInstance().newDefaultBag();
 77  4
     long i = start;
 78  4
     if (reverse) i = inputBag.size() - 1 + start;
 79  
 
 80  4
     for (Tuple t : inputBag) {
 81  17
       Tuple t1 = TupleFactory.getInstance().newTuple(t.getAll());
 82  17
       t1.append(i);
 83  17
       outputBag.add(t1);
 84  17
       if (reverse)
 85  5
         i--;
 86  
       else
 87  12
         i++;
 88  17
     }
 89  
 
 90  4
     return outputBag;
 91  
   }
 92  
 
 93  
   @Override
 94  
   public Schema outputSchema(Schema input)
 95  
   {
 96  
     try {
 97  3660
       if (input.size() != 1)
 98  
       {
 99  0
         throw new RuntimeException("Expected input to have only a single field");
 100  
       }
 101  
       
 102  3660
       Schema.FieldSchema inputFieldSchema = input.getField(0);
 103  
 
 104  3660
       if (inputFieldSchema.type != DataType.BAG)
 105  
       {
 106  0
         throw new RuntimeException("Expected a BAG as input");
 107  
       }
 108  
       
 109  3660
       Schema inputBagSchema = inputFieldSchema.schema;
 110  
 
 111  3660
       if (inputBagSchema.getField(0).type != DataType.TUPLE)
 112  
       {
 113  0
         throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
 114  
                                                  DataType.findTypeName(inputBagSchema.getField(0).type)));
 115  
       }
 116  
       
 117  3660
       Schema inputTupleSchema = inputBagSchema.getField(0).schema;
 118  
       
 119  3660
       Schema outputTupleSchema = inputTupleSchema.clone();
 120  3660
       outputTupleSchema.add(new Schema.FieldSchema("i", DataType.LONG));
 121  
       
 122  3660
       return new Schema(new Schema.FieldSchema(
 123  
             getSchemaName(this.getClass().getName().toLowerCase(), input),
 124  
             outputTupleSchema, 
 125  
             DataType.BAG));
 126  
     }
 127  0
     catch (CloneNotSupportedException e) {
 128  0
       throw new RuntimeException(e);
 129  
     }
 130  0
     catch (FrontendException e) {
 131  0
       throw new RuntimeException(e);
 132  
     }
 133  
   }
 134  
 }