Coverage Report - datafu.pig.bags.sets.SetUnion
 
Classes in this File Line Coverage Branch Coverage Complexity
SetUnion
82%
14/17
87%
7/8
9
 
 1  
 /*
 2  
  * Copyright 2010 LinkedIn, Inc
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 5  
  * use this file except in compliance with the License. You may obtain a copy of
 6  
  * the License at
 7  
  * 
 8  
  * http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 12  
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 13  
  * License for the specific language governing permissions and limitations under
 14  
  * the License.
 15  
  */
 16  
  
 17  
 package datafu.pig.bags.sets;
 18  
 
 19  
 import java.io.IOException;
 20  
 import java.util.HashSet;
 21  
 import java.util.Set;
 22  
 
 23  
 import org.apache.pig.data.BagFactory;
 24  
 import org.apache.pig.data.DataBag;
 25  
 import org.apache.pig.data.Tuple;
 26  
 import org.apache.pig.data.TupleFactory;
 27  
 
 28  
 /**
 29  
  * Computes the set union of two or more bags.  Duplicates are eliminated.
 30  
  * <p>
 31  
  * Example:
 32  
  * <pre>
 33  
  * {@code
 34  
  * define SetUnion datafu.pig.bags.sets.SetUnion();
 35  
  * 
 36  
  * -- input:
 37  
  * -- ({(2,20),(3,30),(4,40)},{(1,10),(2,20),(4,40),(8,80)})
 38  
  * input = LOAD 'input' AS (B1:bag{T:tuple(val1:int,val2:int)},B2:bag{T:tuple(val1:int,val2:int)});
 39  
  * 
 40  
  * -- output:
 41  
  * -- ({(2,20),(3,30),(4,40),(1,10),(8,80)})
 42  
  * output = FOREACH input GENERATE SetUnion(B1,B2);
 43  
  * }
 44  
  * </pre>
 45  
  */
 46  214
 public class SetUnion extends SetOperationsBase
 47  
 {
 48  1
   private static final BagFactory bagFactory = BagFactory.getInstance();
 49  1
   private static final TupleFactory tupleFactory = TupleFactory.getInstance();
 50  
 
 51  
   @Override
 52  
   public DataBag exec(Tuple input) throws IOException
 53  
   {
 54  4
     Set<Object> seen = new HashSet<Object>();
 55  4
     DataBag outputBag = bagFactory.newDefaultBag();
 56  
 
 57  
     try {
 58  12
       for (int i=0; i < input.size(); i++) {
 59  8
         Object o = input.get(i);
 60  8
         if (!(o instanceof DataBag))
 61  0
           throw new RuntimeException("parameters must be databags");
 62  
 
 63  8
         DataBag inputBag = (DataBag) o;
 64  8
         for (Tuple elem : inputBag) {
 65  55
           if (!seen.contains(elem)) {
 66  38
             outputBag.add(elem);
 67  38
             seen.add(elem);
 68  
           }
 69  
         }
 70  
       }
 71  
 
 72  4
       return outputBag;
 73  
     }
 74  0
     catch (Exception e) {
 75  0
       throw new IOException(e);
 76  
     }
 77  
   }
 78  
 }
 79  
 
 80