001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.mapred.join;
020    
021    import java.io.IOException;
022    import java.util.ArrayList;
023    import java.util.Comparator;
024    import java.util.PriorityQueue;
025    
026    import org.apache.hadoop.classification.InterfaceAudience;
027    import org.apache.hadoop.classification.InterfaceStability;
028    import org.apache.hadoop.conf.Configurable;
029    import org.apache.hadoop.conf.Configuration;
030    import org.apache.hadoop.io.Writable;
031    import org.apache.hadoop.io.WritableComparable;
032    import org.apache.hadoop.io.WritableComparator;
033    import org.apache.hadoop.io.WritableUtils;
034    import org.apache.hadoop.mapred.RecordReader;
035    import org.apache.hadoop.util.ReflectionUtils;
036    
037    /**
038     * A RecordReader that can effect joins of RecordReaders sharing a common key
039     * type and partitioning.
040     * 
041     * @deprecated Use 
042     * {@link org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader} instead
043     */
044    @Deprecated
045    @InterfaceAudience.Public
046    @InterfaceStability.Stable
047    public abstract class CompositeRecordReader<
048        K extends WritableComparable, // key type
049        V extends Writable,           // accepts RecordReader<K,V> as children
050        X extends Writable>           // emits Writables of this type
051        implements Configurable {
052    
053    
  // Position this reader occupies in its parent's collector; returned by id().
  private int id;
  // Configuration supplied through setConf(); passed to ReflectionUtils when
  // createKey() instantiates a key.
  private Configuration conf;
  // Placeholder iterator: fills every collector slot that has no iterator
  // registered, and is what accept() registers when this reader does not
  // offer the requested key.
  private final ResetableIterator<X> EMPTY = new ResetableIterator.EMPTY<X>();

  // Key comparator. Instantiated from the class given to the constructor or,
  // when that class was null, looked up from the first child's key class in
  // add().
  private WritableComparator cmp;
  // Key class shared by all children; resolved lazily by createKey().
  private Class<? extends WritableComparable> keyclass;
  // Children that still have records, ordered by their current key using cmp.
  // Remains null until the constructor (non-null comparator class) or the
  // first call to add() creates it.
  private PriorityQueue<ComposableRecordReader<K,?>> q;

  // Collector holding one value iterator per child position for a single key.
  protected final JoinCollector jc;
  // Child readers, stored at the index given by each child's id().
  protected final ComposableRecordReader<K,? extends V>[] kids;

  /**
   * Decide whether a tuple assembled by the collector should be emitted.
   * JoinCollector.flush(TupleWritable) calls this with the child reader
   * array as <tt>srcs</tt> and the freshly populated tuple as
   * <tt>value</tt>; flush only returns a tuple for which this returns true.
   */
  protected abstract boolean combine(Object[] srcs, TupleWritable value);
066    
067      /**
068       * Create a RecordReader with <tt>capacity</tt> children to position
069       * <tt>id</tt> in the parent reader.
070       * The id of a root CompositeRecordReader is -1 by convention, but relying
071       * on this is not recommended.
072       */
073      @SuppressWarnings("unchecked") // Generic array assignment
074      public CompositeRecordReader(int id, int capacity,
075          Class<? extends WritableComparator> cmpcl)
076          throws IOException {
077        assert capacity > 0 : "Invalid capacity";
078        this.id = id;
079        if (null != cmpcl) {
080          cmp = ReflectionUtils.newInstance(cmpcl, null);
081          q = new PriorityQueue<ComposableRecordReader<K,?>>(3,
082              new Comparator<ComposableRecordReader<K,?>>() {
083                public int compare(ComposableRecordReader<K,?> o1,
084                                   ComposableRecordReader<K,?> o2) {
085                  return cmp.compare(o1.key(), o2.key());
086                }
087              });
088        }
089        jc = new JoinCollector(capacity);
090        kids = new ComposableRecordReader[capacity];
091      }
092    
093      /**
094       * Return the position in the collector this class occupies.
095       */
096      public int id() {
097        return id;
098      }
099    
100      /**
101       * {@inheritDoc}
102       */
103      public void setConf(Configuration conf) {
104        this.conf = conf;
105      }
106    
107      /**
108       * {@inheritDoc}
109       */
110      public Configuration getConf() {
111        return conf;
112      }
113    
114      /**
115       * Return sorted list of RecordReaders for this composite.
116       */
117      protected PriorityQueue<ComposableRecordReader<K,?>> getRecordReaderQueue() {
118        return q;
119      }
120    
121      /**
122       * Return comparator defining the ordering for RecordReaders in this
123       * composite.
124       */
125      protected WritableComparator getComparator() {
126        return cmp;
127      }
128    
129      /**
130       * Add a RecordReader to this collection.
131       * The id() of a RecordReader determines where in the Tuple its
132       * entry will appear. Adding RecordReaders with the same id has
133       * undefined behavior.
134       */
135      public void add(ComposableRecordReader<K,? extends V> rr) throws IOException {
136        kids[rr.id()] = rr;
137        if (null == q) {
138          cmp = WritableComparator.get(rr.createKey().getClass());
139          q = new PriorityQueue<ComposableRecordReader<K,?>>(3,
140              new Comparator<ComposableRecordReader<K,?>>() {
141                public int compare(ComposableRecordReader<K,?> o1,
142                                   ComposableRecordReader<K,?> o2) {
143                  return cmp.compare(o1.key(), o2.key());
144                }
145              });
146        }
147        if (rr.hasNext()) {
148          q.add(rr);
149        }
150      }
151    
152      /**
153       * Collector for join values.
154       * This accumulates values for a given key from the child RecordReaders. If
155       * one or more child RR contain duplicate keys, this will emit the cross
156       * product of the associated values until exhausted.
157       */
158      class JoinCollector {
159        private K key;
160        private ResetableIterator<X>[] iters;
161        private int pos = -1;
162        private boolean first = true;
163    
164        /**
165         * Construct a collector capable of handling the specified number of
166         * children.
167         */
168        @SuppressWarnings("unchecked") // Generic array assignment
169        public JoinCollector(int card) {
170          iters = new ResetableIterator[card];
171          for (int i = 0; i < iters.length; ++i) {
172            iters[i] = EMPTY;
173          }
174        }
175    
176        /**
177         * Register a given iterator at position id.
178         */
179        public void add(int id, ResetableIterator<X> i)
180            throws IOException {
181          iters[id] = i;
182        }
183    
184        /**
185         * Return the key associated with this collection.
186         */
187        public K key() {
188          return key;
189        }
190    
191        /**
192         * Codify the contents of the collector to be iterated over.
193         * When this is called, all RecordReaders registered for this
194         * key should have added ResetableIterators.
195         */
196        public void reset(K key) {
197          this.key = key;
198          first = true;
199          pos = iters.length - 1;
200          for (int i = 0; i < iters.length; ++i) {
201            iters[i].reset();
202          }
203        }
204    
205        /**
206         * Clear all state information.
207         */
208        public void clear() {
209          key = null;
210          pos = -1;
211          for (int i = 0; i < iters.length; ++i) {
212            iters[i].clear();
213            iters[i] = EMPTY;
214          }
215        }
216    
217        /**
218         * Returns false if exhausted or if reset(K) has not been called.
219         */
220        protected boolean hasNext() {
221          return !(pos < 0);
222        }
223    
224        /**
225         * Populate Tuple from iterators.
226         * It should be the case that, given iterators i_1...i_n over values from
227         * sources s_1...s_n sharing key k, repeated calls to next should yield
228         * I x I.
229         */
230        @SuppressWarnings("unchecked") // No static typeinfo on Tuples
231        protected boolean next(TupleWritable val) throws IOException {
232          if (first) {
233            int i = -1;
234            for (pos = 0; pos < iters.length; ++pos) {
235              if (iters[pos].hasNext() && iters[pos].next((X)val.get(pos))) {
236                i = pos;
237                val.setWritten(i);
238              }
239            }
240            pos = i;
241            first = false;
242            if (pos < 0) {
243              clear();
244              return false;
245            }
246            return true;
247          }
248          while (0 <= pos && !(iters[pos].hasNext() &&
249                               iters[pos].next((X)val.get(pos)))) {
250            --pos;
251          }
252          if (pos < 0) {
253            clear();
254            return false;
255          }
256          val.setWritten(pos);
257          for (int i = 0; i < pos; ++i) {
258            if (iters[i].replay((X)val.get(i))) {
259              val.setWritten(i);
260            }
261          }
262          while (pos + 1 < iters.length) {
263            ++pos;
264            iters[pos].reset();
265            if (iters[pos].hasNext() && iters[pos].next((X)val.get(pos))) {
266              val.setWritten(pos);
267            }
268          }
269          return true;
270        }
271    
272        /**
273         * Replay the last Tuple emitted.
274         */
275        @SuppressWarnings("unchecked") // No static typeinfo on Tuples
276        public boolean replay(TupleWritable val) throws IOException {
277          // The last emitted tuple might have drawn on an empty source;
278          // it can't be cleared prematurely, b/c there may be more duplicate
279          // keys in iterator positions < pos
280          assert !first;
281          boolean ret = false;
282          for (int i = 0; i < iters.length; ++i) {
283            if (iters[i].replay((X)val.get(i))) {
284              val.setWritten(i);
285              ret = true;
286            }
287          }
288          return ret;
289        }
290    
291        /**
292         * Close all child iterators.
293         */
294        public void close() throws IOException {
295          for (int i = 0; i < iters.length; ++i) {
296            iters[i].close();
297          }
298        }
299    
300        /**
301         * Write the next value into key, value as accepted by the operation
302         * associated with this set of RecordReaders.
303         */
304        public boolean flush(TupleWritable value) throws IOException {
305          while (hasNext()) {
306            value.clearWritten();
307            if (next(value) && combine(kids, value)) {
308              return true;
309            }
310          }
311          return false;
312        }
313      }
314    
315      /**
316       * Return the key for the current join or the value at the top of the
317       * RecordReader heap.
318       */
319      public K key() {
320        if (jc.hasNext()) {
321          return jc.key();
322        }
323        if (!q.isEmpty()) {
324          return q.peek().key();
325        }
326        return null;
327      }
328    
329      /**
330       * Clone the key at the top of this RR into the given object.
331       */
332      public void key(K key) throws IOException {
333        WritableUtils.cloneInto(key, key());
334      }
335    
336      /**
337       * Return true if it is possible that this could emit more values.
338       */
339      public boolean hasNext() {
340        return jc.hasNext() || !q.isEmpty();
341      }
342    
343      /**
344       * Pass skip key to child RRs.
345       */
346      public void skip(K key) throws IOException {
347        ArrayList<ComposableRecordReader<K,?>> tmp =
348          new ArrayList<ComposableRecordReader<K,?>>();
349        while (!q.isEmpty() && cmp.compare(q.peek().key(), key) <= 0) {
350          tmp.add(q.poll());
351        }
352        for (ComposableRecordReader<K,?> rr : tmp) {
353          rr.skip(key);
354          if (rr.hasNext()) {
355            q.add(rr);
356          }
357        }
358      }
359    
  /**
   * Obtain an iterator over the child RRs apropos of the value type
   * ultimately emitted from this join.
   * accept() registers the returned iterator with the JoinCollector it is
   * given, at this reader's id, when this reader offers the requested key.
   *
   * @return iterator over the values of type X this join emits
   */
  protected abstract ResetableIterator<X> getDelegate();
365    
366      /**
367       * If key provided matches that of this Composite, give JoinCollector
368       * iterator over values it may emit.
369       */
370      @SuppressWarnings("unchecked") // No values from static EMPTY class
371      public void accept(CompositeRecordReader.JoinCollector jc, K key)
372          throws IOException {
373        if (hasNext() && 0 == cmp.compare(key, key())) {
374          fillJoinCollector(createKey());
375          jc.add(id, getDelegate());
376          return;
377        }
378        jc.add(id, EMPTY);
379      }
380    
381      /**
382       * For all child RRs offering the key provided, obtain an iterator
383       * at that position in the JoinCollector.
384       */
385      protected void fillJoinCollector(K iterkey) throws IOException {
386        if (!q.isEmpty()) {
387          q.peek().key(iterkey);
388          while (0 == cmp.compare(q.peek().key(), iterkey)) {
389            ComposableRecordReader<K,?> t = q.poll();
390            t.accept(jc, iterkey);
391            if (t.hasNext()) {
392              q.add(t);
393            } else if (q.isEmpty()) {
394              return;
395            }
396          }
397        }
398      }
399    
400      /**
401       * Implement Comparable contract (compare key of join or head of heap
402       * with that of another).
403       */
404      public int compareTo(ComposableRecordReader<K,?> other) {
405        return cmp.compare(key(), other.key());
406      }
407    
408      /**
409       * Create a new key value common to all child RRs.
410       * @throws ClassCastException if key classes differ.
411       */
412      @SuppressWarnings("unchecked") // Explicit check for key class agreement
413      public K createKey() {
414        if (null == keyclass) {
415          final Class<?> cls = kids[0].createKey().getClass();
416          for (RecordReader<K,? extends Writable> rr : kids) {
417            if (!cls.equals(rr.createKey().getClass())) {
418              throw new ClassCastException("Child key classes fail to agree");
419            }
420          }
421          keyclass = cls.asSubclass(WritableComparable.class);
422        }
423        return (K) ReflectionUtils.newInstance(keyclass, getConf());
424      }
425    
426      /**
427       * Create a value to be used internally for joins.
428       */
429      protected TupleWritable createInternalValue() {
430        Writable[] vals = new Writable[kids.length];
431        for (int i = 0; i < vals.length; ++i) {
432          vals[i] = kids[i].createValue();
433        }
434        return new TupleWritable(vals);
435      }
436    
437      /**
438       * Unsupported (returns zero in all cases).
439       */
440      public long getPos() throws IOException {
441        return 0;
442      }
443    
444      /**
445       * Close all child RRs.
446       */
447      public void close() throws IOException {
448        if (kids != null) {
449          for (RecordReader<K,? extends Writable> rr : kids) {
450            rr.close();
451          }
452        }
453        if (jc != null) {
454          jc.close();
455        }
456      }
457    
458      /**
459       * Report progress as the minimum of all child RR progress.
460       */
461      public float getProgress() throws IOException {
462        float ret = 1.0f;
463        for (RecordReader<K,? extends Writable> rr : kids) {
464          ret = Math.min(ret, rr.getProgress());
465        }
466        return ret;
467      }
468    }