001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.mapred.join; 020 021 import java.io.IOException; 022 import java.util.ArrayList; 023 import java.util.Comparator; 024 import java.util.PriorityQueue; 025 026 import org.apache.hadoop.classification.InterfaceAudience; 027 import org.apache.hadoop.classification.InterfaceStability; 028 import org.apache.hadoop.conf.Configurable; 029 import org.apache.hadoop.conf.Configuration; 030 import org.apache.hadoop.io.Writable; 031 import org.apache.hadoop.io.WritableComparable; 032 import org.apache.hadoop.io.WritableComparator; 033 import org.apache.hadoop.io.WritableUtils; 034 import org.apache.hadoop.mapred.RecordReader; 035 import org.apache.hadoop.util.ReflectionUtils; 036 037 /** 038 * A RecordReader that can effect joins of RecordReaders sharing a common key 039 * type and partitioning. 040 * 041 * @deprecated Use 042 * {@link org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader} instead 043 */ 044 @Deprecated 045 @InterfaceAudience.Public 046 @InterfaceStability.Stable 047 public abstract class CompositeRecordReader< 048 K extends WritableComparable, // key type 049 V extends Writable, // accepts RecordReader<K,V> as children 050 X extends Writable> // emits Writables of this type 051 implements Configurable { 052 053 054 private int id; 055 private Configuration conf; 056 private final ResetableIterator<X> EMPTY = new ResetableIterator.EMPTY<X>(); 057 058 private WritableComparator cmp; 059 private Class<? extends WritableComparable> keyclass; 060 private PriorityQueue<ComposableRecordReader<K,?>> q; 061 062 protected final JoinCollector jc; 063 protected final ComposableRecordReader<K,? extends V>[] kids; 064 065 protected abstract boolean combine(Object[] srcs, TupleWritable value); 066 067 /** 068 * Create a RecordReader with <tt>capacity</tt> children to position 069 * <tt>id</tt> in the parent reader. 070 * The id of a root CompositeRecordReader is -1 by convention, but relying 071 * on this is not recommended. 072 */ 073 @SuppressWarnings("unchecked") // Generic array assignment 074 public CompositeRecordReader(int id, int capacity, 075 Class<? extends WritableComparator> cmpcl) 076 throws IOException { 077 assert capacity > 0 : "Invalid capacity"; 078 this.id = id; 079 if (null != cmpcl) { 080 cmp = ReflectionUtils.newInstance(cmpcl, null); 081 q = new PriorityQueue<ComposableRecordReader<K,?>>(3, 082 new Comparator<ComposableRecordReader<K,?>>() { 083 public int compare(ComposableRecordReader<K,?> o1, 084 ComposableRecordReader<K,?> o2) { 085 return cmp.compare(o1.key(), o2.key()); 086 } 087 }); 088 } 089 jc = new JoinCollector(capacity); 090 kids = new ComposableRecordReader[capacity]; 091 } 092 093 /** 094 * Return the position in the collector this class occupies. 095 */ 096 public int id() { 097 return id; 098 } 099 100 /** 101 * {@inheritDoc} 102 */ 103 public void setConf(Configuration conf) { 104 this.conf = conf; 105 } 106 107 /** 108 * {@inheritDoc} 109 */ 110 public Configuration getConf() { 111 return conf; 112 } 113 114 /** 115 * Return sorted list of RecordReaders for this composite. 116 */ 117 protected PriorityQueue<ComposableRecordReader<K,?>> getRecordReaderQueue() { 118 return q; 119 } 120 121 /** 122 * Return comparator defining the ordering for RecordReaders in this 123 * composite. 124 */ 125 protected WritableComparator getComparator() { 126 return cmp; 127 } 128 129 /** 130 * Add a RecordReader to this collection. 131 * The id() of a RecordReader determines where in the Tuple its 132 * entry will appear. Adding RecordReaders with the same id has 133 * undefined behavior. 134 */ 135 public void add(ComposableRecordReader<K,? extends V> rr) throws IOException { 136 kids[rr.id()] = rr; 137 if (null == q) { 138 cmp = WritableComparator.get(rr.createKey().getClass()); 139 q = new PriorityQueue<ComposableRecordReader<K,?>>(3, 140 new Comparator<ComposableRecordReader<K,?>>() { 141 public int compare(ComposableRecordReader<K,?> o1, 142 ComposableRecordReader<K,?> o2) { 143 return cmp.compare(o1.key(), o2.key()); 144 } 145 }); 146 } 147 if (rr.hasNext()) { 148 q.add(rr); 149 } 150 } 151 152 /** 153 * Collector for join values. 154 * This accumulates values for a given key from the child RecordReaders. If 155 * one or more child RR contain duplicate keys, this will emit the cross 156 * product of the associated values until exhausted. 157 */ 158 class JoinCollector { 159 private K key; 160 private ResetableIterator<X>[] iters; 161 private int pos = -1; 162 private boolean first = true; 163 164 /** 165 * Construct a collector capable of handling the specified number of 166 * children. 167 */ 168 @SuppressWarnings("unchecked") // Generic array assignment 169 public JoinCollector(int card) { 170 iters = new ResetableIterator[card]; 171 for (int i = 0; i < iters.length; ++i) { 172 iters[i] = EMPTY; 173 } 174 } 175 176 /** 177 * Register a given iterator at position id. 178 */ 179 public void add(int id, ResetableIterator<X> i) 180 throws IOException { 181 iters[id] = i; 182 } 183 184 /** 185 * Return the key associated with this collection. 186 */ 187 public K key() { 188 return key; 189 } 190 191 /** 192 * Codify the contents of the collector to be iterated over. 193 * When this is called, all RecordReaders registered for this 194 * key should have added ResetableIterators. 195 */ 196 public void reset(K key) { 197 this.key = key; 198 first = true; 199 pos = iters.length - 1; 200 for (int i = 0; i < iters.length; ++i) { 201 iters[i].reset(); 202 } 203 } 204 205 /** 206 * Clear all state information. 207 */ 208 public void clear() { 209 key = null; 210 pos = -1; 211 for (int i = 0; i < iters.length; ++i) { 212 iters[i].clear(); 213 iters[i] = EMPTY; 214 } 215 } 216 217 /** 218 * Returns false if exhausted or if reset(K) has not been called. 219 */ 220 protected boolean hasNext() { 221 return !(pos < 0); 222 } 223 224 /** 225 * Populate Tuple from iterators. 226 * It should be the case that, given iterators i_1...i_n over values from 227 * sources s_1...s_n sharing key k, repeated calls to next should yield 228 * I x I. 229 */ 230 @SuppressWarnings("unchecked") // No static typeinfo on Tuples 231 protected boolean next(TupleWritable val) throws IOException { 232 if (first) { 233 int i = -1; 234 for (pos = 0; pos < iters.length; ++pos) { 235 if (iters[pos].hasNext() && iters[pos].next((X)val.get(pos))) { 236 i = pos; 237 val.setWritten(i); 238 } 239 } 240 pos = i; 241 first = false; 242 if (pos < 0) { 243 clear(); 244 return false; 245 } 246 return true; 247 } 248 while (0 <= pos && !(iters[pos].hasNext() && 249 iters[pos].next((X)val.get(pos)))) { 250 --pos; 251 } 252 if (pos < 0) { 253 clear(); 254 return false; 255 } 256 val.setWritten(pos); 257 for (int i = 0; i < pos; ++i) { 258 if (iters[i].replay((X)val.get(i))) { 259 val.setWritten(i); 260 } 261 } 262 while (pos + 1 < iters.length) { 263 ++pos; 264 iters[pos].reset(); 265 if (iters[pos].hasNext() && iters[pos].next((X)val.get(pos))) { 266 val.setWritten(pos); 267 } 268 } 269 return true; 270 } 271 272 /** 273 * Replay the last Tuple emitted. 274 */ 275 @SuppressWarnings("unchecked") // No static typeinfo on Tuples 276 public boolean replay(TupleWritable val) throws IOException { 277 // The last emitted tuple might have drawn on an empty source; 278 // it can't be cleared prematurely, b/c there may be more duplicate 279 // keys in iterator positions < pos 280 assert !first; 281 boolean ret = false; 282 for (int i = 0; i < iters.length; ++i) { 283 if (iters[i].replay((X)val.get(i))) { 284 val.setWritten(i); 285 ret = true; 286 } 287 } 288 return ret; 289 } 290 291 /** 292 * Close all child iterators. 293 */ 294 public void close() throws IOException { 295 for (int i = 0; i < iters.length; ++i) { 296 iters[i].close(); 297 } 298 } 299 300 /** 301 * Write the next value into key, value as accepted by the operation 302 * associated with this set of RecordReaders. 303 */ 304 public boolean flush(TupleWritable value) throws IOException { 305 while (hasNext()) { 306 value.clearWritten(); 307 if (next(value) && combine(kids, value)) { 308 return true; 309 } 310 } 311 return false; 312 } 313 } 314 315 /** 316 * Return the key for the current join or the value at the top of the 317 * RecordReader heap. 318 */ 319 public K key() { 320 if (jc.hasNext()) { 321 return jc.key(); 322 } 323 if (!q.isEmpty()) { 324 return q.peek().key(); 325 } 326 return null; 327 } 328 329 /** 330 * Clone the key at the top of this RR into the given object. 331 */ 332 public void key(K key) throws IOException { 333 WritableUtils.cloneInto(key, key()); 334 } 335 336 /** 337 * Return true if it is possible that this could emit more values. 338 */ 339 public boolean hasNext() { 340 return jc.hasNext() || !q.isEmpty(); 341 } 342 343 /** 344 * Pass skip key to child RRs. 345 */ 346 public void skip(K key) throws IOException { 347 ArrayList<ComposableRecordReader<K,?>> tmp = 348 new ArrayList<ComposableRecordReader<K,?>>(); 349 while (!q.isEmpty() && cmp.compare(q.peek().key(), key) <= 0) { 350 tmp.add(q.poll()); 351 } 352 for (ComposableRecordReader<K,?> rr : tmp) { 353 rr.skip(key); 354 if (rr.hasNext()) { 355 q.add(rr); 356 } 357 } 358 } 359 360 /** 361 * Obtain an iterator over the child RRs apropos of the value type 362 * ultimately emitted from this join. 363 */ 364 protected abstract ResetableIterator<X> getDelegate(); 365 366 /** 367 * If key provided matches that of this Composite, give JoinCollector 368 * iterator over values it may emit. 369 */ 370 @SuppressWarnings("unchecked") // No values from static EMPTY class 371 public void accept(CompositeRecordReader.JoinCollector jc, K key) 372 throws IOException { 373 if (hasNext() && 0 == cmp.compare(key, key())) { 374 fillJoinCollector(createKey()); 375 jc.add(id, getDelegate()); 376 return; 377 } 378 jc.add(id, EMPTY); 379 } 380 381 /** 382 * For all child RRs offering the key provided, obtain an iterator 383 * at that position in the JoinCollector. 384 */ 385 protected void fillJoinCollector(K iterkey) throws IOException { 386 if (!q.isEmpty()) { 387 q.peek().key(iterkey); 388 while (0 == cmp.compare(q.peek().key(), iterkey)) { 389 ComposableRecordReader<K,?> t = q.poll(); 390 t.accept(jc, iterkey); 391 if (t.hasNext()) { 392 q.add(t); 393 } else if (q.isEmpty()) { 394 return; 395 } 396 } 397 } 398 } 399 400 /** 401 * Implement Comparable contract (compare key of join or head of heap 402 * with that of another). 403 */ 404 public int compareTo(ComposableRecordReader<K,?> other) { 405 return cmp.compare(key(), other.key()); 406 } 407 408 /** 409 * Create a new key value common to all child RRs. 410 * @throws ClassCastException if key classes differ. 411 */ 412 @SuppressWarnings("unchecked") // Explicit check for key class agreement 413 public K createKey() { 414 if (null == keyclass) { 415 final Class<?> cls = kids[0].createKey().getClass(); 416 for (RecordReader<K,? extends Writable> rr : kids) { 417 if (!cls.equals(rr.createKey().getClass())) { 418 throw new ClassCastException("Child key classes fail to agree"); 419 } 420 } 421 keyclass = cls.asSubclass(WritableComparable.class); 422 } 423 return (K) ReflectionUtils.newInstance(keyclass, getConf()); 424 } 425 426 /** 427 * Create a value to be used internally for joins. 428 */ 429 protected TupleWritable createInternalValue() { 430 Writable[] vals = new Writable[kids.length]; 431 for (int i = 0; i < vals.length; ++i) { 432 vals[i] = kids[i].createValue(); 433 } 434 return new TupleWritable(vals); 435 } 436 437 /** 438 * Unsupported (returns zero in all cases). 439 */ 440 public long getPos() throws IOException { 441 return 0; 442 } 443 444 /** 445 * Close all child RRs. 446 */ 447 public void close() throws IOException { 448 if (kids != null) { 449 for (RecordReader<K,? extends Writable> rr : kids) { 450 rr.close(); 451 } 452 } 453 if (jc != null) { 454 jc.close(); 455 } 456 } 457 458 /** 459 * Report progress as the minimum of all child RR progress. 460 */ 461 public float getProgress() throws IOException { 462 float ret = 1.0f; 463 for (RecordReader<K,? extends Writable> rr : kids) { 464 ret = Math.min(ret, rr.getProgress()); 465 } 466 return ret; 467 } 468 }