001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.mapreduce; 020 021 import java.io.IOException; 022 023 import org.apache.hadoop.classification.InterfaceAudience; 024 import org.apache.hadoop.classification.InterfaceStability; 025 /** 026 * <code>OutputCommitter</code> describes the commit of task output for a 027 * Map-Reduce job. 028 * 029 * <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of 030 * the job to:<p> 031 * <ol> 032 * <li> 033 * Setup the job during initialization. For example, create the temporary 034 * output directory for the job during the initialization of the job. 035 * </li> 036 * <li> 037 * Cleanup the job after the job completion. For example, remove the 038 * temporary output directory after the job completion. 039 * </li> 040 * <li> 041 * Setup the task temporary output. 042 * </li> 043 * <li> 044 * Check whether a task needs a commit. This is to avoid the commit 045 * procedure if a task does not need commit. 046 * </li> 047 * <li> 048 * Commit of the task output. 049 * </li> 050 * <li> 051 * Discard the task commit. 052 * </li> 053 * </ol> 054 * 055 * @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter 056 * @see JobContext 057 * @see TaskAttemptContext 058 * 059 */ 060 @InterfaceAudience.Public 061 @InterfaceStability.Stable 062 public abstract class OutputCommitter { 063 /** 064 * For the framework to setup the job output during initialization 065 * 066 * @param jobContext Context of the job whose output is being written. 067 * @throws IOException if temporary output could not be created 068 */ 069 public abstract void setupJob(JobContext jobContext) throws IOException; 070 071 /** 072 * For cleaning up the job's output after job completion 073 * 074 * @param jobContext Context of the job whose output is being written. 075 * @throws IOException 076 * @deprecated Use {@link #commitJob(JobContext)} or 077 * {@link #abortJob(JobContext, JobStatus.State)} instead. 078 */ 079 @Deprecated 080 public void cleanupJob(JobContext jobContext) throws IOException { } 081 082 /** 083 * For committing job's output after successful job completion. Note that this 084 * is invoked for jobs with final runstate as SUCCESSFUL. 085 * 086 * @param jobContext Context of the job whose output is being written. 087 * @throws IOException 088 */ 089 public void commitJob(JobContext jobContext) throws IOException { 090 cleanupJob(jobContext); 091 } 092 093 094 /** 095 * For aborting an unsuccessful job's output. Note that this is invoked for 096 * jobs with final runstate as {@link JobStatus.State#FAILED} or 097 * {@link JobStatus.State#KILLED}. 098 * 099 * @param jobContext Context of the job whose output is being written. 100 * @param state final runstate of the job 101 * @throws IOException 102 */ 103 public void abortJob(JobContext jobContext, JobStatus.State state) 104 throws IOException { 105 cleanupJob(jobContext); 106 } 107 108 /** 109 * Sets up output for the task. 110 * 111 * @param taskContext Context of the task whose output is being written. 112 * @throws IOException 113 */ 114 public abstract void setupTask(TaskAttemptContext taskContext) 115 throws IOException; 116 117 /** 118 * Check whether task needs a commit 119 * 120 * @param taskContext 121 * @return true/false 122 * @throws IOException 123 */ 124 public abstract boolean needsTaskCommit(TaskAttemptContext taskContext) 125 throws IOException; 126 127 /** 128 * To promote the task's temporary output to final output location 129 * 130 * The task's output is moved to the job's output directory. 131 * 132 * @param taskContext Context of the task whose output is being written. 133 * @throws IOException if commit is not 134 */ 135 public abstract void commitTask(TaskAttemptContext taskContext) 136 throws IOException; 137 138 /** 139 * Discard the task output 140 * 141 * @param taskContext 142 * @throws IOException 143 */ 144 public abstract void abortTask(TaskAttemptContext taskContext) 145 throws IOException; 146 147 /** 148 * Is task output recovery supported for restarting jobs? 149 * 150 * If task output recovery is supported, job restart can be done more 151 * efficiently. 152 * 153 * @return <code>true</code> if task output recovery is supported, 154 * <code>false</code> otherwise 155 * @see #recoverTask(TaskAttemptContext) 156 */ 157 public boolean isRecoverySupported() { 158 return false; 159 } 160 161 /** 162 * Recover the task output. 163 * 164 * The retry-count for the job will be passed via the 165 * {@link MRJobConfig#APPLICATION_ATTEMPT_ID} key in 166 * {@link TaskAttemptContext#getConfiguration()} for the 167 * <code>OutputCommitter</code>. 168 * 169 * If an exception is thrown the task will be attempted again. 170 * 171 * @param taskContext Context of the task whose output is being recovered 172 * @throws IOException 173 */ 174 public void recoverTask(TaskAttemptContext taskContext) 175 throws IOException 176 {} 177 }