/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * <code>OutputCommitter</code> describes the commit of task output for a
 * Map-Reduce job.
 *
 * <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
 * the job to:</p>
 * <ol>
 *   <li>
 *   Set up the job during initialization. For example, create the temporary
 *   output directory for the job.
 *   </li>
 *   <li>
 *   Clean up the job after completion. For example, remove the temporary
 *   output directory.
 *   </li>
 *   <li>
 *   Set up the task's temporary output.
 *   </li>
 *   <li>
 *   Check whether a task needs a commit, so that the commit procedure can be
 *   skipped for tasks that do not need one.
 *   </li>
 *   <li>
 *   Commit the task output.
 *   </li>
 *   <li>
 *   Discard the task commit.
 *   </li>
 * </ol>
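 *
 * <p>Roughly, the framework drives a committer in this order (a simplified
 * sketch; job abort, task abort, and retries are omitted):</p>
 * <pre>
 *   committer.setupJob(jobContext);
 *   // for each task attempt:
 *   committer.setupTask(taskContext);
 *   if (committer.needsTaskCommit(taskContext)) {
 *     committer.commitTask(taskContext);
 *   }
 *   // once all tasks have completed successfully:
 *   committer.commitJob(jobContext);
 * </pre>
 *
 * <p>A minimal no-op committer appears as an illustrative sketch at the end
 * of this file.</p>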
 *
 * @see FileOutputCommitter
 * @see JobContext
 * @see TaskAttemptContext
 * @deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class OutputCommitter
    extends org.apache.hadoop.mapreduce.OutputCommitter {

  /**
   * For the framework to set up the job output during initialization. For
   * example, create the temporary output directory for the job.
   *
   * @param jobContext Context of the job whose output is being written.
   * @throws IOException if temporary output could not be created
   */
  public abstract void setupJob(JobContext jobContext) throws IOException;

  /**
   * For cleaning up the job's output after job completion.
   *
   * @param jobContext Context of the job whose output is being written.
   * @throws IOException if the job output could not be cleaned up
   * @deprecated Use {@link #commitJob(JobContext)} or
   *             {@link #abortJob(JobContext, int)} instead.
   */
  @Deprecated
  public void cleanupJob(JobContext jobContext) throws IOException { }

  /**
   * For committing the job's output after successful job completion. Note
   * that this is invoked for jobs whose final run state is SUCCESSFUL.
   *
   * @param jobContext Context of the job whose output is being written.
   * @throws IOException if the commit fails
   */
  public void commitJob(JobContext jobContext) throws IOException {
    cleanupJob(jobContext);
  }

  /**
   * For aborting an unsuccessful job's output. Note that this is invoked for
   * jobs whose final run state is {@link JobStatus#FAILED} or
   * {@link JobStatus#KILLED}.
   *
   * @param jobContext Context of the job whose output is being written.
   * @param status final run state of the job
   * @throws IOException if the abort fails
   */
  public void abortJob(JobContext jobContext, int status)
      throws IOException {
    cleanupJob(jobContext);
  }

  /**
   * Sets up output for the task.
   *
   * @param taskContext Context of the task whose output is being written.
   * @throws IOException if the task output could not be set up
   */
  public abstract void setupTask(TaskAttemptContext taskContext)
      throws IOException;

  /**
   * Check whether the task needs a commit.
   *
   * @param taskContext Context of the task whose output is being written.
   * @return <code>true</code> if the task needs a commit,
   *         <code>false</code> otherwise
   * @throws IOException if the check fails
   */
  public abstract boolean needsTaskCommit(TaskAttemptContext taskContext)
      throws IOException;

  /**
   * Promotes the task's temporary output to its final output location.
   *
   * <p>The task's output is moved to the job's output directory.</p>
   *
   * @param taskContext Context of the task whose output is being written.
   * @throws IOException if the commit is not successful
   */
  public abstract void commitTask(TaskAttemptContext taskContext)
      throws IOException;

  /**
   * Discard the task output.
   *
   * @param taskContext Context of the task whose output is being discarded.
   * @throws IOException if the task output could not be discarded
   */
  public abstract void abortTask(TaskAttemptContext taskContext)
      throws IOException;

  /**
   * Whether recovery of task output from a previous application attempt is
   * supported. The default is <code>false</code>; subclasses that implement
   * {@link #recoverTask(TaskAttemptContext)} should override this to return
   * <code>true</code>.
   */
  @Override
  public boolean isRecoverySupported() {
    return false;
  }

  /**
   * Recover the task output.
   *
   * <p>The retry-count for the job will be passed via the
   * {@link MRConstants#APPLICATION_ATTEMPT_ID} key in
   * {@link TaskAttemptContext#getConfiguration()} for the
   * <code>OutputCommitter</code>.</p>
   *
   * <p>If an exception is thrown the task will be attempted again.</p>
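   *
   * <p>For illustration, an implementation might read that retry count as
   * follows (a sketch only; the <code>0</code> default is an assumption, and
   * the default implementation below does nothing):</p>
   * <pre>
   *   int appAttemptId = taskContext.getConfiguration()
   *       .getInt(MRConstants.APPLICATION_ATTEMPT_ID, 0);
   * </pre>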
   *
   * @param taskContext Context of the task whose output is being recovered.
   * @throws IOException if the task output could not be recovered
   */
  public void recoverTask(TaskAttemptContext taskContext)
      throws IOException {
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void setupJob(org.apache.hadoop.mapreduce.JobContext jobContext
      ) throws IOException {
    setupJob((JobContext) jobContext);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   * @deprecated Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)}
   *             or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)}
   *             instead.
   */
  @Override
  @Deprecated
  public final void cleanupJob(org.apache.hadoop.mapreduce.JobContext context
      ) throws IOException {
    cleanupJob((JobContext) context);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void commitJob(org.apache.hadoop.mapreduce.JobContext context
      ) throws IOException {
    commitJob((JobContext) context);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void abortJob(org.apache.hadoop.mapreduce.JobContext context,
      org.apache.hadoop.mapreduce.JobStatus.State runState)
      throws IOException {
    // Translate the new-API run state into the old-API int constant.
    int state = JobStatus.getOldNewJobRunState(runState);
    if (state != JobStatus.FAILED && state != JobStatus.KILLED) {
      throw new IOException("Invalid job run state : " + runState.name());
    }
    abortJob((JobContext) context, state);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void setupTask(
      org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
      ) throws IOException {
    setupTask((TaskAttemptContext) taskContext);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final boolean needsTaskCommit(
      org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
      ) throws IOException {
    return needsTaskCommit((TaskAttemptContext) taskContext);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void commitTask(
      org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
      ) throws IOException {
    commitTask((TaskAttemptContext) taskContext);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void abortTask(
      org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
      ) throws IOException {
    abortTask((TaskAttemptContext) taskContext);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void recoverTask(
      org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
      ) throws IOException {
    recoverTask((TaskAttemptContext) taskContext);
  }

}
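
// Illustrative only, not part of Hadoop: a minimal sketch of a concrete
// committer that performs no work, showing which methods a subclass must
// implement. The class name is hypothetical. A real committer, e.g.
// FileOutputCommitter, would write task output to a temporary location in
// setupTask() and promote it to the job's output directory in commitTask().
class NoOpOutputCommitter extends OutputCommitter {

  @Override
  public void setupJob(JobContext jobContext) throws IOException {
    // Nothing to initialize for this sketch.
  }

  @Override
  public void setupTask(TaskAttemptContext taskContext) throws IOException {
    // No temporary task output to set up.
  }

  @Override
  public boolean needsTaskCommit(TaskAttemptContext taskContext)
      throws IOException {
    return false; // Nothing is written, so no commit is needed.
  }

  @Override
  public void commitTask(TaskAttemptContext taskContext) throws IOException {
    // Never reached, since needsTaskCommit() returns false.
  }

  @Override
  public void abortTask(TaskAttemptContext taskContext) throws IOException {
    // No output to discard.
  }
}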