/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * <code>OutputCommitter</code> describes the commit of task output for a
 * Map-Reduce job.
 *
 * <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
 * the job to:</p>
 * <ol>
 *   <li>
 *   Set up the job during initialization. For example, create the temporary
 *   output directory for the job during the initialization of the job.
 *   </li>
 *   <li>
 *   Clean up the job after job completion. For example, remove the
 *   temporary output directory after job completion.
 *   </li>
 *   <li>
 *   Set up the task's temporary output.
 *   </li>
 *   <li>
 *   Check whether a task needs a commit. This avoids the commit
 *   procedure for tasks that have nothing to commit.
 *   </li>
 *   <li>
 *   Commit the task output.
 *   </li>
 *   <li>
 *   Discard the task commit. A skeleton implementation is sketched below.
 *   </li>
 * </ol>
 *
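 * <p>A minimal sketch of a custom committer covering these steps
 * (illustrative only; the class name <code>NoOpOutputCommitter</code> is
 * hypothetical):</p>
 *
 * <pre>{@code
 * public class NoOpOutputCommitter extends OutputCommitter {
 *   public void setupJob(JobContext jobContext) throws IOException {
 *     // e.g. create a temporary output directory for the job
 *   }
 *   public void setupTask(TaskAttemptContext taskContext) throws IOException {
 *     // e.g. create a per-attempt scratch directory
 *   }
 *   public boolean needsTaskCommit(TaskAttemptContext taskContext)
 *       throws IOException {
 *     return false; // nothing to promote, so skip the commit protocol
 *   }
 *   public void commitTask(TaskAttemptContext taskContext) throws IOException {
 *     // never invoked while needsTaskCommit() returns false
 *   }
 *   public void abortTask(TaskAttemptContext taskContext) throws IOException {
 *     // e.g. delete the per-attempt scratch directory
 *   }
 * }
 * }</pre>
 *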
 * @see FileOutputCommitter
 * @see JobContext
 * @see TaskAttemptContext
 * @deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class OutputCommitter
                extends org.apache.hadoop.mapreduce.OutputCommitter {
  /**
   * For the framework to set up the job output during initialization.
   * 
   * @param jobContext Context of the job whose output is being written.
   * @throws IOException if the temporary output could not be created
   */
  public abstract void setupJob(JobContext jobContext) throws IOException;

  /**
   * For cleaning up the job's output after job completion.
   * 
   * @param jobContext Context of the job whose output is being written.
   * @throws IOException if cleanup fails
   * @deprecated Use {@link #commitJob(JobContext)} or 
   *                 {@link #abortJob(JobContext, int)} instead.
   */
  @Deprecated
  public void cleanupJob(JobContext jobContext) throws IOException { }

  /**
   * For committing the job's output after successful job completion. Note that
   * this is invoked for jobs with a final run state of SUCCESSFUL.
   * 
   * @param jobContext Context of the job whose output is being written.
   * @throws IOException if commit fails
   */
  public void commitJob(JobContext jobContext) throws IOException {
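    // By default, delegate to the deprecated cleanupJob() so that old
    // committers that only override cleanupJob() keep working.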
    cleanupJob(jobContext);
  }
  
  /**
   * For aborting an unsuccessful job's output. Note that this is invoked for 
   * jobs with a final run state of {@link JobStatus#FAILED} or 
   * {@link JobStatus#KILLED}.
   * 
   * @param jobContext Context of the job whose output is being written.
   * @param status final run state of the job
   * @throws IOException if abort fails
   */
  public void abortJob(JobContext jobContext, int status) 
  throws IOException {
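    // Fall back to the deprecated cleanupJob() for backward compatibility.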
    cleanupJob(jobContext);
  }
  
  /**
   * Sets up output for the task.
   * 
   * @param taskContext Context of the task whose output is being written.
   * @throws IOException if the task's temporary output could not be set up
   */
  public abstract void setupTask(TaskAttemptContext taskContext)
  throws IOException;
  
  /**
   * Check whether the task needs a commit.
   * 
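   * <p>A sketch of a common pattern: report a commit only when the task
   * attempt actually produced output. The helper
   * <code>getTaskAttemptPath</code> is hypothetical.</p>
   *
   * <pre>{@code
   * public boolean needsTaskCommit(TaskAttemptContext taskContext)
   *     throws IOException {
   *   Path attemptDir = getTaskAttemptPath(taskContext); // hypothetical helper
   *   FileSystem fs = attemptDir.getFileSystem(taskContext.getJobConf());
   *   return fs.exists(attemptDir); // commit only if output exists
   * }
   * }</pre>
   *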
   * @param taskContext Context of the task whose output is being written.
   * @return true if the task needs a commit; false otherwise
   * @throws IOException if the check fails
   */
  public abstract boolean needsTaskCommit(TaskAttemptContext taskContext)
  throws IOException;

  /**
   * To promote the task's temporary output to the final output location.
   * 
   * The task's output is moved to the job's output directory.
   * 
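   * <p>A sketch of the typical move (the helper
   * <code>getTaskAttemptPath</code> is hypothetical):</p>
   *
   * <pre>{@code
   * Path attemptDir = getTaskAttemptPath(taskContext); // hypothetical helper
   * Path outputDir = FileOutputFormat.getOutputPath(taskContext.getJobConf());
   * FileSystem fs = outputDir.getFileSystem(taskContext.getJobConf());
   * if (!fs.rename(attemptDir, new Path(outputDir, attemptDir.getName()))) {
   *   throw new IOException("Could not commit " + attemptDir);
   * }
   * }</pre>
   *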
   * @param taskContext Context of the task whose output is being written.
   * @throws IOException if the commit fails
   */
  public abstract void commitTask(TaskAttemptContext taskContext)
  throws IOException;
  
  /**
   * Discard the task output.
   * 
   * @param taskContext Context of the task whose output is being discarded.
   * @throws IOException if the output could not be discarded
   */
  public abstract void abortTask(TaskAttemptContext taskContext)
  throws IOException;

  /**
   * Whether task recovery is supported. Committers written against this old
   * API do not support recovery unless they override this method, so the
   * default is <code>false</code>.
   */
  @Override
  public boolean isRecoverySupported() {
    return false;
  }

  /**
   * Recover the task output.
   * 
   * The retry count for the job will be passed via the 
   * {@link MRConstants#APPLICATION_ATTEMPT_ID} key in 
   * {@link TaskAttemptContext#getConfiguration()} for the 
   * <code>OutputCommitter</code>.
   * 
   * If an exception is thrown, the task will be attempted again.
   * 
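   * <p>A sketch of reading the retry count (the default value of 0 here is an
   * assumption, not part of this API):</p>
   *
   * <pre>{@code
   * int attempt = taskContext.getConfiguration()
   *     .getInt(MRConstants.APPLICATION_ATTEMPT_ID, 0);
   * // Decide whether output from the previous attempt can be promoted.
   * }</pre>
   *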
   * @param taskContext Context of the task whose output is being recovered.
   * @throws IOException if the task output could not be recovered
   */
  public void recoverTask(TaskAttemptContext taskContext) 
  throws IOException {
  }
  
  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void setupJob(org.apache.hadoop.mapreduce.JobContext jobContext
                             ) throws IOException {
    setupJob((JobContext) jobContext);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   * @deprecated Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)}
   *             or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)}
   *             instead.
   */
  @Override
  @Deprecated
  public final void cleanupJob(org.apache.hadoop.mapreduce.JobContext context
                               ) throws IOException {
    cleanupJob((JobContext) context);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void commitJob(org.apache.hadoop.mapreduce.JobContext context
                             ) throws IOException {
    commitJob((JobContext) context);
  }
  
  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final void abortJob(org.apache.hadoop.mapreduce.JobContext context, 
                                   org.apache.hadoop.mapreduce.JobStatus.State runState) 
  throws IOException {
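    // Translate the new-API enum into the old-API int constant before
    // validating that the job actually ended unsuccessfully.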
    int state = JobStatus.getOldNewJobRunState(runState);
    if (state != JobStatus.FAILED && state != JobStatus.KILLED) {
      throw new IOException("Invalid job run state: " + runState.name());
    }
    abortJob((JobContext) context, state);
  }
  
  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final 
  void setupTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
                 ) throws IOException {
    setupTask((TaskAttemptContext) taskContext);
  }
  
  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final boolean 
    needsTaskCommit(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
                    ) throws IOException {
    return needsTaskCommit((TaskAttemptContext) taskContext);
  }

  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final 
  void commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
                  ) throws IOException {
    commitTask((TaskAttemptContext) taskContext);
  }
  
  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final 
  void abortTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
                 ) throws IOException {
    abortTask((TaskAttemptContext) taskContext);
  }
  
  /**
   * This method implements the new interface by calling the old method. Note
   * that the input types are different between the new and old APIs and this
   * is a bridge between the two.
   */
  @Override
  public final 
  void recoverTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
      ) throws IOException {
    recoverTask((TaskAttemptContext) taskContext);
  }

}