001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.mapreduce;
020    
021    import java.io.IOException;
022    
023    import org.apache.hadoop.classification.InterfaceAudience;
024    import org.apache.hadoop.classification.InterfaceStability;
025    /**
026     * <code>OutputCommitter</code> describes the commit of task output for a 
027     * Map-Reduce job.
028     *
029     * <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of 
030     * the job to:<p>
031     * <ol>
032     *   <li>
033     *   Setup the job during initialization. For example, create the temporary 
034     *   output directory for the job during the initialization of the job.
035     *   </li>
036     *   <li>
037     *   Cleanup the job after the job completion. For example, remove the
038     *   temporary output directory after the job completion. 
039     *   </li>
040     *   <li>
041     *   Setup the task temporary output.
042     *   </li> 
043     *   <li>
044     *   Check whether a task needs a commit. This is to avoid the commit
045     *   procedure if a task does not need commit.
046     *   </li>
047     *   <li>
048     *   Commit of the task output.
049     *   </li>  
050     *   <li>
051     *   Discard the task commit.
052     *   </li>
053     * </ol>
054     * 
055     * @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter 
056     * @see JobContext
057     * @see TaskAttemptContext 
058     *
059     */
060    @InterfaceAudience.Public
061    @InterfaceStability.Stable
062    public abstract class OutputCommitter {
063      /**
064       * For the framework to setup the job output during initialization
065       * 
066       * @param jobContext Context of the job whose output is being written.
067       * @throws IOException if temporary output could not be created
068       */
069      public abstract void setupJob(JobContext jobContext) throws IOException;
070    
071      /**
072       * For cleaning up the job's output after job completion
073       * 
074       * @param jobContext Context of the job whose output is being written.
075       * @throws IOException
076       * @deprecated Use {@link #commitJob(JobContext)} or
077       *                 {@link #abortJob(JobContext, JobStatus.State)} instead.
078       */
079      @Deprecated
080      public void cleanupJob(JobContext jobContext) throws IOException { }
081    
082      /**
083       * For committing job's output after successful job completion. Note that this
084       * is invoked for jobs with final runstate as SUCCESSFUL.     
085       * 
086       * @param jobContext Context of the job whose output is being written.
087       * @throws IOException
088       */
089      public void commitJob(JobContext jobContext) throws IOException {
090        cleanupJob(jobContext);
091      }
092    
093      
094      /**
095       * For aborting an unsuccessful job's output. Note that this is invoked for 
096       * jobs with final runstate as {@link JobStatus.State#FAILED} or 
097       * {@link JobStatus.State#KILLED}.
098       *
099       * @param jobContext Context of the job whose output is being written.
100       * @param state final runstate of the job
101       * @throws IOException
102       */
103      public void abortJob(JobContext jobContext, JobStatus.State state) 
104      throws IOException {
105        cleanupJob(jobContext);
106      }
107      
108      /**
109       * Sets up output for the task.
110       * 
111       * @param taskContext Context of the task whose output is being written.
112       * @throws IOException
113       */
114      public abstract void setupTask(TaskAttemptContext taskContext)
115      throws IOException;
116      
117      /**
118       * Check whether task needs a commit
119       * 
120       * @param taskContext
121       * @return true/false
122       * @throws IOException
123       */
124      public abstract boolean needsTaskCommit(TaskAttemptContext taskContext)
125      throws IOException;
126    
127      /**
128       * To promote the task's temporary output to final output location
129       * 
130       * The task's output is moved to the job's output directory.
131       * 
132       * @param taskContext Context of the task whose output is being written.
133       * @throws IOException if commit is not 
134       */
135      public abstract void commitTask(TaskAttemptContext taskContext)
136      throws IOException;
137      
138      /**
139       * Discard the task output
140       * 
141       * @param taskContext
142       * @throws IOException
143       */
144      public abstract void abortTask(TaskAttemptContext taskContext)
145      throws IOException;
146    
147      /**
148       * Is task output recovery supported for restarting jobs?
149       * 
150       * If task output recovery is supported, job restart can be done more 
151       * efficiently.
152       * 
153       * @return <code>true</code> if task output recovery is supported,
154       *         <code>false</code> otherwise
155       * @see #recoverTask(TaskAttemptContext)         
156       */
157      public boolean isRecoverySupported() {
158        return false;
159      }
160      
161      /**
162       * Recover the task output. 
163       * 
164       * The retry-count for the job will be passed via the 
165       * {@link MRJobConfig#APPLICATION_ATTEMPT_ID} key in  
166       * {@link TaskAttemptContext#getConfiguration()} for the 
167       * <code>OutputCommitter</code>.
168       * 
169       * If an exception is thrown the task will be attempted again. 
170       * 
171       * @param taskContext Context of the task whose output is being recovered
172       * @throws IOException
173       */
174      public void recoverTask(TaskAttemptContext taskContext)
175      throws IOException
176      {}
177    }