001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.command.wf;
016
017 import java.io.IOException;
018 import java.io.StringReader;
019 import java.net.URI;
020 import java.net.URISyntaxException;
021 import java.util.Date;
022 import java.util.Properties;
023
024 import org.apache.hadoop.conf.Configuration;
025 import org.apache.hadoop.fs.FileSystem;
026 import org.apache.hadoop.fs.Path;
027 import org.apache.oozie.DagELFunctions;
028 import org.apache.oozie.WorkflowActionBean;
029 import org.apache.oozie.WorkflowJobBean;
030 import org.apache.oozie.action.ActionExecutor;
031 import org.apache.oozie.client.WorkflowAction;
032 import org.apache.oozie.client.WorkflowJob;
033 import org.apache.oozie.command.CommandException;
034 import org.apache.oozie.service.CallbackService;
035 import org.apache.oozie.service.ELService;
036 import org.apache.oozie.service.HadoopAccessorException;
037 import org.apache.oozie.service.HadoopAccessorService;
038 import org.apache.oozie.service.Services;
039 import org.apache.oozie.store.StoreException;
040 import org.apache.oozie.store.WorkflowStore;
041 import org.apache.oozie.util.ELEvaluator;
042 import org.apache.oozie.util.Instrumentation;
043 import org.apache.oozie.util.XConfiguration;
044 import org.apache.oozie.util.XLog;
045 import org.apache.oozie.workflow.WorkflowException;
046 import org.apache.oozie.workflow.WorkflowInstance;
047 import org.apache.oozie.workflow.lite.LiteWorkflowInstance;
048
049 /**
050 * Base class for Action execution commands. Provides common functionality to handle different types of errors while
051 * attempting to start or end an action.
052 */
053 public abstract class ActionCommand<T> extends WorkflowCommand<Void> {
054 private static final String INSTRUMENTATION_GROUP = "action.executors";
055
056 protected static final String INSTR_FAILED_JOBS_COUNTER = "failed";
057
058 protected static final String RECOVERY_ID_SEPARATOR = "@";
059
060 public ActionCommand(String name, String type, int priority) {
061 super(name, type, priority, XLog.STD);
062 }
063
064 /**
065 * Takes care of Transient failures. Sets the action status to retry and increments the retry count if not enough
066 * attempts have been made. Otherwise returns false.
067 *
068 * @param context the execution context.
069 * @param executor the executor instance being used.
070 * @param status the status to be set for the action.
071 * @return true if the action is scheduled for another retry. false if the number of retries has exceeded the
072 * maximum number of configured retries.
073 * @throws StoreException
074 * @throws org.apache.oozie.command.CommandException
075 */
076 protected boolean handleTransient(ActionExecutor.Context context, ActionExecutor executor, WorkflowAction.Status status)
077 throws StoreException, CommandException {
078 XLog.getLog(getClass()).debug("Attempting to retry");
079 ActionExecutorContext aContext = (ActionExecutorContext) context;
080 WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
081 incrActionErrorCounter(action.getType(), "transient", 1);
082
083 int actionRetryCount = action.getRetries();
084 if (actionRetryCount >= executor.getMaxRetries()) {
085 XLog.getLog(getClass()).warn("Exceeded max retry count [{0}]. Suspending Job", executor.getMaxRetries());
086 return false;
087 }
088 else {
089 action.setStatus(status);
090 action.setPending();
091 action.incRetries();
092 long retryDelayMillis = executor.getRetryInterval() * 1000;
093 action.setPendingAge(new Date(System.currentTimeMillis() + retryDelayMillis));
094 XLog.getLog(getClass()).info("Next Retry, Attempt Number [{0}] in [{1}] milliseconds",
095 actionRetryCount + 1, retryDelayMillis);
096 queueCallable(this, retryDelayMillis);
097 return true;
098 }
099 }
100
101 /**
102 * Takes care of non transient failures. The job is suspended, and the state of the action is changed to *MANUAL
103 * and set pending flag of action to false
104 *
105 * @param store WorkflowStore
106 * @param context the execution context.
107 * @param executor the executor instance being used.
108 * @param status the status to be set for the action.
109 * @throws StoreException
110 * @throws CommandException
111 */
112 protected void handleNonTransient(WorkflowStore store, ActionExecutor.Context context, ActionExecutor executor,
113 WorkflowAction.Status status)
114 throws StoreException, CommandException {
115 ActionExecutorContext aContext = (ActionExecutorContext) context;
116 WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
117 incrActionErrorCounter(action.getType(), "nontransient", 1);
118 WorkflowJobBean workflow = (WorkflowJobBean) context.getWorkflow();
119 String id = workflow.getId();
120 action.setStatus(status);
121 action.resetPendingOnly();
122 XLog.getLog(getClass()).warn("Suspending Workflow Job id=" + id);
123 try {
124 SuspendCommand.suspendJob(store, workflow, id, action.getId());
125 }
126 catch (WorkflowException e) {
127 throw new CommandException(e);
128 }
129 }
130
131 /**
132 * Takes care of errors. </p> For errors while attempting to start the action, the job state is updated and an
133 * {@link ActionEndCommand} is queued. </p> For errors while attempting to end the action, the job state is updated.
134 * </p>
135 *
136 * @param context the execution context.
137 * @param executor the executor instance being used.
138 * @param message
139 * @param isStart whether the error was generated while starting or ending an action.
140 * @param status the status to be set for the action.
141 * @throws org.apache.oozie.command.CommandException
142 */
143 protected void handleError(ActionExecutor.Context context, ActionExecutor executor, String message,
144 boolean isStart, WorkflowAction.Status status) throws CommandException {
145 XLog.getLog(getClass()).warn("Setting Action Status to [{0}]", status);
146 ActionExecutorContext aContext = (ActionExecutorContext) context;
147 WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
148 incrActionErrorCounter(action.getType(), "error", 1);
149 action.setPending();
150 if (isStart) {
151 action.setExecutionData(message, null);
152 queueCallable(new ActionEndCommand(action.getId(), action.getType()));
153 }
154 else {
155 action.setEndData(status, WorkflowAction.Status.ERROR.toString());
156 }
157 }
158
159 public void failJob(ActionExecutor.Context context) throws CommandException {
160 ActionExecutorContext aContext = (ActionExecutorContext) context;
161 WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
162 incrActionErrorCounter(action.getType(), "failed", 1);
163 WorkflowJobBean workflow = (WorkflowJobBean) context.getWorkflow();
164 XLog.getLog(getClass()).warn("Failing Job due to failed action [{0}]", action.getName());
165 try {
166 workflow.getWorkflowInstance().fail(action.getName());
167 WorkflowInstance wfInstance = workflow.getWorkflowInstance();
168 ((LiteWorkflowInstance) wfInstance).setStatus(WorkflowInstance.Status.FAILED);
169 workflow.setWorkflowInstance(wfInstance);
170 workflow.setStatus(WorkflowJob.Status.FAILED);
171 action.setStatus(WorkflowAction.Status.FAILED);
172 action.resetPending();
173 queueCallable(new NotificationCommand(workflow, action));
174 queueCallable(new KillCommand(workflow.getId()));
175 incrJobCounter(INSTR_FAILED_JOBS_COUNTER, 1);
176 }
177 catch (WorkflowException ex) {
178 throw new CommandException(ex);
179 }
180 }
181
182 private void incrActionErrorCounter(String type, String error, int count) {
183 getInstrumentation().incr(INSTRUMENTATION_GROUP, type + "#ex." + error, count);
184 }
185
186 protected void incrActionCounter(String type, int count) {
187 getInstrumentation().incr(INSTRUMENTATION_GROUP, type + "#" + getName(), count);
188 }
189
190 protected void addActionCron(String type, Instrumentation.Cron cron) {
191 getInstrumentation().addCron(INSTRUMENTATION_GROUP, type + "#" + getName(), cron);
192 }
193
194 public static class ActionExecutorContext implements ActionExecutor.Context {
195 private WorkflowJobBean workflow;
196 private Configuration protoConf;
197 private WorkflowActionBean action;
198 private boolean isRetry;
199 private boolean started;
200 private boolean ended;
201 private boolean executed;
202
203 public ActionExecutorContext(WorkflowJobBean workflow, WorkflowActionBean action, boolean isRetry) {
204 this.workflow = workflow;
205 this.action = action;
206 this.isRetry = isRetry;
207 try {
208 protoConf = new XConfiguration(new StringReader(workflow.getProtoActionConf()));
209 }
210 catch (IOException ex) {
211 throw new RuntimeException("It should not happen", ex);
212 }
213 }
214
215 public String getCallbackUrl(String externalStatusVar) {
216 return Services.get().get(CallbackService.class).createCallBackUrl(action.getId(), externalStatusVar);
217 }
218
219 public Configuration getProtoActionConf() {
220 return protoConf;
221 }
222
223 public WorkflowJob getWorkflow() {
224 return workflow;
225 }
226
227 public WorkflowAction getAction() {
228 return action;
229 }
230
231 public ELEvaluator getELEvaluator() {
232 ELEvaluator evaluator = Services.get().get(ELService.class).createEvaluator("workflow");
233 DagELFunctions.configureEvaluator(evaluator, workflow, action);
234 return evaluator;
235 }
236
237 public void setVar(String name, String value) {
238 name = action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + name;
239 WorkflowInstance wfInstance = workflow.getWorkflowInstance();
240 wfInstance.setVar(name, value);
241 //workflow.getWorkflowInstance().setVar(name, value);
242 workflow.setWorkflowInstance(wfInstance);
243 }
244
245 public String getVar(String name) {
246 name = action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + name;
247 return workflow.getWorkflowInstance().getVar(name);
248 }
249
250 public void setStartData(String externalId, String trackerUri, String consoleUrl) {
251 action.setStartData(externalId, trackerUri, consoleUrl);
252 started = true;
253 }
254
255 public void setExecutionData(String externalStatus, Properties actionData) {
256 action.setExecutionData(externalStatus, actionData);
257 executed = true;
258 }
259
260 public void setEndData(WorkflowAction.Status status, String signalValue) {
261 action.setEndData(status, signalValue);
262 ended = true;
263 }
264
265 public boolean isRetry() {
266 return isRetry;
267 }
268
269 /**
270 * Returns whether setStartData has been called or not.
271 *
272 * @return true if start completion info has been set.
273 */
274 public boolean isStarted() {
275 return started;
276 }
277
278 /**
279 * Returns whether setExecutionData has been called or not.
280 *
281 * @return true if execution completion info has been set, otherwise false.
282 */
283 public boolean isExecuted() {
284 return executed;
285 }
286
287
288 /**
289 * Returns whether setEndData has been called or not.
290 *
291 * @return true if end completion info has been set.
292 */
293 public boolean isEnded() {
294 return ended;
295 }
296
297 public void setExternalStatus(String externalStatus) {
298 action.setExternalStatus(externalStatus);
299 }
300
301 @Override
302 public String getRecoveryId() {
303 return action.getId() + RECOVERY_ID_SEPARATOR + workflow.getRun();
304 }
305
306 /* (non-Javadoc)
307 * @see org.apache.oozie.action.ActionExecutor.Context#getActionDir()
308 */
309 public Path getActionDir() throws HadoopAccessorException, IOException, URISyntaxException {
310 String name = getWorkflow().getId() + "/" + action.getName() + "--" + action.getType();
311 FileSystem fs = getAppFileSystem();
312 String actionDirPath = Services.get().getSystemId() + "/" + name;
313 Path fqActionDir = new Path(fs.getHomeDirectory(), actionDirPath);
314 return fqActionDir;
315 }
316
317 /* (non-Javadoc)
318 * @see org.apache.oozie.action.ActionExecutor.Context#getAppFileSystem()
319 */
320 public FileSystem getAppFileSystem() throws HadoopAccessorException, IOException, URISyntaxException {
321 WorkflowJob workflow = getWorkflow();
322 XConfiguration jobConf = new XConfiguration(new StringReader(workflow.getConf()));
323 Configuration fsConf = new Configuration();
324 XConfiguration.copy(jobConf, fsConf);
325 return Services.get().get(HadoopAccessorService.class).createFileSystem(workflow.getUser(),
326 workflow.getGroup(), new URI(getWorkflow().getAppPath()), fsConf);
327
328 }
329
330 @Override
331 public void setErrorInfo(String str, String exMsg) {
332 action.setErrorInfo(str, exMsg);
333 }
334 }
335
336 }