001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.command.wf;
016
017 import java.util.Date;
018
019 import org.apache.hadoop.conf.Configuration;
020 import org.apache.oozie.DagELFunctions;
021 import org.apache.oozie.ErrorCode;
022 import org.apache.oozie.WorkflowActionBean;
023 import org.apache.oozie.WorkflowJobBean;
024 import org.apache.oozie.action.ActionExecutor;
025 import org.apache.oozie.action.ActionExecutorException;
026 import org.apache.oozie.client.OozieClient;
027 import org.apache.oozie.client.WorkflowAction;
028 import org.apache.oozie.client.WorkflowJob;
029 import org.apache.oozie.client.SLAEvent.SlaAppType;
030 import org.apache.oozie.client.SLAEvent.Status;
031 import org.apache.oozie.command.CommandException;
032 import org.apache.oozie.service.ActionService;
033 import org.apache.oozie.service.Services;
034 import org.apache.oozie.service.UUIDService;
035 import org.apache.oozie.store.StoreException;
036 import org.apache.oozie.store.WorkflowStore;
037 import org.apache.oozie.util.Instrumentation;
038 import org.apache.oozie.util.XLog;
039 import org.apache.oozie.util.db.SLADbOperations;
040 import org.apache.oozie.workflow.WorkflowInstance;
041
042 public class ActionEndCommand extends ActionCommand<Void> {
043 public static final String COULD_NOT_END = "COULD_NOT_END";
044 public static final String END_DATA_MISSING = "END_DATA_MISSING";
045
046 private String id;
047 private String jobId = null;
048
049 public ActionEndCommand(String id, String type) {
050 super("action.end", type, 0);
051 this.id = id;
052 }
053
054 @Override
055 protected Void call(WorkflowStore store) throws StoreException, CommandException {
056 WorkflowJobBean workflow = store.getWorkflow(jobId, false);
057 setLogInfo(workflow);
058 WorkflowActionBean action = store.getAction(id, false);
059 setLogInfo(action);
060 if (action.isPending()
061 && (action.getStatus() == WorkflowActionBean.Status.DONE
062 || action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL)) {
063 if (workflow.getStatus() == WorkflowJob.Status.RUNNING) {
064
065 ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType());
066 Configuration conf = workflow.getWorkflowInstance().getConf();
067 int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
068 long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
069 executor.setMaxRetries(maxRetries);
070 executor.setRetryInterval(retryInterval);
071
072 if (executor != null) {
073 boolean isRetry = false;
074 if (action.getStatus() == WorkflowActionBean.Status.END_RETRY
075 || action.getStatus() == WorkflowActionBean.Status.END_MANUAL) {
076 isRetry = true;
077 }
078 ActionExecutorContext context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry);
079 try {
080
081 XLog.getLog(getClass()).debug(
082 "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]",
083 action.getName(), action.getType(), action.getStatus(), action.getExternalStatus(),
084 action.getSignalValue());
085 WorkflowInstance wfInstance = workflow.getWorkflowInstance();
086 DagELFunctions.setActionInfo(wfInstance, action);
087 workflow.setWorkflowInstance(wfInstance);
088 incrActionCounter(action.getType(), 1);
089
090 Instrumentation.Cron cron = new Instrumentation.Cron();
091 cron.start();
092 executor.end(context, action);
093 cron.stop();
094 addActionCron(action.getType(), cron);
095
096 if (!context.isEnded()) {
097 XLog.getLog(getClass()).warn(XLog.OPS,
098 "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType());
099 action.setErrorInfo(END_DATA_MISSING, "Execution Ended, but End Data Missing from Action");
100 failJob(context);
101 store.updateAction(action);
102 store.updateWorkflow(workflow);
103 return null;
104 }
105 action.setRetries(0);
106 action.setEndTime(new Date());
107 store.updateAction(action);
108 store.updateWorkflow(workflow);
109 Status slaStatus = null;
110 switch (action.getStatus()) {
111 case OK:
112 slaStatus = Status.SUCCEEDED;
113 break;
114 case KILLED:
115 slaStatus = Status.KILLED;
116 break;
117 case FAILED:
118 slaStatus = Status.FAILED;
119 break;
120 case ERROR:
121 XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA");
122 slaStatus = Status.KILLED;
123 break;
124 default: // TODO: What will happen for other Action
125 // status
126 slaStatus = Status.FAILED;
127 break;
128 }
129 SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, slaStatus,
130 SlaAppType.WORKFLOW_ACTION);
131 queueCallable(new NotificationCommand(workflow, action));
132 XLog.getLog(getClass()).debug(
133 "Queuing commands for action=" + id + ", status=" + action.getStatus()
134 + ", Set pending=" + action.getPending());
135 queueCallable(new SignalCommand(workflow.getId(), id));
136 }
137 catch (ActionExecutorException ex) {
138 XLog.getLog(getClass()).warn(
139 "Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
140 action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage());
141 action.setErrorInfo(ex.getErrorCode(), ex.getMessage());
142 action.setEndTime(null);
143 switch (ex.getErrorType()) {
144 case TRANSIENT:
145 if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) {
146 handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL);
147 action.setPendingAge(new Date());
148 action.setRetries(0);
149 }
150 action.setEndTime(null);
151 break;
152 case NON_TRANSIENT:
153 handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL);
154 action.setEndTime(null);
155 break;
156 case ERROR:
157 handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR);
158 queueCallable(new SignalCommand(workflow.getId(), id));
159 break;
160 case FAILED:
161 failJob(context);
162 break;
163 }
164 store.updateAction(action);
165 store.updateWorkflow(workflow);
166 }
167 }
168 else {
169 throw new CommandException(ErrorCode.E0802, action.getType());
170 }
171 }
172 else {
173 XLog.getLog(getClass()).warn("Job state is not {0}. Skipping ActionEnd Execution",
174 WorkflowJob.Status.RUNNING.toString());
175 }
176 }
177 else {
178 XLog.getLog(getClass()).debug("Action pending={0}, status={1}. Skipping ActionEnd Execution",
179 action.getPending(), action.getStatusStr());
180 }
181 return null;
182 }
183
184 @Override
185 protected Void execute(WorkflowStore store) throws CommandException, StoreException {
186 XLog.getLog(getClass()).debug("STARTED ActionEndCommand for action " + id);
187 try {
188 jobId = Services.get().get(UUIDService.class).getId(id);
189 if (lock(jobId)) {
190 call(store);
191 }
192 else {
193 queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
194 XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - failed {0}", id);
195 }
196 }
197 catch (InterruptedException e) {
198 queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
199 XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - interrupted exception failed {0}", id);
200 }
201 finally {
202 XLog.getLog(getClass()).debug("ENDED ActionEndCommand for action " + id);
203 }
204 return null;
205 }
206 }