001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.command.wf;
016
017 import java.util.Date;
018
019 import javax.servlet.jsp.el.ELException;
020
021 import org.apache.hadoop.conf.Configuration;
022 import org.apache.oozie.ErrorCode;
023 import org.apache.oozie.FaultInjection;
024 import org.apache.oozie.WorkflowActionBean;
025 import org.apache.oozie.WorkflowJobBean;
026 import org.apache.oozie.XException;
027 import org.apache.oozie.action.ActionExecutor;
028 import org.apache.oozie.action.ActionExecutorException;
029 import org.apache.oozie.client.OozieClient;
030 import org.apache.oozie.client.WorkflowAction;
031 import org.apache.oozie.client.WorkflowJob;
032 import org.apache.oozie.client.SLAEvent.SlaAppType;
033 import org.apache.oozie.client.SLAEvent.Status;
034 import org.apache.oozie.command.CommandException;
035 import org.apache.oozie.command.coord.CoordActionUpdateCommand;
036 import org.apache.oozie.service.ActionService;
037 import org.apache.oozie.service.Services;
038 import org.apache.oozie.service.UUIDService;
039 import org.apache.oozie.store.StoreException;
040 import org.apache.oozie.store.WorkflowStore;
041 import org.apache.oozie.util.ELEvaluationException;
042 import org.apache.oozie.util.Instrumentation;
043 import org.apache.oozie.util.XLog;
044 import org.apache.oozie.util.XmlUtils;
045 import org.apache.oozie.util.db.SLADbOperations;
046
047 public class ActionStartCommand extends ActionCommand<Void> {
048 public static final String EL_ERROR = "EL_ERROR";
049 public static final String EL_EVAL_ERROR = "EL_EVAL_ERROR";
050 public static final String COULD_NOT_START = "COULD_NOT_START";
051 public static final String START_DATA_MISSING = "START_DATA_MISSING";
052 public static final String EXEC_DATA_MISSING = "EXEC_DATA_MISSING";
053
054 private String id;
055 private String jobId;
056
057 public ActionStartCommand(String id, String type) {
058 super("action.start", type, 0);
059 this.id = id;
060 }
061
062 @Override
063 protected Void call(WorkflowStore store) throws StoreException, CommandException {
064 WorkflowJobBean workflow = store.getWorkflow(jobId, false);
065 setLogInfo(workflow);
066 WorkflowActionBean action = store.getAction(id, false);
067 XLog.getLog(getClass()).warn(XLog.STD,
068 "[***" + action.getId() + "***]" + "In call()....status=" + action.getStatusStr());
069 setLogInfo(action);
070 if (action.isPending()
071 && (action.getStatus() == WorkflowActionBean.Status.PREP
072 || action.getStatus() == WorkflowActionBean.Status.START_RETRY || action.getStatus() == WorkflowActionBean.Status.START_MANUAL)) {
073 if (workflow.getStatus() == WorkflowJob.Status.RUNNING) {
074
075 ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType());
076 Configuration conf = workflow.getWorkflowInstance().getConf();
077
078 int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
079 long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
080 executor.setMaxRetries(maxRetries);
081 executor.setRetryInterval(retryInterval);
082
083 if (executor != null) {
084 ActionExecutorContext context = null;
085 try {
086 boolean isRetry = false;
087 if (action.getStatus() == WorkflowActionBean.Status.START_RETRY
088 || action.getStatus() == WorkflowActionBean.Status.START_MANUAL) {
089 isRetry = true;
090 }
091 context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry);
092 try {
093 String tmpActionConf = XmlUtils.removeComments(action.getConf());
094 String actionConf = context.getELEvaluator().evaluate(tmpActionConf, String.class);
095 action.setConf(actionConf);
096
097 XLog.getLog(getClass()).debug("Start, name [{0}] type [{1}] configuration{E}{E}{2}{E}",
098 action.getName(), action.getType(), actionConf);
099
100 }
101 catch (ELEvaluationException ex) {
102 throw new ActionExecutorException(ActionExecutorException.ErrorType.TRANSIENT,
103 EL_EVAL_ERROR, ex.getMessage(), ex);
104 }
105 catch (ELException ex) {
106 context.setErrorInfo(EL_ERROR, ex.getMessage());
107 XLog.getLog(getClass()).warn("ELException in ActionStartCommand ", ex.getMessage(), ex);
108 handleError(context, store, workflow, action);
109 return null;
110 }
111 catch (org.jdom.JDOMException je) {
112 context.setErrorInfo("ParsingError", je.getMessage());
113 XLog.getLog(getClass()).warn("JDOMException in ActionStartCommand ", je.getMessage(), je);
114 handleError(context, store, workflow, action);
115 return null;
116 }
117 catch (Exception ex) {
118 context.setErrorInfo(EL_ERROR, ex.getMessage());
119 XLog.getLog(getClass()).warn("Exception in ActionStartCommand ", ex.getMessage(), ex);
120 handleError(context, store, workflow, action);
121 return null;
122 }
123 action.setErrorInfo(null, null);
124 incrActionCounter(action.getType(), 1);
125
126 Instrumentation.Cron cron = new Instrumentation.Cron();
127 cron.start();
128 executor.start(context, action);
129 cron.stop();
130 FaultInjection.activate("org.apache.oozie.command.SkipCommitFaultInjection");
131 addActionCron(action.getType(), cron);
132
133 action.setRetries(0);
134 if (action.isExecutionComplete()) {
135 if (!context.isExecuted()) {
136 XLog.getLog(getClass()).warn(XLog.OPS,
137 "Action Completed, ActionExecutor [{0}] must call setExecutionData()",
138 executor.getType());
139 action.setErrorInfo(EXEC_DATA_MISSING,
140 "Execution Complete, but Execution Data Missing from Action");
141 failJob(context);
142 store.updateAction(action);
143 store.updateWorkflow(workflow);
144 return null;
145 }
146 action.setPending();
147 queueCallable(new ActionEndCommand(action.getId(), action.getType()));
148 }
149 else {
150 if (!context.isStarted()) {
151 XLog.getLog(getClass()).warn(XLog.OPS,
152 "Action Started, ActionExecutor [{0}] must call setStartData()",
153 executor.getType());
154 action.setErrorInfo(START_DATA_MISSING,
155 "Execution Started, but Start Data Missing from Action");
156 failJob(context);
157 store.updateAction(action);
158 store.updateWorkflow(workflow);
159 return null;
160 }
161 queueCallable(new NotificationCommand(workflow, action));
162 }
163
164 XLog.getLog(getClass()).warn(XLog.STD,
165 "[***" + action.getId() + "***]" + "Action status=" + action.getStatusStr());
166
167 store.updateAction(action);
168 store.updateWorkflow(workflow);
169 // Add SLA status event (STARTED) for WF_ACTION
170 // SLADbOperations.writeSlaStatusEvent(eSla,
171 // action.getId(), Status.STARTED, store);
172 SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, Status.STARTED,
173 SlaAppType.WORKFLOW_ACTION);
174 XLog.getLog(getClass()).warn(XLog.STD,
175 "[***" + action.getId() + "***]" + "Action updated in DB!");
176
177 }
178 catch (ActionExecutorException ex) {
179 XLog.getLog(getClass()).warn(
180 "Error starting action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
181 action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage(), ex);
182 action.setErrorInfo(ex.getErrorCode(), ex.getMessage());
183 switch (ex.getErrorType()) {
184 case TRANSIENT:
185 if (!handleTransient(context, executor, WorkflowAction.Status.START_RETRY)) {
186 handleNonTransient(store, context, executor, WorkflowAction.Status.START_MANUAL);
187 action.setPendingAge(new Date());
188 action.setRetries(0);
189 action.setStartTime(null);
190 }
191 break;
192 case NON_TRANSIENT:
193 handleNonTransient(store, context, executor, WorkflowAction.Status.START_MANUAL);
194 break;
195 case ERROR:
196 handleError(context, executor, WorkflowAction.Status.ERROR.toString(), true,
197 WorkflowAction.Status.DONE);
198 break;
199 case FAILED:
200 try {
201 failJob(context);
202 queueCallable(new CoordActionUpdateCommand(workflow));
203 SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store,
204 Status.FAILED, SlaAppType.WORKFLOW_ACTION);
205 SLADbOperations.writeStausEvent(workflow.getSlaXml(), workflow.getId(), store,
206 Status.FAILED, SlaAppType.WORKFLOW_JOB);
207 }
208 catch (XException x) {
209 XLog.getLog(getClass()).warn("ActionStartCommand - case:FAILED ", x.getMessage());
210 }
211 break;
212 }
213 store.updateAction(action);
214 store.updateWorkflow(workflow);
215 }
216 }
217 else {
218 throw new CommandException(ErrorCode.E0802, action.getType());
219 }
220
221 }
222 else {
223 XLog.getLog(getClass()).warn("Job state is not {0}. Skipping Action Execution",
224 WorkflowJob.Status.RUNNING.toString());
225 }
226 }
227 return null;
228 }
229
230 private void handleError(ActionExecutorContext context, WorkflowStore store, WorkflowJobBean workflow,
231 WorkflowActionBean action) throws CommandException, StoreException {
232 failJob(context);
233 store.updateAction(action);
234 store.updateWorkflow(workflow);
235 SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, Status.FAILED,
236 SlaAppType.WORKFLOW_ACTION);
237 SLADbOperations.writeStausEvent(workflow.getSlaXml(), workflow.getId(), store, Status.FAILED,
238 SlaAppType.WORKFLOW_JOB);
239 queueCallable(new CoordActionUpdateCommand(workflow));
240 return;
241 }
242
243 @Override
244 protected Void execute(WorkflowStore store) throws CommandException, StoreException {
245 try {
246 XLog.getLog(getClass()).debug("STARTED ActionStartCommand for wf actionId=" + id);
247 jobId = Services.get().get(UUIDService.class).getId(id);
248 if (lock(jobId)) {
249 call(store);
250 }
251 else {
252 queueCallable(new ActionStartCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
253 XLog.getLog(getClass()).warn("ActionStartCommand lock was not acquired - failed {0}", id);
254 }
255 }
256 catch (InterruptedException e) {
257 queueCallable(new ActionStartCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
258 XLog.getLog(getClass()).warn("ActionStartCommand lock was not acquired - interrupted exception failed {0}",
259 id);
260 }
261 XLog.getLog(getClass()).debug("ENDED ActionStartCommand for wf actionId=" + id + ", jobId=" + jobId);
262 return null;
263 }
264
265 }