001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.command.wf;
016
017 import org.apache.hadoop.conf.Configuration;
018 import org.apache.oozie.client.CoordinatorAction;
019 import org.apache.oozie.client.WorkflowJob;
020 import org.apache.oozie.client.SLAEvent.SlaAppType;
021 import org.apache.oozie.client.SLAEvent.Status;
022 import org.apache.oozie.CoordinatorActionBean;
023 import org.apache.oozie.WorkflowActionBean;
024 import org.apache.oozie.WorkflowJobBean;
025 import org.apache.oozie.ErrorCode;
026 import org.apache.oozie.XException;
027 import org.apache.oozie.command.CommandException;
028 import org.apache.oozie.command.coord.CoordActionReadyCommand;
029 import org.apache.oozie.command.coord.CoordActionUpdateCommand;
030 import org.apache.oozie.coord.CoordELFunctions;
031 import org.apache.oozie.coord.CoordinatorJobException;
032 import org.apache.oozie.service.ELService;
033 import org.apache.oozie.service.SchemaService;
034 import org.apache.oozie.service.Services;
035 import org.apache.oozie.service.StoreService;
036 import org.apache.oozie.service.UUIDService;
037 import org.apache.oozie.service.WorkflowStoreService;
038 import org.apache.oozie.store.CoordinatorStore;
039 import org.apache.oozie.store.StoreException;
040 import org.apache.oozie.store.WorkflowStore;
041 import org.apache.oozie.workflow.WorkflowException;
042 import org.apache.oozie.workflow.WorkflowInstance;
043 import org.apache.oozie.util.ELEvaluator;
044 import org.apache.oozie.util.XConfiguration;
045 import org.apache.oozie.util.XLog;
046 import org.apache.oozie.util.ParamChecker;
047 import org.apache.oozie.util.XmlUtils;
048 import org.apache.oozie.util.db.SLADbOperations;
049 import org.apache.openjpa.lib.log.Log;
050 import org.jdom.Element;
051 import org.jdom.JDOMException;
052 import org.jdom.Namespace;
053
054 import java.io.StringReader;
055 import java.util.Date;
056 import java.util.List;
057 import java.util.Map;
058
059 public class SignalCommand extends WorkflowCommand<Void> {
060
061 protected static final String INSTR_SUCCEEDED_JOBS_COUNTER_NAME = "succeeded";
062
063 private String jobId;
064 private String actionId;
065
066 protected SignalCommand(String name, int priority, String jobId) {
067 super(name, name, priority, XLog.STD);
068 this.jobId = ParamChecker.notEmpty(jobId, "jobId");
069 }
070
071 public SignalCommand(String jobId, String actionId) {
072 super("signal", "signal", 1, XLog.STD);
073 this.jobId = ParamChecker.notEmpty(jobId, "jobId");
074 this.actionId = ParamChecker.notEmpty(actionId, "actionId");
075 }
076
077 @Override
078 protected Void call(WorkflowStore store) throws CommandException, StoreException {
079
080 WorkflowJobBean workflow = store.getWorkflow(jobId, false);
081 setLogInfo(workflow);
082 WorkflowActionBean action = null;
083 boolean skipAction = false;
084 if (actionId != null) {
085 action = store.getAction(actionId, false);
086 setLogInfo(action);
087 }
088 if ((action == null) || (action.isComplete() && action.isPending())) {
089 try {
090 if (workflow.getStatus() == WorkflowJob.Status.RUNNING
091 || workflow.getStatus() == WorkflowJob.Status.PREP) {
092 WorkflowInstance workflowInstance = workflow.getWorkflowInstance();
093 workflowInstance.setTransientVar(WorkflowStoreService.WORKFLOW_BEAN, workflow);
094 boolean completed;
095 if (action == null) {
096 if (workflow.getStatus() == WorkflowJob.Status.PREP) {
097 completed = workflowInstance.start();
098 workflow.setStatus(WorkflowJob.Status.RUNNING);
099 workflow.setStartTime(new Date());
100 workflow.setWorkflowInstance(workflowInstance);
101 // 1. Add SLA status event for WF-JOB with status
102 // STARTED
103 // 2. Add SLA registration events for all WF_ACTIONS
104 SLADbOperations.writeStausEvent(workflow.getSlaXml(), jobId, store, Status.STARTED,
105 SlaAppType.WORKFLOW_JOB);
106 writeSLARegistrationForAllActions(workflowInstance.getApp().getDefinition(), workflow
107 .getUser(), workflow.getGroup(), workflow.getConf(), store);
108 queueCallable(new NotificationCommand(workflow));
109 }
110 else {
111 throw new CommandException(ErrorCode.E0801, workflow.getId());
112 }
113 }
114 else {
115 String skipVar = workflowInstance.getVar(action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR
116 + ReRunCommand.TO_SKIP);
117 if (skipVar != null) {
118 skipAction = skipVar.equals("true");
119 }
120 completed = workflowInstance.signal(action.getExecutionPath(), action.getSignalValue());
121 workflow.setWorkflowInstance(workflowInstance);
122 action.resetPending();
123 if (!skipAction) {
124 action.setTransition(workflowInstance.getTransition(action.getName()));
125 }
126 store.updateAction(action);
127 }
128
129 if (completed) {
130 for (String actionToKillId : WorkflowStoreService.getActionsToKill(workflowInstance)) {
131 WorkflowActionBean actionToKill = store.getAction(actionToKillId, false);
132 actionToKill.setPending();
133 actionToKill.setStatus(WorkflowActionBean.Status.KILLED);
134 store.updateAction(actionToKill);
135 queueCallable(new ActionKillCommand(actionToKill.getId(), actionToKill.getType()));
136 }
137
138 for (String actionToFailId : WorkflowStoreService.getActionsToFail(workflowInstance)) {
139 WorkflowActionBean actionToFail = store.getAction(actionToFailId, false);
140 actionToFail.resetPending();
141 actionToFail.setStatus(WorkflowActionBean.Status.FAILED);
142 SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, Status.FAILED,
143 SlaAppType.WORKFLOW_ACTION);
144 store.updateAction(actionToFail);
145 }
146
147 workflow.setStatus(WorkflowJob.Status.valueOf(workflowInstance.getStatus().toString()));
148 workflow.setEndTime(new Date());
149 workflow.setWorkflowInstance(workflowInstance);
150 Status slaStatus = Status.SUCCEEDED;
151 switch (workflow.getStatus()) {
152 case SUCCEEDED:
153 slaStatus = Status.SUCCEEDED;
154 break;
155 case KILLED:
156 slaStatus = Status.KILLED;
157 break;
158 case FAILED:
159 slaStatus = Status.FAILED;
160 break;
161 default: // TODO about SUSPENDED
162
163 }
164 SLADbOperations.writeStausEvent(workflow.getSlaXml(), jobId, store, slaStatus,
165 SlaAppType.WORKFLOW_JOB);
166 queueCallable(new NotificationCommand(workflow));
167 if (workflow.getStatus() == WorkflowJob.Status.SUCCEEDED) {
168 incrJobCounter(INSTR_SUCCEEDED_JOBS_COUNTER_NAME, 1);
169 }
170 }
171 else {
172 for (WorkflowActionBean newAction : WorkflowStoreService.getStartedActions(workflowInstance)) {
173 String skipVar = workflowInstance.getVar(newAction.getName()
174 + WorkflowInstance.NODE_VAR_SEPARATOR + ReRunCommand.TO_SKIP);
175 boolean skipNewAction = false;
176 if (skipVar != null) {
177 skipNewAction = skipVar.equals("true");
178 }
179 if (skipNewAction) {
180 WorkflowActionBean oldAction = store.getAction(newAction.getId(), false);
181 oldAction.setPending();
182 store.updateAction(oldAction);
183 queueCallable(new SignalCommand(jobId, oldAction.getId()));
184 }
185 else {
186 newAction.setPending();
187 String actionSlaXml = getActionSLAXml(newAction.getName(), workflowInstance.getApp()
188 .getDefinition(), workflow.getConf());
189 // System.out.println("111111 actionXml " +
190 // actionSlaXml);
191 // newAction.setSlaXml(workflow.getSlaXml());
192 newAction.setSlaXml(actionSlaXml);
193 store.insertAction(newAction);
194 queueCallable(new ActionStartCommand(newAction.getId(), newAction.getType()));
195 }
196 }
197 }
198
199 store.updateWorkflow(workflow);
200 XLog.getLog(getClass()).debug(
201 "Updated the workflow status to " + workflow.getId() + " status ="
202 + workflow.getStatusStr());
203 if (workflow.getStatus() != WorkflowJob.Status.RUNNING
204 && workflow.getStatus() != WorkflowJob.Status.SUSPENDED) {
205 queueCallable(new CoordActionUpdateCommand(workflow));
206 }
207 }
208 else {
209 XLog.getLog(getClass()).warn("Workflow not RUNNING, current status [{0}]", workflow.getStatus());
210 }
211 }
212 catch (WorkflowException ex) {
213 throw new CommandException(ex);
214 }
215 }
216 else {
217 XLog.getLog(getClass()).warn(
218 "SignalCommand for action id :" + actionId + " is already processed. status=" + action.getStatus()
219 + ", Pending=" + action.isPending());
220 }
221 return null;
222 }
223
224 public static ELEvaluator createELEvaluatorForGroup(Configuration conf, String group) {
225 ELEvaluator eval = Services.get().get(ELService.class).createEvaluator(group);
226 for (Map.Entry<String, String> entry : conf) {
227 eval.setVariable(entry.getKey(), entry.getValue());
228 }
229 return eval;
230 }
231
232 private String getActionSLAXml(String actionName, String wfXml, String wfConf) throws CommandException {
233 String slaXml = null;
234 // TODO need to fill-out the code
235 // Get the appropriate action:slaXml and resolve that.
236 try {
237 // Configuration conf = new XConfiguration(new
238 // StringReader(wfConf));
239 Element eWfJob = XmlUtils.parseXml(wfXml);
240 // String prefix = XmlUtils.getNamespacePrefix(eWfJob,
241 // SchemaService.SLA_NAME_SPACE_URI);
242 for (Element action : (List<Element>) eWfJob.getChildren("action", eWfJob.getNamespace())) {
243 if (action.getAttributeValue("name").equals(actionName) == false) {
244 continue;
245 }
246 Element eSla = action.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
247 if (eSla != null) {
248 // resolveSla(eSla, conf);
249 slaXml = XmlUtils.prettyPrint(eSla).toString();// Could use
250 // any
251 // non-null
252 // string
253 break;
254 }
255 }
256 }
257 catch (Exception e) {
258 throw new CommandException(ErrorCode.E1004, e.getMessage(), e);
259 }
260 return slaXml;
261 }
262
263 private String resolveSla(Element eSla, Configuration conf) throws CommandException {
264 String slaXml = null;
265 try {
266 ELEvaluator evalSla = SubmitCommand.createELEvaluatorForGroup(conf, "wf-sla-submit");
267 slaXml = SubmitCommand.resolveSla(eSla, evalSla);
268 }
269 catch (Exception e) {
270 throw new CommandException(ErrorCode.E1004, e.getMessage(), e);
271 }
272 return slaXml;
273 }
274
275 private void writeSLARegistrationForAllActions(String wfXml, String user, String group, String strConf,
276 WorkflowStore store) throws CommandException {
277 try {
278 Element eWfJob = XmlUtils.parseXml(wfXml);
279 // String prefix = XmlUtils.getNamespacePrefix(eWfJob,
280 // SchemaService.SLA_NAME_SPACE_URI);
281 Configuration conf = new XConfiguration(new StringReader(strConf));
282 for (Element action : (List<Element>) eWfJob.getChildren("action", eWfJob.getNamespace())) {
283 Element eSla = action.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
284 if (eSla != null) {
285 String slaXml = resolveSla(eSla, conf);
286 eSla = XmlUtils.parseXml(slaXml);
287 String actionId = Services.get().get(UUIDService.class).generateChildId(jobId,
288 action.getAttributeValue("name") + "");
289 SLADbOperations.writeSlaRegistrationEvent(eSla, store, actionId, SlaAppType.WORKFLOW_ACTION, user,
290 group);
291 }
292 }
293 }
294 catch (Exception e) {
295 throw new CommandException(ErrorCode.E1007, "workflow:Actions " + jobId, e);
296 }
297
298 }
299
300 @Override
301 protected Void execute(WorkflowStore store) throws CommandException, StoreException {
302 XLog.getLog(getClass()).debug("STARTED SignalCommand for jobid=" + jobId + ", actionId=" + actionId);
303 try {
304 if (lock(jobId)) {
305 call(store);
306 }
307 else {
308 queueCallable(new SignalCommand(jobId, actionId), LOCK_FAILURE_REQUEUE_INTERVAL);
309 XLog.getLog(getClass()).warn("SignalCommand lock was not acquired - failed {0}", jobId);
310 }
311 }
312 catch (InterruptedException e) {
313 queueCallable(new SignalCommand(jobId, actionId), LOCK_FAILURE_REQUEUE_INTERVAL);
314 XLog.getLog(getClass()).warn("SignalCommand lock not acquired - interrupted exception failed {0}", jobId);
315 }
316 XLog.getLog(getClass()).debug("ENDED SignalCommand for jobid=" + jobId + ", actionId=" + actionId);
317 return null;
318 }
319 }