001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.workflow.lite;
016
017 import org.apache.oozie.service.XLogService;
018 import org.apache.oozie.service.DagXLogInfoService;
019 import org.apache.oozie.client.OozieClient;
020 import org.apache.hadoop.io.Writable;
021 import org.apache.hadoop.util.ReflectionUtils;
022 import org.apache.hadoop.conf.Configuration;
023 import org.apache.oozie.workflow.WorkflowApp;
024 import org.apache.oozie.workflow.WorkflowException;
025 import org.apache.oozie.workflow.WorkflowInstance;
026 import org.apache.oozie.util.ParamChecker;
027 import org.apache.oozie.util.XLog;
028 import org.apache.oozie.util.XConfiguration;
029 import org.apache.oozie.ErrorCode;
030
031 import java.io.DataInput;
032 import java.io.DataOutput;
033 import java.io.IOException;
034 import java.io.ByteArrayOutputStream;
035 import java.io.ByteArrayInputStream;
036 import java.util.ArrayList;
037 import java.util.HashMap;
038 import java.util.List;
039 import java.util.Map;
040
041 //TODO javadoc
042 public class LiteWorkflowInstance implements Writable, WorkflowInstance {
043 private static final String TRANSITION_TO = "transition.to";
044
045 private XLog log;
046
047 private static String PATH_SEPARATOR = "/";
048 private static String ROOT = PATH_SEPARATOR;
049 private static String TRANSITION_SEPARATOR = "#";
050
051 private static class NodeInstance {
052 String nodeName;
053 boolean started = false;
054
055 private NodeInstance(String nodeName) {
056 this.nodeName = nodeName;
057 }
058 }
059
060 private class Context implements NodeHandler.Context {
061 private NodeDef nodeDef;
062 private String executionPath;
063 private String exitState;
064 private Status status = Status.RUNNING;
065
066 private Context(NodeDef nodeDef, String executionPath, String exitState) {
067 this.nodeDef = nodeDef;
068 this.executionPath = executionPath;
069 this.exitState = exitState;
070 }
071
072 public NodeDef getNodeDef() {
073 return nodeDef;
074 }
075
076 public String getExecutionPath() {
077 return executionPath;
078 }
079
080 public String getParentExecutionPath(String executionPath) {
081 return LiteWorkflowInstance.getParentPath(executionPath);
082 }
083
084 public String getSignalValue() {
085 return exitState;
086 }
087
088 public String createExecutionPath(String name) {
089 return LiteWorkflowInstance.createChildPath(executionPath, name);
090 }
091
092 public String createFullTransition(String executionPath, String transition) {
093 return LiteWorkflowInstance.createFullTransition(executionPath, transition);
094 }
095
096 public void deleteExecutionPath() {
097 if (!executionPaths.containsKey(executionPath)) {
098 throw new IllegalStateException();
099 }
100 executionPaths.remove(executionPath);
101 executionPath = LiteWorkflowInstance.getParentPath(executionPath);
102 }
103
104 public void failJob() {
105 status = Status.FAILED;
106 }
107
108 public void killJob() {
109 status = Status.KILLED;
110 }
111
112 public void completeJob() {
113 status = Status.SUCCEEDED;
114 }
115
116 @Override
117 public Object getTransientVar(String name) {
118 return LiteWorkflowInstance.this.getTransientVar(name);
119 }
120
121 @Override
122 public String getVar(String name) {
123 return LiteWorkflowInstance.this.getVar(name);
124 }
125
126 @Override
127 public void setTransientVar(String name, Object value) {
128 LiteWorkflowInstance.this.setTransientVar(name, value);
129 }
130
131 @Override
132 public void setVar(String name, String value) {
133 LiteWorkflowInstance.this.setVar(name, value);
134 }
135
136 @Override
137 public LiteWorkflowInstance getProcessInstance() {
138 return LiteWorkflowInstance.this;
139 }
140
141 }
142
143 private LiteWorkflowApp def;
144 private Configuration conf;
145 private String instanceId;
146 private Status status;
147 private Map<String, NodeInstance> executionPaths = new HashMap<String, NodeInstance>();
148 private Map<String, String> persistentVars = new HashMap<String, String>();
149 private Map<String, Object> transientVars = new HashMap<String, Object>();
150
/**
 * Creates an instance with only the logger initialized; every other field keeps its default value.
 */
protected LiteWorkflowInstance() {
    this.log = XLog.getLog(this.getClass());
}
154
/**
 * Creates a workflow instance in PREP status.
 *
 * @param def workflow definition, checked with <code>ParamChecker.notNull</code>.
 * @param conf job configuration, checked with <code>ParamChecker.notNull</code>.
 * @param instanceId ID of the instance, checked with <code>ParamChecker.notNull</code>.
 */
public LiteWorkflowInstance(LiteWorkflowApp def, Configuration conf, String instanceId) {
    this();
    this.def = ParamChecker.notNull(def, "def");
    this.instanceId = ParamChecker.notNull(instanceId, "instanceId");
    this.conf = ParamChecker.notNull(conf, "conf");
    this.status = Status.PREP;
    // The log parameters are derived from def, conf and instanceId, all of which are set by now.
    refreshLog();
}
163
164 public synchronized boolean start() throws WorkflowException {
165 if (status != Status.PREP) {
166 throw new WorkflowException(ErrorCode.E0719);
167 }
168 log.debug(XLog.STD, "Starting job");
169 status = Status.RUNNING;
170 executionPaths.put(ROOT, new NodeInstance(StartNodeDef.START));
171 return signal(ROOT, StartNodeDef.START);
172 }
173
// TODO: if the job is suspended, store the signal and replay it when the job is resumed.
175
176 public synchronized boolean signal(String executionPath, String signalValue) throws WorkflowException {
177 ParamChecker.notEmpty(executionPath, "executionPath");
178 ParamChecker.notNull(signalValue, "signalValue");
179 log.debug(XLog.STD, "Signaling job execution path [{0}] signal value [{1}]", executionPath, signalValue);
180 if (status != Status.RUNNING) {
181 throw new WorkflowException(ErrorCode.E0716);
182 }
183 NodeInstance nodeJob = executionPaths.get(executionPath);
184 if (nodeJob == null) {
185 status = Status.FAILED;
186 log.error("invalid execution path [{0}]", executionPath);
187 }
188 NodeDef nodeDef = null;
189 if (!status.isEndState()) {
190 nodeDef = def.getNode(nodeJob.nodeName);
191 if (nodeDef == null) {
192 status = Status.FAILED;
193 log.error("invalid transition [{0}]", nodeJob.nodeName);
194 }
195 }
196 if (!status.isEndState()) {
197 NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
198 boolean exiting = true;
199
200 Context context = new Context(nodeDef, executionPath, signalValue);
201 if (!nodeJob.started) {
202 try {
203 nodeHandler.loopDetection(context);
204 exiting = nodeHandler.enter(context);
205 nodeJob.started = true;
206 }
207 catch (WorkflowException ex) {
208 status = Status.FAILED;
209 throw ex;
210 }
211 }
212
213 if (exiting) {
214 List<String> pathsToStart = new ArrayList<String>();
215 List<String> fullTransitions;
216 try {
217 fullTransitions = nodeHandler.multiExit(context);
218 int last = fullTransitions.size() - 1;
219 // TEST THIS
220 if (last >= 0) {
221 String transitionTo = getTransitionNode(fullTransitions.get(last));
222
223 persistentVars.put(nodeDef.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + TRANSITION_TO,
224 transitionTo);
225 }
226 }
227 catch (WorkflowException ex) {
228 status = Status.FAILED;
229 throw ex;
230 }
231
232 if (context.status == Status.KILLED) {
233 status = Status.KILLED;
234 log.debug(XLog.STD, "Completing job, kill node [{0}]", nodeJob.nodeName);
235 }
236 else {
237 if (context.status == Status.FAILED) {
238 status = Status.FAILED;
239 log.debug(XLog.STD, "Completing job, fail node [{0}]", nodeJob.nodeName);
240 }
241 else {
242 if (context.status == Status.SUCCEEDED) {
243 status = Status.SUCCEEDED;
244 log.debug(XLog.STD, "Completing job, end node [{0}]", nodeJob.nodeName);
245 }
246 /*
247 else if (context.status == Status.SUSPENDED) {
248 status = Status.SUSPENDED;
249 log.debug(XLog.STD, "Completing job, end node [{0}]", nodeJob.nodeName);
250 }
251 */
252 else {
253 for (String fullTransition : fullTransitions) {
254 // this is the whole trick for forking, we need the
255 // executionpath and the transition
256 // in the case of no forking last element of
257 // executionpath is different from transition
258 // in the case of forking they are the same
259
260 log.debug(XLog.STD, "Exiting node [{0}] with transition[{1}]", nodeJob.nodeName,
261 fullTransition);
262
263 String execPathFromTransition = getExecutionPath(fullTransition);
264 String transition = getTransitionNode(fullTransition);
265 def.validateTransition(nodeJob.nodeName, transition);
266
267 NodeInstance nodeJobInPath = executionPaths.get(execPathFromTransition);
268 if ((nodeJobInPath == null) || (!transition.equals(nodeJobInPath.nodeName))) {
269 // TODO explain this IF better
270 // If the WfJob is signaled with the parent
271 // execution executionPath again
272 // The Fork node will execute again.. and replace
273 // the Node WorkflowJobBean
274 // so this is required to prevent that..
275 // Question : Should we throw an error in this case
276 // ??
277 executionPaths.put(execPathFromTransition, new NodeInstance(transition));
278 pathsToStart.add(execPathFromTransition);
279 }
280
281 }
282 // signal all new synch transitions
283 for (String pathToStart : pathsToStart) {
284 signal(pathToStart, "::synch::");
285 }
286 }
287 }
288 }
289 }
290 }
291 if (status.isEndState()) {
292 if (status == Status.FAILED) {
293 List<String> failedNodes = terminateNodes(status);
294 log.warn(XLog.STD, "Workflow completed [{0}], failing [{1}] running nodes", status, failedNodes
295 .size());
296 }
297 else {
298 List<String> killedNodes = terminateNodes(Status.KILLED);
299 if (killedNodes.size() > 1) {
300 log.warn(XLog.STD, "Workflow completed [{0}], killing [{1}] running nodes", status, killedNodes
301 .size());
302 }
303 }
304 }
305 return status.isEndState();
306 }
307
308 public synchronized void fail(String nodeName) throws WorkflowException {
309 if (status.isEndState()) {
310 throw new WorkflowException(ErrorCode.E0718);
311 }
312 String failedNode = failNode(nodeName);
313 if (failedNode != null) {
314 log.warn(XLog.STD, "Workflow Failed. Failing node [{0}]", failedNode);
315 }
316 else {
317 //TODO failed attempting to fail the action. EXCEPTION
318 }
319 List<String> killedNodes = killNodes();
320 if (killedNodes.size() > 1) {
321 log.warn(XLog.STD, "Workflow Failed, killing [{0}] nodes", killedNodes.size());
322 }
323 status = Status.FAILED;
324 }
325
326 public synchronized void kill() throws WorkflowException {
327 if (status.isEndState()) {
328 throw new WorkflowException(ErrorCode.E0718);
329 }
330 log.debug(XLog.STD, "Killing job");
331 List<String> killedNodes = killNodes();
332 if (killedNodes.size() > 1) {
333 log.warn(XLog.STD, "workflow killed, killing [{0}] nodes", killedNodes.size());
334 }
335 status = Status.KILLED;
336 }
337
338 public synchronized void suspend() throws WorkflowException {
339 if (status != Status.RUNNING) {
340 throw new WorkflowException(ErrorCode.E0716);
341 }
342 log.debug(XLog.STD, "Suspending job");
343 this.status = Status.SUSPENDED;
344 }
345
346 public boolean isSuspended() {
347 return (status == Status.SUSPENDED);
348 }
349
350 public synchronized void resume() throws WorkflowException {
351 if (status != Status.SUSPENDED) {
352 throw new WorkflowException(ErrorCode.E0717);
353 }
354 log.debug(XLog.STD, "Resuming job");
355 status = Status.RUNNING;
356 }
357
358 public void setVar(String name, String value) {
359 if (value != null) {
360 persistentVars.put(name, value);
361 }
362 else {
363 persistentVars.remove(name);
364 }
365 }
366
367 @Override
368 public Map<String, String> getAllVars() {
369 return persistentVars;
370 }
371
372 @Override
373 public void setAllVars(Map<String, String> varMap) {
374 persistentVars.putAll(varMap);
375 }
376
377 public String getVar(String name) {
378 return persistentVars.get(name);
379 }
380
381
382 public void setTransientVar(String name, Object value) {
383 if (value != null) {
384 transientVars.put(name, value);
385 }
386 else {
387 transientVars.remove(name);
388 }
389 }
390
391 public boolean hasTransientVar(String name) {
392 return transientVars.containsKey(name);
393 }
394
395 public Object getTransientVar(String name) {
396 return transientVars.get(name);
397 }
398
399 public boolean hasEnded() {
400 return status.isEndState();
401 }
402
403 private List<String> terminateNodes(Status endStatus) {
404 List<String> endNodes = new ArrayList<String>();
405 for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
406 if (entry.getValue().started) {
407 NodeDef nodeDef = def.getNode(entry.getValue().nodeName);
408 NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
409 try {
410 if (endStatus == Status.KILLED) {
411 nodeHandler.kill(new Context(nodeDef, entry.getKey(), null));
412 }
413 else {
414 if (endStatus == Status.FAILED) {
415 nodeHandler.fail(new Context(nodeDef, entry.getKey(), null));
416 }
417 }
418 endNodes.add(nodeDef.getName());
419 }
420 catch (Exception ex) {
421 log.warn(XLog.STD, "Error Changing node state to [{0}] for Node [{1}]", endStatus.toString(),
422 nodeDef.getName(), ex);
423 }
424 }
425 }
426 return endNodes;
427 }
428
429 private String failNode(String nodeName) {
430 String failedNode = null;
431 for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
432 String node = entry.getKey();
433 NodeInstance nodeInstance = entry.getValue();
434 if (nodeInstance.started && nodeInstance.nodeName.equals(nodeName)) {
435 NodeDef nodeDef = def.getNode(nodeInstance.nodeName);
436 NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
437 try {
438 nodeHandler.fail(new Context(nodeDef, node, null));
439 failedNode = nodeDef.getName();
440 nodeInstance.started = false;
441 }
442 catch (Exception ex) {
443 log.warn(XLog.STD, "Error failing node [{0}]", nodeDef.getName(), ex);
444 }
445 return failedNode;
446 }
447 }
448 return failedNode;
449 }
450
451 private List<String> killNodes() {
452 List<String> killedNodes = new ArrayList<String>();
453 for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
454 String node = entry.getKey();
455 NodeInstance nodeInstance = entry.getValue();
456 if (nodeInstance.started) {
457 NodeDef nodeDef = def.getNode(nodeInstance.nodeName);
458 NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
459 try {
460 nodeHandler.kill(new Context(nodeDef, node, null));
461 killedNodes.add(nodeDef.getName());
462 }
463 catch (Exception ex) {
464 log.warn(XLog.STD, "Error killing node [{0}]", nodeDef.getName(), ex);
465 }
466 }
467 }
468 return killedNodes;
469 }
470
471 public LiteWorkflowApp getProcessDefinition() {
472 return def;
473 }
474
475 private static String createChildPath(String path, String child) {
476 return path + child + PATH_SEPARATOR;
477 }
478
479 private static String getParentPath(String path) {
480 path = path.substring(0, path.length() - 1);
481 return (path.length() == 0) ? null : path.substring(0, path.lastIndexOf(PATH_SEPARATOR) + 1);
482 }
483
484 private static String createFullTransition(String executionPath, String transition) {
485 return executionPath + TRANSITION_SEPARATOR + transition;
486 }
487
488 private static String getExecutionPath(String fullTransition) {
489 int index = fullTransition.indexOf(TRANSITION_SEPARATOR);
490 if (index == -1) {
491 throw new IllegalArgumentException("Invalid fullTransition");
492 }
493 return fullTransition.substring(0, index);
494 }
495
496 private static String getTransitionNode(String fullTransition) {
497 int index = fullTransition.indexOf(TRANSITION_SEPARATOR);
498 if (index == -1) {
499 throw new IllegalArgumentException("Invalid fullTransition");
500 }
501 return fullTransition.substring(index + 1);
502 }
503
504 private NodeHandler newInstance(Class<? extends NodeHandler> handler) {
505 return (NodeHandler) ReflectionUtils.newInstance(handler, null);
506 }
507
508 private void refreshLog() {
509 XLog.Info.get().setParameter(XLogService.USER, conf.get(OozieClient.USER_NAME));
510 XLog.Info.get().setParameter(XLogService.GROUP, conf.get(OozieClient.GROUP_NAME));
511 XLog.Info.get().setParameter(DagXLogInfoService.APP, def.getName());
512 XLog.Info.get().setParameter(DagXLogInfoService.TOKEN, conf.get(OozieClient.LOG_TOKEN, ""));
513 XLog.Info.get().setParameter(DagXLogInfoService.JOB, instanceId);
514 log = XLog.getLog(getClass());
515 }
516
517 public Status getStatus() {
518 return status;
519 }
520
521 public void setStatus(Status status) {
522 this.status = status;
523 }
524
525 @Override
526 public void write(DataOutput dOut) throws IOException {
527 dOut.writeUTF(instanceId);
528
529 //Hadoop Configuration has to get its act right
530 ByteArrayOutputStream baos = new ByteArrayOutputStream();
531 conf.writeXml(baos);
532 baos.close();
533 byte[] array = baos.toByteArray();
534 dOut.writeInt(array.length);
535 dOut.write(array);
536
537 def.write(dOut);
538 dOut.writeUTF(status.toString());
539 dOut.writeInt(executionPaths.size());
540 for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
541 dOut.writeUTF(entry.getKey());
542 dOut.writeUTF(entry.getValue().nodeName);
543 dOut.writeBoolean(entry.getValue().started);
544 }
545 dOut.writeInt(persistentVars.size());
546 for (Map.Entry<String, String> entry : persistentVars.entrySet()) {
547 dOut.writeUTF(entry.getKey());
548 dOut.writeUTF(entry.getValue());
549 }
550 }
551
552 @Override
553 public void readFields(DataInput dIn) throws IOException {
554 instanceId = dIn.readUTF();
555
556 //Hadoop Configuration has to get its act right
557 int len = dIn.readInt();
558 byte[] array = new byte[len];
559 dIn.readFully(array);
560 ByteArrayInputStream bais = new ByteArrayInputStream(array);
561 conf = new XConfiguration(bais);
562
563 def = new LiteWorkflowApp();
564 def.readFields(dIn);
565 status = Status.valueOf(dIn.readUTF());
566 int numExPaths = dIn.readInt();
567 for (int x = 0; x < numExPaths; x++) {
568 String path = dIn.readUTF();
569 String nodeName = dIn.readUTF();
570 boolean isStarted = dIn.readBoolean();
571 NodeInstance nodeInstance = new NodeInstance(nodeName);
572 nodeInstance.started = isStarted;
573 executionPaths.put(path, nodeInstance);
574 }
575 int numVars = dIn.readInt();
576 for (int x = 0; x < numVars; x++) {
577 String vName = dIn.readUTF();
578 String vVal = dIn.readUTF();
579 persistentVars.put(vName, vVal);
580 }
581 refreshLog();
582 }
583
584 @Override
585 public Configuration getConf() {
586 return conf;
587 }
588
589 @Override
590 public WorkflowApp getApp() {
591 return def;
592 }
593
594 @Override
595 public String getId() {
596 return instanceId;
597 }
598
599 @Override
600 public String getTransition(String node) {
601 return persistentVars.get(node + WorkflowInstance.NODE_VAR_SEPARATOR + TRANSITION_TO);
602 }
603
604 public boolean equals(Object o) {
605 return (o != null) && (getClass().isInstance(o)) && ((WorkflowInstance) o).getId().equals(instanceId);
606 }
607
608 public int hashCode() {
609 return instanceId.hashCode();
610 }
611
612 }