001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.action.hadoop;
016
017 import java.io.BufferedReader;
018 import java.io.File;
019 import java.io.FileNotFoundException;
020 import java.io.IOException;
021 import java.io.InputStream;
022 import java.io.InputStreamReader;
023 import java.io.StringReader;
024 import java.net.ConnectException;
025 import java.net.URI;
026 import java.net.UnknownHostException;
027 import java.util.ArrayList;
028 import java.util.HashSet;
029 import java.util.List;
030 import java.util.Map;
031 import java.util.Properties;
032 import java.util.Set;
033
034 import org.apache.hadoop.conf.Configuration;
035 import org.apache.hadoop.filecache.DistributedCache;
036 import org.apache.hadoop.fs.FileSystem;
037 import org.apache.hadoop.fs.Path;
038 import org.apache.hadoop.fs.permission.AccessControlException;
039 import org.apache.hadoop.mapred.JobClient;
040 import org.apache.hadoop.mapred.JobConf;
041 import org.apache.hadoop.mapred.JobID;
042 import org.apache.hadoop.mapred.RunningJob;
043 import org.apache.hadoop.util.DiskChecker;
044 import org.apache.oozie.action.ActionExecutor;
045 import org.apache.oozie.action.ActionExecutorException;
046 import org.apache.oozie.client.OozieClient;
047 import org.apache.oozie.client.WorkflowAction;
048 import org.apache.oozie.service.HadoopAccessorException;
049 import org.apache.oozie.service.HadoopAccessorService;
050 import org.apache.oozie.service.Services;
051 import org.apache.oozie.service.WorkflowAppService;
052 import org.apache.oozie.servlet.CallbackServlet;
053 import org.apache.oozie.util.IOUtils;
054 import org.apache.oozie.util.PropertiesUtils;
055 import org.apache.oozie.util.XConfiguration;
056 import org.apache.oozie.util.XLog;
057 import org.apache.oozie.util.XmlUtils;
058 import org.jdom.Element;
059 import org.jdom.JDOMException;
060 import org.jdom.Namespace;
061
/**
 * ActionExecutor that runs a user Java main class through a Hadoop map-only
 * "launcher" job. Subclasses (e.g. map-reduce, pig) customize the launcher
 * jar name, launcher classes and main class resolution.
 */
public class JavaActionExecutor extends ActionExecutor {

    // Hadoop configuration keys identifying the submitting user/UGI and the
    // JobTracker / NameNode endpoints of the cluster the action runs on.
    private static final String HADOOP_USER = "user.name";
    private static final String HADOOP_UGI = "hadoop.job.ugi";
    private static final String HADOOP_JOB_TRACKER = "mapred.job.tracker";
    private static final String HADOOP_NAME_NODE = "fs.default.name";

    // Properties workflow authors may NOT set themselves; Oozie injects them.
    // Enforced by checkForDisallowedProps().
    private static final Set<String> DISALLOWED_PROPERTIES = new HashSet<String>();

    // Max bytes of captured action output; set from Oozie conf in initActionType().
    private static int maxActionOutputLen;

    // External status values reported for the launcher job.
    private static final String SUCCEEDED = "SUCCEEDED";
    private static final String KILLED = "KILLED";
    private static final String FAILED = "FAILED";
    private static final String FAILED_KILLED = "FAILED/KILLED";
    private static final String RUNNING = "RUNNING";
    private XLog log = XLog.getLog(getClass());

    static {
        DISALLOWED_PROPERTIES.add(HADOOP_USER);
        DISALLOWED_PROPERTIES.add(HADOOP_UGI);
        DISALLOWED_PROPERTIES.add(HADOOP_JOB_TRACKER);
        DISALLOWED_PROPERTIES.add(HADOOP_NAME_NODE);
        DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_JT_KERBEROS_NAME);
        DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_NN_KERBEROS_NAME);
    }

    /** Registers this executor under the "java" action type. */
    public JavaActionExecutor() {
        this("java");
    }

    /** For subclasses that register a different action type name. */
    protected JavaActionExecutor(String type) {
        super(type);
    }

    /** Name of the launcher jar built for this action type, e.g. "java-launcher.jar". */
    protected String getLauncherJarName() {
        return getType() + "-launcher.jar";
    }

    /** Classes packaged into the launcher jar; subclasses may add more. */
    protected List<Class> getLauncherClasses() {
        List<Class> classes = new ArrayList<Class>();
        classes.add(LauncherMapper.class);
        classes.add(LauncherSecurityManager.class);
        classes.add(LauncherException.class);
        return classes;
    }
108
    /**
     * Initialize the action type: build the launcher jar in the Oozie runtime
     * directory and register the exception classes this executor maps to
     * Oozie error codes JA001-JA009.
     */
    @Override
    public void initActionType() {
        super.initActionType();
        // cap on captured action output data; defaults to 2KB
        maxActionOutputLen = getOozieConf().getInt(CallbackServlet.CONF_MAX_DATA_LEN, 2 * 1024);
        try {
            List<Class> classes = getLauncherClasses();
            Class[] launcherClasses = classes.toArray(new Class[classes.size()]);
            // package launcher classes into <type>-launcher.jar under the runtime dir
            IOUtils.createJar(new File(getOozieRuntimeDir()), getLauncherJarName(), launcherClasses);

            // TRANSIENT errors are retried, NON_TRANSIENT suspend, ERROR/FAILED end the action
            registerError(UnknownHostException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA001");
            registerError(AccessControlException.class.getName(), ActionExecutorException.ErrorType.NON_TRANSIENT,
                    "JA002");
            registerError(DiskChecker.DiskOutOfSpaceException.class.getName(),
                    ActionExecutorException.ErrorType.NON_TRANSIENT, "JA003");
            registerError(org.apache.hadoop.hdfs.protocol.QuotaExceededException.class.getName(),
                    ActionExecutorException.ErrorType.NON_TRANSIENT, "JA004");
            registerError(org.apache.hadoop.hdfs.server.namenode.SafeModeException.class.getName(),
                    ActionExecutorException.ErrorType.NON_TRANSIENT, "JA005");
            registerError(ConnectException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA006");
            registerError(JDOMException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA007");
            registerError(FileNotFoundException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA008");
            // NOTE: IOException must be registered last so more specific IO subclasses
            // registered above take precedence -- TODO confirm registerError lookup order
            registerError(IOException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA009");
        }
        catch (IOException ex) {
            // failure to build the launcher jar is fatal at service start-up
            throw new RuntimeException(ex);
        }
    }
136
137 void checkForDisallowedProps(Configuration conf, String confName) throws ActionExecutorException {
138 for (String prop : DISALLOWED_PROPERTIES) {
139 if (conf.get(prop) != null) {
140 throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA010",
141 "Property [{0}] not allowed in action [{1}] configuration", prop, confName);
142 }
143 }
144 }
145
146 Configuration createBaseHadoopConf(Context context, Element actionXml) {
147 Configuration conf = new XConfiguration();
148 conf.set(HADOOP_USER, context.getProtoActionConf().get(WorkflowAppService.HADOOP_USER));
149 conf.set(HADOOP_UGI, context.getProtoActionConf().get(WorkflowAppService.HADOOP_UGI));
150 if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME) != null) {
151 conf.set(WorkflowAppService.HADOOP_JT_KERBEROS_NAME, context.getProtoActionConf().get(
152 WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
153 }
154 if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME) != null) {
155 conf.set(WorkflowAppService.HADOOP_NN_KERBEROS_NAME, context.getProtoActionConf().get(
156 WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
157 }
158 conf.set(OozieClient.GROUP_NAME, context.getProtoActionConf().get(OozieClient.GROUP_NAME));
159 Namespace ns = actionXml.getNamespace();
160 String jobTracker = actionXml.getChild("job-tracker", ns).getTextTrim();
161 String nameNode = actionXml.getChild("name-node", ns).getTextTrim();
162 conf.set(HADOOP_JOB_TRACKER, jobTracker);
163 conf.set(HADOOP_NAME_NODE, nameNode);
164 conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true");
165 return conf;
166 }
167
    /**
     * Copy "oozie.launcher."-prefixed properties from the action's inline
     * configuration element into the launcher job configuration. Each matching
     * property is set twice: under its original prefixed key, and under the
     * un-prefixed key so it takes effect on the launcher Hadoop job.
     *
     * @param conf launcher configuration to populate (also returned).
     * @param actionXml action element that may contain a configuration child.
     * @param appPath workflow application path (unused in this implementation).
     * @param context execution context (unused in this implementation).
     * @return the same conf instance, updated.
     * @throws ActionExecutorException if the inline XML cannot be read or a
     *         disallowed property is present.
     */
    Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context) throws ActionExecutorException {
        try {
            Namespace ns = actionXml.getNamespace();
            Element e = actionXml.getChild("configuration", ns);
            if (e != null) {
                String strConf = XmlUtils.prettyPrint(e).toString();
                XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));

                XConfiguration launcherConf = new XConfiguration();
                for (Map.Entry<String, String> entry : inlineConf) {
                    if (entry.getKey().startsWith("oozie.launcher.")) {
                        String name = entry.getKey().substring("oozie.launcher.".length());
                        String value = entry.getValue();
                        // setting original KEY
                        launcherConf.set(entry.getKey(), value);
                        // setting un-prefixed key (to allow Hadoop job config
                        // for the launcher job)
                        launcherConf.set(name, value);
                    }
                }
                checkForDisallowedProps(launcherConf, "inline launcher configuration");
                XConfiguration.copy(launcherConf, conf);
            }
            return conf;
        }
        catch (IOException ex) {
            throw convertException(ex);
        }
    }
197
198 protected FileSystem getActionFileSystem(Context context, WorkflowAction action) throws ActionExecutorException {
199 try {
200 Element actionXml = XmlUtils.parseXml(action.getConf());
201 return getActionFileSystem(context, actionXml);
202 }
203 catch (JDOMException ex) {
204 throw convertException(ex);
205 }
206 }
207
    /**
     * Resolve the filesystem of the workflow application.
     *
     * @param actionXml unused here; present so subclasses can pick a
     *        filesystem based on the action definition.
     */
    protected FileSystem getActionFileSystem(Context context, Element actionXml) throws ActionExecutorException {
        try {
            return context.getAppFileSystem();
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }
216
    /**
     * Populate the action configuration from, in order: the optional
     * job-xml file (read from the application filesystem) and the optional
     * inline configuration element. Later sources override earlier ones.
     * Oozie-managed properties are rejected from both sources.
     *
     * @return the same actionConf instance, updated.
     * @throws ActionExecutorException on IO errors or disallowed properties.
     */
    Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath)
            throws ActionExecutorException {
        try {
            Namespace ns = actionXml.getNamespace();
            Element e = actionXml.getChild("job-xml", ns);
            if (e != null) {
                String jobXml = e.getTextTrim();
                // relative job-xml paths resolve against the app path
                Path path = new Path(appPath, jobXml);
                FileSystem fs = getActionFileSystem(context, actionXml);
                // NOTE(review): the stream from fs.open(path) is handed to
                // XConfiguration; presumably closed there -- confirm
                Configuration jobXmlConf = new XConfiguration(fs.open(path));
                checkForDisallowedProps(jobXmlConf, "job-xml");
                XConfiguration.copy(jobXmlConf, actionConf);
            }
            e = actionXml.getChild("configuration", ns);
            if (e != null) {
                String strConf = XmlUtils.prettyPrint(e).toString();
                XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
                checkForDisallowedProps(inlineConf, "inline configuration");
                XConfiguration.copy(inlineConf, actionConf);
            }
            return actionConf;
        }
        catch (IOException ex) {
            throw convertException(ex);
        }
    }
243
    /**
     * Add a file or archive to the Hadoop DistributedCache for the job.
     * Relative paths resolve against the application path. Special handling:
     * native libraries (.so / .so.N) get a fragment symlink to the bare .so
     * name; .jar files without an explicit fragment go on the job classpath;
     * other files without a fragment get a symlink to their own file name.
     *
     * @param conf job configuration to update (also returned).
     * @param appPath base path for relative filePath values.
     * @param filePath absolute or app-relative path, optionally with a
     *        "#symlink" fragment.
     * @param archive true to add as a cache archive, false as a cache file.
     * @throws ActionExecutorException wrapping any failure.
     */
    Configuration addToCache(Configuration conf, Path appPath, String filePath, boolean archive)
            throws ActionExecutorException {
        Path path = null;
        try {
            if (filePath.startsWith("/")) {
                path = new Path(filePath);
            }
            else {
                path = new Path(appPath, filePath);
            }
            // strip scheme/authority: the cache URI carries only the path
            URI uri = new URI(path.toUri().getPath());
            if (archive) {
                DistributedCache.addCacheArchive(uri, conf);
            }
            else {
                String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
                if (fileName.endsWith(".so") || fileName.contains(".so.")) { // .so files
                    if (!fileName.endsWith(".so")) {
                        // versioned lib (libx.so.1): symlink under the bare "libx.so" name
                        int extAt = fileName.indexOf(".so.");
                        fileName = fileName.substring(0, extAt + 3);
                    }
                    uri = new Path(path.toString() + "#" + fileName).toUri();
                    uri = new URI(uri.getPath());
                }
                else if (fileName.endsWith(".jar")){ // .jar files
                    if (!fileName.contains("#")) {
                        path = new Path(uri.toString());

                        String user = conf.get("user.name");
                        String group = conf.get("group.name");
                        // jars without a symlink fragment go on the job classpath
                        Services.get().get(HadoopAccessorService.class).addFileToClassPath(user, group, path, conf);
                    }
                }
                else { // regular files
                    if (!fileName.contains("#")) {
                        // symlink the file under its own name in the task working dir
                        uri = new Path(path.toString() + "#" + fileName).toUri();
                        uri = new URI(uri.getPath());
                    }
                }
                DistributedCache.addCacheFile(uri, conf);
            }
            DistributedCache.createSymlink(conf);
            return conf;
        }
        catch (Exception ex) {
            XLog.getLog(getClass()).debug(
                    "Errors when add to DistributedCache. Path=" + path + ", archive=" + archive + ", conf="
                            + XmlUtils.prettyPrint(conf).toString());
            throw convertException(ex);
        }
    }
295
296 String getOozieLauncherJar(Context context) throws ActionExecutorException {
297 try {
298 return new Path(context.getActionDir(), getLauncherJarName()).toString();
299 }
300 catch (Exception ex) {
301 throw convertException(ex);
302 }
303 }
304
    /**
     * Create the action directory on the target filesystem and copy the
     * launcher jar into it. The copy goes to a ".tmp" sibling directory first
     * and is renamed into place so a partially-copied directory is never
     * observed; on failure both directories are removed.
     */
    void prepareActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
        try {
            Path actionDir = context.getActionDir();
            Path tempActionDir = new Path(actionDir.getParent(), actionDir.getName() + ".tmp");
            if (!actionFs.exists(actionDir)) {
                try {
                    actionFs.copyFromLocalFile(new Path(getOozieRuntimeDir(), getLauncherJarName()), new Path(
                            tempActionDir, getLauncherJarName()));
                    // atomic publish of the fully-copied directory
                    actionFs.rename(tempActionDir, actionDir);
                }
                catch (IOException ex) {
                    // clean up both so a retry starts from scratch
                    actionFs.delete(tempActionDir, true);
                    actionFs.delete(actionDir, true);
                    throw ex;
                }
            }
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }
326
327 void cleanUpActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
328 try {
329 Path actionDir = context.getActionDir();
330 if (!context.getProtoActionConf().getBoolean("oozie.action.keep.action.dir", false)
331 && actionFs.exists(actionDir)) {
332 actionFs.delete(actionDir, true);
333 }
334 }
335 catch (Exception ex) {
336 throw convertException(ex);
337 }
338 }
339
340 @SuppressWarnings("unchecked")
341 void setLibFilesArchives(Context context, Element actionXml, Path appPath, Configuration conf)
342 throws ActionExecutorException {
343 Configuration proto = context.getProtoActionConf();
344
345 addToCache(conf, appPath, getOozieLauncherJar(context), false);
346
347 String[] paths = proto.getStrings(WorkflowAppService.APP_LIB_PATH_LIST);
348 if (paths != null) {
349 for (String path : paths) {
350 addToCache(conf, appPath, path, false);
351 }
352 }
353
354 for (Element eProp : (List<Element>) actionXml.getChildren()) {
355 if (eProp.getName().equals("file")) {
356 String path = eProp.getTextTrim();
357 addToCache(conf, appPath, path, false);
358 }
359 else {
360 if (eProp.getName().equals("archive")) {
361 String path = eProp.getTextTrim();
362 addToCache(conf, appPath, path, true);
363 }
364 }
365 }
366 }
367
368 protected String getLauncherMain(Configuration launcherConf, Element actionXml) {
369 Namespace ns = actionXml.getNamespace();
370 Element e = actionXml.getChild("main-class", ns);
371 return e.getTextTrim();
372 }
373
    // Action-configuration properties that must also be propagated to the
    // launcher job (queue name and Kerberos principals); see createLauncherConf().
    private static final Set<String> SPECIAL_PROPERTIES = new HashSet<String>();

    static {
        SPECIAL_PROPERTIES.add("mapred.job.queue.name");
        SPECIAL_PROPERTIES.add("mapreduce.jobtracker.kerberos.principal");
        SPECIAL_PROPERTIES.add("dfs.namenode.kerberos.principal");
    }
381
    /**
     * Build the JobConf for the launcher map-only job: base Hadoop settings,
     * "oozie.launcher." overrides, DistributedCache entries, launcher info
     * (job/action ids, recovery id, embedded action configuration), main
     * class, arguments, java opts, and the special properties propagated from
     * the action configuration.
     *
     * @throws ActionExecutorException wrapping any failure.
     */
    @SuppressWarnings("unchecked")
    JobConf createLauncherConf(Context context, WorkflowAction action, Element actionXml, Configuration actionConf)
            throws ActionExecutorException {
        try {
            Path appPath = new Path(context.getWorkflow().getAppPath());

            // launcher job configuration
            Configuration launcherConf = createBaseHadoopConf(context, actionXml);
            setupLauncherConf(launcherConf, actionXml, appPath, context);

            // we are doing init+copy because if not we are getting 'hdfs'
            // scheme not known
            // its seems that new JobConf(Conf) does not load defaults, it
            // assumes parameter Conf does.
            JobConf launcherJobConf = new JobConf();
            XConfiguration.copy(launcherConf, launcherJobConf);
            setLibFilesArchives(context, actionXml, appPath, launcherJobConf);
            String jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            launcherJobConf.setJobName(jobName);

            String jobId = context.getWorkflow().getId();
            String actionId = action.getId();
            Path actionDir = context.getActionDir();
            String recoveryId = context.getRecoveryId();

            // embeds the action configuration and ids so the launcher mapper
            // can run the action and report back through the action dir
            LauncherMapper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf);

            LauncherMapper.setupMainClass(launcherJobConf, getLauncherMain(launcherConf, actionXml));

            LauncherMapper.setupMaxOutputData(launcherJobConf, maxActionOutputLen);

            Namespace ns = actionXml.getNamespace();
            List<Element> list = actionXml.getChildren("arg", ns);
            String[] args = new String[list.size()];
            for (int i = 0; i < list.size(); i++) {
                args[i] = list.get(i).getTextTrim();
            }
            LauncherMapper.setupMainArguments(launcherJobConf, args);

            // append java-opts from the action XML to any configured child opts
            Element opt = actionXml.getChild("java-opts", ns);
            if (opt != null) {
                String opts = launcherConf.get("mapred.child.java.opts", "");
                opts = opts + " " + opt.getTextTrim();
                opts = opts.trim();
                launcherJobConf.set("mapred.child.java.opts", opts);
            }

            // properties from action that are needed by the launcher (QUEUE
            // NAME)
            // maybe we should add queue to the WF schema, below job-tracker
            for (String name : SPECIAL_PROPERTIES) {
                String value = actionConf.get(name);
                if (value != null) {
                    launcherJobConf.set(name, value);
                }
            }

            // to disable cancelation of delegation token on launcher job end
            launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            launcherJobConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            return launcherJobConf;
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }
453
454 private void injectCallback(Context context, Configuration conf) {
455 String callback = context.getCallbackUrl("$jobStatus");
456 if (conf.get("job.end.notification.url") != null) {
457 XLog.getLog(getClass()).warn("Overriding the action job end notification URI");
458 }
459 conf.set("job.end.notification.url", callback);
460 }
461
    /** Inject the Oozie callback URL into the action job configuration. */
    void injectActionCallback(Context context, Configuration actionConf) {
        injectCallback(context, actionConf);
    }

    /** Inject the Oozie callback URL into the launcher job configuration. */
    void injectLauncherCallback(Context context, Configuration launcherConf) {
        injectCallback(context, launcherConf);
    }
469
    /**
     * Submit the launcher Hadoop job for the action, or re-attach to a
     * previously submitted launcher when a recovery id is found in the action
     * directory. On success records the launcher id, job tracker and console
     * URL as the action's start data.
     *
     * @throws ActionExecutorException wrapping any submission failure, or
     *         JA017 when a recovered/submitted job cannot be located.
     */
    void submitLauncher(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Path appPath = new Path(context.getWorkflow().getAppPath());
            Element actionXml = XmlUtils.parseXml(action.getConf());

            // action job configuration
            Configuration actionConf = createBaseHadoopConf(context, actionXml);
            setupActionConf(actionConf, context, actionXml, appPath);
            XLog.getLog(getClass()).debug("Setting LibFilesArchives ");
            setLibFilesArchives(context, actionXml, appPath, actionConf);
            String jobName = XLog.format("oozie:action:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            actionConf.set("mapred.job.name", jobName);
            injectActionCallback(context, actionConf);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            actionConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            JobConf launcherJobConf = createLauncherConf(context, action, actionXml, actionConf);
            injectLauncherCallback(context, launcherJobConf);
            XLog.getLog(getClass()).debug("Creating Job Client for action " + action.getId());
            jobClient = createJobClient(context, launcherJobConf);
            // non-null means a launcher was already submitted for this action attempt
            String launcherId = LauncherMapper.getRecoveryId(launcherJobConf, context.getActionDir(), context
                    .getRecoveryId());
            boolean alreadyRunning = launcherId != null;
            RunningJob runningJob;

            if (alreadyRunning) {
                // recovery path: re-attach to the existing launcher job
                runningJob = jobClient.getJob(JobID.forName(launcherId));
                if (runningJob == null) {
                    String jobTracker = launcherJobConf.get("mapred.job.tracker");
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                            "unknown job [{0}@{1}], cannot recover", launcherId, jobTracker);
                }
            }
            else {
                // run optional <prepare> filesystem operations before submitting
                prepare(context, actionXml);
                XLog.getLog(getClass()).debug("Submitting the job through Job Client for action " + action.getId());

                // setting up propagation of the delegation token.
                AuthHelper.get().set(jobClient, launcherJobConf);
                log.debug(WorkflowAppService.HADOOP_JT_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
                log.debug(WorkflowAppService.HADOOP_NN_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
                runningJob = jobClient.submitJob(launcherJobConf);
                if (runningJob == null) {
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                            "Error submitting launcher for action [{0}]", action.getId());
                }
                launcherId = runningJob.getID().toString();
                XLog.getLog(getClass()).debug("After submission get the launcherId " + launcherId);
            }

            String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER);
            String consoleUrl = runningJob.getTrackingURL();
            context.setStartData(launcherId, jobTracker, consoleUrl);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    if (exception) {
                        // don't mask the primary failure; just log the close error
                        log.error("JobClient error: ", e);
                    }
                    else {
                        throw convertException(e);
                    }
                }
            }
        }
    }
551
552 void prepare(Context context, Element actionXml) throws ActionExecutorException {
553 Namespace ns = actionXml.getNamespace();
554 Element prepare = actionXml.getChild("prepare", ns);
555 if (prepare != null) {
556 XLog.getLog(getClass()).debug("Preparing the action with FileSystem operation");
557 FsActionExecutor fsAe = new FsActionExecutor();
558 fsAe.doOperations(context, prepare);
559 XLog.getLog(getClass()).debug("FS Operation is completed");
560 }
561 }
562
    /**
     * Start the action: stage the action directory (launcher jar), submit the
     * launcher job, and immediately run a check() so a fast-finishing job is
     * picked up without waiting for the next poll.
     */
    @Override
    public void start(Context context, WorkflowAction action) throws ActionExecutorException {
        try {
            XLog.getLog(getClass()).debug("Starting action " + action.getId() + " getting Action File System");
            FileSystem actionFs = getActionFileSystem(context, action);
            XLog.getLog(getClass()).debug("Preparing action Dir through copying " + context.getActionDir());
            prepareActionDir(actionFs, context);
            XLog.getLog(getClass()).debug("Action Dir is ready. Submitting the action ");
            submitLauncher(context, action);
            XLog.getLog(getClass()).debug("Action submit completed. Performing check ");
            check(context, action);
            XLog.getLog(getClass()).debug("Action check is done after submission");
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }
580
    /**
     * Complete the action: map the recorded external status to OK (SUCCEEDED)
     * or ERROR, set the end data, and always clean up the action directory.
     */
    @Override
    public void end(Context context, WorkflowAction action) throws ActionExecutorException {
        try {
            String externalStatus = action.getExternalStatus();
            WorkflowAction.Status status = externalStatus.equals(SUCCEEDED) ? WorkflowAction.Status.OK
                    : WorkflowAction.Status.ERROR;
            context.setEndData(status, getActionSignal(status));
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
        finally {
            try {
                FileSystem actionFs = getActionFileSystem(context, action);
                cleanUpActionDir(actionFs, context);
            }
            catch (Exception ex) {
                // NOTE(review): throwing from finally masks any exception
                // thrown in the try block above -- consider logging instead
                throw convertException(ex);
            }
        }
    }
602
    /**
     * Create a JobClient bound to the workflow's user and group through the
     * HadoopAccessorService.
     *
     * @param context action execution context providing user/group.
     * @param jobConf job configuration identifying the cluster.
     * @return a JobClient for the workflow's user/group. Callers must close it.
     * @throws HadoopAccessorException if the client cannot be created.
     */
    protected JobClient createJobClient(Context context, JobConf jobConf) throws HadoopAccessorException {
        String user = context.getWorkflow().getUser();
        String group = context.getWorkflow().getGroup();
        return Services.get().get(HadoopAccessorService.class).createJobClient(user, group, jobConf);
    }
615
    /**
     * Poll the launcher job. Handles the "id swap" protocol: when the
     * launcher itself submitted a child Hadoop job it writes the new job id
     * into the action directory, and this method re-targets tracking to that
     * id. On completion, captures action output (if requested) or the error
     * properties written by the launcher, and sets the execution data.
     * While the job is still running, only refreshes the external status.
     *
     * @throws ActionExecutorException JA017 if the tracked job cannot be
     *         found, or wrapping any other failure.
     */
    @Override
    public void check(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            FileSystem actionFs = getActionFileSystem(context, actionXml);
            Configuration conf = createBaseHadoopConf(context, actionXml);
            JobConf jobConf = new JobConf();
            XConfiguration.copy(conf, jobConf);
            jobClient = createJobClient(context, jobConf);
            RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
            if (runningJob == null) {
                context.setExternalStatus(FAILED);
                context.setExecutionData(FAILED, null);
                throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
                        "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", action
                                .getExternalId(), action.getId());
            }
            if (runningJob.isComplete()) {
                Path actionDir = context.getActionDir();

                String user = context.getWorkflow().getUser();
                String group = context.getWorkflow().getGroup();
                // id swap: the launcher submitted a child job and recorded its id
                if (LauncherMapper.hasIdSwap(runningJob, user, group, actionDir)) {
                    String launcherId = action.getExternalId();
                    Path idSwapPath = LauncherMapper.getIdSwapPath(context.getActionDir());
                    InputStream is = actionFs.open(idSwapPath);
                    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
                    Properties props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
                    reader.close();
                    String newId = props.getProperty("id");
                    // track the child job from now on
                    runningJob = jobClient.getJob(JobID.forName(newId));
                    if (runningJob == null) {
                        context.setExternalStatus(FAILED);
                        throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
                                "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", newId,
                                action.getId());
                    }

                    context.setStartData(newId, action.getTrackerUri(), runningJob.getTrackingURL());
                    XLog.getLog(getClass()).info(XLog.STD, "External ID swap, old ID [{0}] new ID [{1}]", launcherId,
                            newId);
                }
                // re-check completion: after an id swap this is the child job
                if (runningJob.isComplete()) {
                    XLog.getLog(getClass()).info(XLog.STD, "action completed, external ID [{0}]",
                            action.getExternalId());
                    if (runningJob.isSuccessful() && LauncherMapper.isMainSuccessful(runningJob)) {
                        Properties props = null;
                        if (getCaptureOutput(action)) {
                            props = new Properties();
                            if (LauncherMapper.hasOutputData(runningJob)) {
                                // read back output captured by the launcher (size-capped)
                                Path actionOutput = LauncherMapper.getOutputDataPath(context.getActionDir());
                                InputStream is = actionFs.open(actionOutput);
                                BufferedReader reader = new BufferedReader(new InputStreamReader(is));
                                props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
                                reader.close();
                            }
                        }
                        context.setExecutionData(SUCCEEDED, props);
                        XLog.getLog(getClass()).info(XLog.STD, "action produced output");
                    }
                    else {
                        XLog log = XLog.getLog(getClass());
                        String errorReason;
                        Path actionError = LauncherMapper.getErrorPath(context.getActionDir());
                        if (actionFs.exists(actionError)) {
                            // launcher wrote structured error info; surface it
                            InputStream is = actionFs.open(actionError);
                            BufferedReader reader = new BufferedReader(new InputStreamReader(is));
                            Properties props = PropertiesUtils.readProperties(reader, -1);
                            reader.close();
                            errorReason = props.getProperty("error.reason");
                            log.warn("Launcher ERROR, reason: {0}", errorReason);
                            String exMsg = props.getProperty("exception.message");
                            String errorInfo = (exMsg != null) ? exMsg : errorReason;
                            context.setErrorInfo("JA018", errorInfo);
                            String exStackTrace = props.getProperty("exception.stacktrace");
                            if (exMsg != null) {
                                log.warn("Launcher exception: {0}{E}{1}", exMsg, exStackTrace);
                            }
                        }
                        else {
                            // launcher died without writing an error file
                            errorReason = XLog.format("LauncherMapper died, check Hadoop log for job [{0}:{1}]", action
                                    .getTrackerUri(), action.getExternalId());
                            log.warn(errorReason);
                        }
                        context.setExecutionData(FAILED_KILLED, null);
                    }
                }
                else {
                    // child job (post id-swap) still running
                    context.setExternalStatus(RUNNING);
                    XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
                            action.getExternalId(), action.getExternalStatus());
                }
            }
            else {
                // launcher still running
                context.setExternalStatus(RUNNING);
                XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
                        action.getExternalId(), action.getExternalStatus());
            }
        }
        catch (Exception ex) {
            XLog.getLog(getClass()).warn("Exception in check(). Message[{0}]", ex.getMessage(), ex);
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    if (exception) {
                        // don't mask the primary failure; just log the close error
                        log.error("JobClient error: ", e);
                    }
                    else {
                        throw convertException(e);
                    }
                }
            }
        }
    }
738
739 protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException {
740 Element eConf = XmlUtils.parseXml(action.getConf());
741 Namespace ns = eConf.getNamespace();
742 Element captureOutput = eConf.getChild("capture-output", ns);
743 return captureOutput != null;
744 }
745
    /**
     * Kill the tracked Hadoop job (if it still exists), mark the action as
     * KILLED, and always clean up the action directory and close the client.
     */
    @Override
    public void kill(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            Configuration conf = createBaseHadoopConf(context, actionXml);
            JobConf jobConf = new JobConf();
            XConfiguration.copy(conf, jobConf);
            jobClient = createJobClient(context, jobConf);
            RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
            if (runningJob != null) {
                runningJob.killJob();
            }
            // the action is considered killed even if the job was already gone
            context.setExternalStatus(KILLED);
            context.setExecutionData(KILLED, null);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            try {
                FileSystem actionFs = getActionFileSystem(context, action);
                cleanUpActionDir(actionFs, context);
                if (jobClient != null) {
                    jobClient.close();
                }
            }
            catch (Exception ex) {
                if (exception) {
                    // don't mask the primary failure; just log the cleanup error
                    log.error("Error: ", ex);
                }
                else {
                    throw convertException(ex);
                }
            }
        }
    }
785
786 private static Set<String> FINAL_STATUS = new HashSet<String>();
787
788 static {
789 FINAL_STATUS.add(SUCCEEDED);
790 FINAL_STATUS.add(KILLED);
791 FINAL_STATUS.add(FAILED);
792 FINAL_STATUS.add(FAILED_KILLED);
793 }
794
795 @Override
796 public boolean isCompleted(String externalStatus) {
797 return FINAL_STATUS.contains(externalStatus);
798 }
799
800 }