/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.applications.distributedshell;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityInfo;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ClientRMProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.security.client.ClientRMSecurityInfo;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;


/**
 * Client for Distributed Shell application submission to YARN.
 *
 * <p> The distributed shell client launches an application master that, in turn, runs
 * the provided shell command on a set of containers. </p>
 *
 * <p> This client is meant to act as an example of how to write YARN-based applications. </p>
 *
 * <p> To submit an application, the client first connects to the <code>ResourceManager</code>
 * (aka the ApplicationsManager or ASM) via the {@link ClientRMProtocol}. The {@link ClientRMProtocol}
 * gives the client access to cluster information and lets it request a
 * new {@link ApplicationId}. </p>
 *
 * <p> For the actual job submission, the client first creates an {@link ApplicationSubmissionContext}.
 * The {@link ApplicationSubmissionContext} defines the application details, such as the {@link ApplicationId},
 * the application name, the user submitting the application, the priority assigned to the application and the queue
 * to which the application should be assigned. In addition, the {@link ApplicationSubmissionContext}
 * defines the {@link ContainerLaunchContext}, which describes the <code>Container</code> in which
 * the {@link ApplicationMaster} is launched. </p>
 *
 * <p> The {@link ContainerLaunchContext} in this scenario defines the resources to be allocated for the
 * {@link ApplicationMaster}'s container, the local resources (jars, configuration files) to be made available to it,
 * the environment to be set for the {@link ApplicationMaster} and the commands to be executed to run the
 * {@link ApplicationMaster}. </p>
 *
 * <p> Using the {@link ApplicationSubmissionContext}, the client submits the application to the
 * <code>ResourceManager</code> and then monitors the application by requesting an {@link ApplicationReport}
 * from the <code>ResourceManager</code> at regular intervals. If the application takes too long, the client
 * kills it by submitting a {@link KillApplicationRequest} to the <code>ResourceManager</code>. </p>
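 *
 * <p> For example, assuming the distributed shell application jar has been built, a typical
 * invocation might look like the following; the jar name and option values are illustrative,
 * not prescriptive (the supported options are listed in {@code init(String[])}): </p>
 * <pre>
 * $ hadoop jar hadoop-yarn-applications-distributedshell.jar \
 *     org.apache.hadoop.yarn.applications.distributedshell.Client \
 *     -jar hadoop-yarn-applications-distributedshell.jar \
 *     -shell_command ls \
 *     -shell_args /tmp \
 *     -num_containers 2 \
 *     -container_memory 128 \
 *     -master_memory 128
 * </pre>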
 *
 */
@InterfaceAudience.Public
@InterfaceStability.Unstable
public class Client {

  private static final Log LOG = LogFactory.getLog(Client.class);

  // Configuration
  private Configuration conf;

  // RPC to communicate with the RM
  private YarnRPC rpc;

  // Handle to talk to the ResourceManager/ApplicationsManager
  private ClientRMProtocol applicationsManager;

  // Application master specific info to register a new application with the RM/ASM
  private String appName = "";
  // App master priority
  private int amPriority = 0;
  // Queue for the app master
  private String amQueue = "";
  // User to run the app master as
  private String amUser = "";
  // Amount of memory to request for running the app master
  private int amMemory = 10;

  // Application master jar file
  private String appMasterJar = "";
  // Main class to invoke for the application master
  private String appMasterMainClass = "";

  // Shell command to be executed
  private String shellCommand = "";
  // Location of the shell script
  private String shellScriptPath = "";
  // Args to be passed to the shell command
  private String shellArgs = "";
  // Env variables to be set up for the shell command
  private Map<String, String> shellEnv = new HashMap<String, String>();
  // Priority of the containers running the shell command
  private int shellCmdPriority = 0;

  // Amount of memory to request for the container in which the shell script will be executed
  private int containerMemory = 10;
  // Number of containers in which the shell script needs to be executed
  private int numContainers = 1;

  // log4j.properties file
  // if available, add to local resources and set into classpath
  private String log4jPropFile = "";

  // Start time for client
  private final long clientStartTime = System.currentTimeMillis();
  // Timeout threshold for client. Kill app after time interval expires.
  private long clientTimeout = 600000;

  // Debug flag
  boolean debugFlag = false;

  /**
   * @param args Command line arguments
   */
  public static void main(String[] args) {
    boolean result = false;
    try {
      Client client = new Client();
      LOG.info("Initializing Client");
      boolean doRun = client.init(args);
      if (!doRun) {
        System.exit(0);
      }
      result = client.run();
    } catch (Throwable t) {
      LOG.fatal("Error running Client", t);
      System.exit(1);
    }
    if (result) {
      LOG.info("Application completed successfully");
      System.exit(0);
    }
    LOG.error("Application failed to complete successfully");
    System.exit(2);
  }

  /**
   * Set up the configuration and RPC.
   */
  public Client() throws Exception {
    conf = new Configuration();
    rpc = YarnRPC.create(conf);
  }

  /**
   * Helper function to print out usage.
   * @param opts Parsed command line options
   */
  private void printUsage(Options opts) {
    new HelpFormatter().printHelp("Client", opts);
  }

  /**
   * Parse command line options.
   * @param args Command line arguments
   * @return Whether init was successful and the client should be run
   */
  public boolean init(String[] args) throws ParseException {

    Options opts = new Options();
    opts.addOption("appname", true, "Application Name. Default value - DistributedShell");
    opts.addOption("priority", true, "Application Priority. Default 0");
    opts.addOption("queue", true, "RM Queue in which this application is to be submitted");
    opts.addOption("user", true, "User to run the application as");
    opts.addOption("timeout", true, "Application timeout in milliseconds");
    opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master");
    opts.addOption("jar", true, "Jar file containing the application master");
    opts.addOption("class", true, "Main class to be run for the Application Master.");
    opts.addOption("shell_command", true, "Shell command to be executed by the Application Master");
    opts.addOption("shell_script", true, "Location of the shell script to be executed");
    opts.addOption("shell_args", true, "Command line args for the shell script");
    opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs");
    opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers");
    opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command");
    opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed");
    opts.addOption("log_properties", true, "log4j.properties file");
    opts.addOption("debug", false, "Dump out debug information");
    opts.addOption("help", false, "Print usage");
    CommandLine cliParser = new GnuParser().parse(opts, args);

    if (args.length == 0) {
      printUsage(opts);
      throw new IllegalArgumentException("No args specified for client to initialize");
    }

    if (cliParser.hasOption("help")) {
      printUsage(opts);
      return false;
    }

    if (cliParser.hasOption("debug")) {
      debugFlag = true;
    }

    appName = cliParser.getOptionValue("appname", "DistributedShell");
    amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0"));
    amQueue = cliParser.getOptionValue("queue", "");
    amUser = cliParser.getOptionValue("user", "");
    amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10"));

    if (amMemory < 0) {
      throw new IllegalArgumentException("Invalid memory specified for application master, exiting."
          + " Specified memory=" + amMemory);
    }

    if (!cliParser.hasOption("jar")) {
      throw new IllegalArgumentException("No jar file specified for application master");
    }

    appMasterJar = cliParser.getOptionValue("jar");
    appMasterMainClass = cliParser.getOptionValue("class",
        "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster");

    if (!cliParser.hasOption("shell_command")) {
      throw new IllegalArgumentException("No shell command specified to be executed by application master");
    }
    shellCommand = cliParser.getOptionValue("shell_command");

    if (cliParser.hasOption("shell_script")) {
      shellScriptPath = cliParser.getOptionValue("shell_script");
    }
    if (cliParser.hasOption("shell_args")) {
      shellArgs = cliParser.getOptionValue("shell_args");
    }
    if (cliParser.hasOption("shell_env")) {
      String envs[] = cliParser.getOptionValues("shell_env");
      for (String env : envs) {
        env = env.trim();
        int index = env.indexOf('=');
        if (index == -1) {
          shellEnv.put(env, "");
          continue;
        }
        String key = env.substring(0, index);
        String val = "";
        if (index < (env.length() - 1)) {
          val = env.substring(index + 1);
        }
        shellEnv.put(key, val);
      }
    }
    shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0"));

    containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10"));
    numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1"));

    if (containerMemory < 0 || numContainers < 1) {
      throw new IllegalArgumentException("Invalid no. of containers or container memory specified, exiting."
          + " Specified containerMemory=" + containerMemory
          + ", numContainer=" + numContainers);
    }

    clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000"));

    log4jPropFile = cliParser.getOptionValue("log_properties", "");

    return true;
  }

  /**
   * Main run function for the client.
   * @return true if application completed successfully
   * @throws IOException
   */
  public boolean run() throws IOException {
    LOG.info("Starting Client");

    // Connect to the ResourceManager
    connectToASM();
    assert(applicationsManager != null);

    // Use the ClientRMProtocol handle to get general cluster information
    GetClusterMetricsRequest clusterMetricsReq = Records.newRecord(GetClusterMetricsRequest.class);
    GetClusterMetricsResponse clusterMetricsResp = applicationsManager.getClusterMetrics(clusterMetricsReq);
    LOG.info("Got Cluster metric info from ASM"
        + ", numNodeManagers=" + clusterMetricsResp.getClusterMetrics().getNumNodeManagers());

    GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
    GetClusterNodesResponse clusterNodesResp = applicationsManager.getClusterNodes(clusterNodesReq);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodesResp.getNodeReports()) {
      LOG.info("Got node report from ASM for"
          + ", nodeId=" + node.getNodeId()
          + ", nodeAddress=" + node.getHttpAddress()
          + ", nodeRackName=" + node.getRackName()
          + ", nodeNumContainers=" + node.getNumContainers()
          + ", nodeHealthStatus=" + node.getNodeHealthStatus());
    }

    GetQueueInfoRequest queueInfoReq = Records.newRecord(GetQueueInfoRequest.class);
    GetQueueInfoResponse queueInfoResp = applicationsManager.getQueueInfo(queueInfoReq);
    QueueInfo queueInfo =
        queueInfoResp.getQueueInfo();
    LOG.info("Queue info"
        + ", queueName=" + queueInfo.getQueueName()
        + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity()
        + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
        + ", queueApplicationCount=" + queueInfo.getApplications().size()
        + ", queueChildQueueCount=" + queueInfo.getChildQueues().size());

    GetQueueUserAclsInfoRequest queueUserAclsReq = Records.newRecord(GetQueueUserAclsInfoRequest.class);
    GetQueueUserAclsInfoResponse queueUserAclsResp = applicationsManager.getQueueUserAcls(queueUserAclsReq);
    List<QueueUserACLInfo> listAclInfo = queueUserAclsResp.getUserAclsInfoList();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
      for (QueueACL userAcl : aclInfo.getUserAcls()) {
        LOG.info("User ACL Info for Queue"
            + ", queueName=" + aclInfo.getQueueName()
            + ", userAcl=" + userAcl.name());
      }
    }

    // Get a new application id
    GetNewApplicationResponse newApp = getApplication();
    ApplicationId appId = newApp.getApplicationId();

    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master.
    // The memory ask has to be a multiple of the min and less than the max.
    // Dump out information about cluster capability as seen by the resource manager
    int minMem = newApp.getMinimumResourceCapability().getMemory();
    int maxMem = newApp.getMaximumResourceCapability().getMemory();
    LOG.info("Min mem capability of resources in this cluster " + minMem);
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask has to be at least the minimum capability of the cluster, has to be
    // a multiple of the min value and cannot exceed the max.
    // If it is not an exact multiple of min, the RM will allocate to the nearest multiple of min.
    if (amMemory < minMem) {
      LOG.info("AM memory specified below min threshold of cluster. Using min value."
          + ", specified=" + amMemory
          + ", min=" + minMem);
      amMemory = minMem;
    } else if (amMemory > maxMem) {
      LOG.info("AM memory specified above max threshold of cluster. Using max value."
          + ", specified=" + amMemory
          + ", max=" + maxMem);
      amMemory = maxMem;
    }

    // Create launch context for the app master
    LOG.info("Setting up application submission context for ASM");
    ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

    // set the application id
    appContext.setApplicationId(appId);
    // set the application name
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Set local resources for the application master:
    // local files or archives as needed.
    // In this scenario, the jar file for the application master is part of the local resources.
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(appMasterJar);
    String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    // Set the type of resource - file or archive
    // archives are untarred at destination
    // we don't need the jar file to be untarred for now
    amJarRsrc.setType(LocalResourceType.FILE);
    // Set visibility of the resource
    // Setting to most private option
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    // Set the resource to be copied over
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    // Set timestamp and length of file so that the framework
    // can do basic sanity checks for the local resource
    // after it has been copied over to ensure it is the same
    // resource the client intended to use with the application
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("AppMaster.jar", amJarRsrc);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
      Path log4jSrc = new Path(log4jPropFile);
      Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
      fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
      FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
      LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
      log4jRsrc.setType(LocalResourceType.FILE);
      log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
      log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
      log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
      log4jRsrc.setSize(log4jFileStatus.getLen());
      localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy it into the filesystem that is visible
    // to the YARN framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
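    // Instead, the script's HDFS location, length and timestamp are passed to the
    // application master through environment variables (set further below), so the
    // application master can construct the matching local resource for the
    // containers it launches.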
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
      Path shellSrc = new Path(shellScriptPath);
      String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh";
      Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
      fs.copyFromLocalFile(false, true, shellSrc, shellDst);
      hdfsShellScriptLocation = shellDst.toUri().toString();
      FileStatus shellFileStatus = fs.getFileStatus(shellDst);
      hdfsShellScriptLen = shellFileStatus.getLen();
      hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be set up in the env where the application master will run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // Put the location of the shell script into the env.
    // Using this env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts.
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // Add the AppMaster.jar location to the classpath.
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
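    // Note: local resources (AppMaster.jar, log4j.properties) are localized into the
    // container's working directory, which is why the current directory ("./*" and
    // "./log4j.properties") appears on the classpath below.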
    String classPathEnv = "${CLASSPATH}"
        + ":./*"
        + ":$HADOOP_CONF_DIR"
        + ":$HADOOP_COMMON_HOME/share/hadoop/common/*"
        + ":$HADOOP_COMMON_HOME/share/hadoop/common/lib/*"
        + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/*"
        + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*"
        + ":$YARN_HOME/modules/*"
        + ":$YARN_HOME/lib/*"
        + ":./log4j.properties:";

    // add the runtime classpath needed for tests to work
    String testRuntimeClassPath = Client.getTestRuntimeClasspath();
    classPathEnv += ":" + testRuntimeClassPath;

    env.put("CLASSPATH", classPathEnv);

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add("${JAVA_HOME}" + "/bin/java");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));
    if (!shellCommand.isEmpty()) {
      vargs.add("--shell_command " + shellCommand + "");
    }
    if (!shellArgs.isEmpty()) {
      vargs.add("--shell_args " + shellArgs + "");
    }
    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
      vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
      vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
      command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // For launching an AM Container, setting user here is not needed
    // Set user in ApplicationSubmissionContext
    // amContainer.setUser(amUser);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    amContainer.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // The following are not required for launching an application master
    // amContainer.setContainerId(containerId);

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);
    // Set the user submitting this application
    // TODO can it be empty?
    appContext.setUser(amUser);

    // Create the request to send to the applications manager
    SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class);
    appRequest.setApplicationSubmissionContext(appContext);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception is thrown to denote some form of failure
    LOG.info("Submitting application to ASM");
    applicationsManager.submitApplication(appRequest);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

  }

  /**
   * Monitor the submitted application for completion.
   * Kill the application if the time limit expires.
   * @param appId Application Id of application to be monitored
   * @return true if application completed successfully
   * @throws YarnRemoteException
   */
  private boolean monitorApplication(ApplicationId appId) throws YarnRemoteException {

    while (true) {

      // Check app status every 1 second.
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        LOG.debug("Thread sleep in monitoring loop interrupted");
      }

      // Get application report for the appId we are interested in
      GetApplicationReportRequest reportRequest = Records.newRecord(GetApplicationReportRequest.class);
      reportRequest.setApplicationId(appId);
      GetApplicationReportResponse reportResponse = applicationsManager.getApplicationReport(reportRequest);
      ApplicationReport report = reportResponse.getApplicationReport();

      LOG.info("Got application report from ASM for"
          + ", appId=" + appId.getId()
          + ", clientToken=" + report.getClientToken()
          + ", appDiagnostics=" + report.getDiagnostics()
          + ", appMasterHost=" + report.getHost()
          + ", appQueue=" + report.getQueue()
          + ", appMasterRpcPort=" + report.getRpcPort()
          + ", appStartTime=" + report.getStartTime()
          + ", yarnAppState=" + report.getYarnApplicationState().toString()
          + ", distributedFinalState=" + report.getFinalApplicationStatus().toString()
          + ", appTrackingUrl=" + report.getTrackingUrl()
          + ", appUser=" + report.getUser());

      YarnApplicationState state = report.getYarnApplicationState();
      FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
      if (YarnApplicationState.FINISHED == state) {
        if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
          LOG.info("Application has completed successfully. Breaking monitoring loop");
          return true;
        } else {
          LOG.info("Application finished unsuccessfully."
              + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
              + ". Breaking monitoring loop");
          return false;
        }
      } else if (YarnApplicationState.KILLED == state
          || YarnApplicationState.FAILED == state) {
        LOG.info("Application did not finish."
            + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
            + ". Breaking monitoring loop");
        return false;
      }

      if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
        LOG.info("Reached client specified timeout for application. Killing application");
        killApplication(appId);
        return false;
      }
    }

  }

  /**
   * Kill a submitted application by sending a call to the ASM.
   * @param appId Application Id to be killed
   * @throws YarnRemoteException
   */
  private void killApplication(ApplicationId appId) throws YarnRemoteException {
    KillApplicationRequest request = Records.newRecord(KillApplicationRequest.class);
    // TODO clarify whether multiple jobs with the same app id can be submitted and be running at
    // the same time.
    // If yes, can we kill a particular attempt only?
    request.setApplicationId(appId);
    // KillApplicationResponse response = applicationsManager.forceKillApplication(request);
    // The response can be ignored as it is non-null on success or
    // an exception is thrown in case of failure
    applicationsManager.forceKillApplication(request);
  }

  /**
   * Connect to the ResourceManager/ApplicationsManager and set up the
   * {@code applicationsManager} handle used to communicate with the ASM.
   * @throws IOException
   */
  private void connectToASM() throws IOException {

    /*
    UserGroupInformation user = UserGroupInformation.getCurrentUser();
    applicationsManager = user.doAs(new PrivilegedAction<ClientRMProtocol>() {
      public ClientRMProtocol run() {
        InetSocketAddress rmAddress = NetUtils.createSocketAddr(conf.get(
            YarnConfiguration.RM_SCHEDULER_ADDRESS,
            YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
        LOG.info("Connecting to ResourceManager at " + rmAddress);
        Configuration appsManagerServerConf = new Configuration(conf);
        appsManagerServerConf.setClass(YarnConfiguration.YARN_SECURITY_INFO,
            ClientRMSecurityInfo.class, SecurityInfo.class);
        ClientRMProtocol asm = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, appsManagerServerConf));
        return asm;
      }
    });
    */
    YarnConfiguration yarnConf = new YarnConfiguration(conf);
    InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
        YarnConfiguration.RM_ADDRESS,
        YarnConfiguration.DEFAULT_RM_ADDRESS));
    LOG.info("Connecting to ResourceManager at " + rmAddress);
    applicationsManager = ((ClientRMProtocol) rpc.getProxy(
        ClientRMProtocol.class, rmAddress, conf));
  }

  /**
   * Get a new application id from the ASM.
   * @return response containing the new application id
   * @throws YarnRemoteException
   */
  private GetNewApplicationResponse getApplication() throws YarnRemoteException {
    GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class);
    GetNewApplicationResponse response = applicationsManager.getNewApplication(request);
    LOG.info("Got new application id=" + response.getApplicationId());
    return response;
  }

  private static String getTestRuntimeClasspath() {

    InputStream classpathFileStream = null;
    BufferedReader reader = null;
    String envClassPath = "";

    LOG.info("Trying to generate classpath for app master from current thread's classpath");
    try {

      // Create classpath from the generated classpath file.
      // Check the maven pom.xml for the generated classpath info.
      // Works if the compile time env is the same as the runtime env. Mainly for tests.
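      // Note: the "yarn-apps-ds-generated-classpath" resource is expected to be produced
      // by the build (see the module's pom.xml); if it is not found, an empty classpath
      // is returned and only the default environment classpath is used.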
      ClassLoader thisClassLoader =
          Thread.currentThread().getContextClassLoader();
      String generatedClasspathFile = "yarn-apps-ds-generated-classpath";
      classpathFileStream =
          thisClassLoader.getResourceAsStream(generatedClasspathFile);
      if (classpathFileStream == null) {
        LOG.info("Could not load classpath resource from class loader");
        return envClassPath;
      }
      LOG.info("Readable bytes from stream=" + classpathFileStream.available());
      reader = new BufferedReader(new InputStreamReader(classpathFileStream));
      String cp = reader.readLine();
      if (cp != null) {
        envClassPath += cp.trim() + ":";
      }
      // Put the file itself on the classpath for tasks.
      envClassPath += thisClassLoader.getResource(generatedClasspathFile).getFile();
    } catch (IOException e) {
      LOG.info("Could not find the necessary resource to generate class path for tests. Error=" + e.getMessage());
    }

    try {
      if (classpathFileStream != null) {
        classpathFileStream.close();
      }
      if (reader != null) {
        reader.close();
      }
    } catch (IOException e) {
      LOG.info("Failed to close class path file stream or reader. Error=" + e.getMessage());
    }
    return envClassPath;
  }

}