/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapreduce.tools;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TIPStatus;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.JobPriority;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskTrackerInfo;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.jobhistory.HistoryViewer;
import org.apache.hadoop.mapreduce.v2.LogParams;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.LogDumper;

/**
 * Interprets the map reduce cli options.
 *
 * <p>{@link #run(String[])} parses a single command (for example
 * {@code -status <job-id>}), validates its argument count, and then executes
 * it against a {@link Cluster} built from this tool's configuration.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class CLI extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CLI.class);

  /** Cluster handle; assigned by {@link #run(String[])} before any request. */
  private Cluster cluster;

  public CLI() {
  }

  public CLI(Configuration conf) {
    setConf(conf);
  }

  /**
   * Parses the command line and executes the requested command.
   *
   * @param argv the command followed by its arguments
   * @return 0 when the command completed successfully, -1 otherwise
   *         (including usage errors, which also print the usage text)
   * @throws Exception any failure raised while talking to the cluster; a
   *         {@link RemoteException} wrapping an
   *         {@link AccessControlException} is reported on stdout instead of
   *         being rethrown
   */
  @Override
  public int run(String[] argv) throws Exception {
    int exitCode = -1;
    if (argv.length < 1) {
      displayUsage("");
      return exitCode;
    }
    // process arguments
    String cmd = argv[0];
    String submitJobFile = null;
    String jobid = null;
    String taskid = null;
    String historyFile = null;
    String counterGroupName = null;
    String counterName = null;
    JobPriority jp = null;
    String taskType = null;
    String taskState = null;
    int fromEvent = 0;
    int nEvents = 0;
    boolean getStatus = false;
    boolean getCounter = false;
    boolean killJob = false;
    boolean listEvents = false;
    boolean viewHistory = false;
    boolean viewAllHistory = false;
    boolean listJobs = false;
    boolean listAllJobs = false;
    boolean listActiveTrackers = false;
    boolean listBlacklistedTrackers = false;
    boolean displayTasks = false;
    boolean killTask = false;
    boolean failTask = false;
    boolean setJobPriority = false;
    boolean logs = false;

    if ("-submit".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      submitJobFile = argv[1];
    } else if ("-status".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      getStatus = true;
    } else if ("-counter".equals(cmd)) {
      if (argv.length != 4) {
        displayUsage(cmd);
        return exitCode;
      }
      getCounter = true;
      jobid = argv[1];
      counterGroupName = argv[2];
      counterName = argv[3];
    } else if ("-kill".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      killJob = true;
    } else if ("-set-priority".equals(cmd)) {
      if (argv.length != 3) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      try {
        jp = JobPriority.valueOf(argv[2]);
      } catch (IllegalArgumentException iae) {
        LOG.info(iae);
        displayUsage(cmd);
        return exitCode;
      }
      setJobPriority = true;
    } else if ("-events".equals(cmd)) {
      if (argv.length != 4) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      // A non-numeric event index is a usage error, not a crash; treat it
      // the same way an invalid priority is treated above.
      try {
        fromEvent = Integer.parseInt(argv[2]);
        nEvents = Integer.parseInt(argv[3]);
      } catch (NumberFormatException nfe) {
        LOG.info(nfe);
        displayUsage(cmd);
        return exitCode;
      }
      listEvents = true;
    } else if ("-history".equals(cmd)) {
      if (argv.length != 2 && !(argv.length == 3 && "all".equals(argv[1]))) {
        displayUsage(cmd);
        return exitCode;
      }
      viewHistory = true;
      if (argv.length == 3 && "all".equals(argv[1])) {
        viewAllHistory = true;
        historyFile = argv[2];
      } else {
        historyFile = argv[1];
      }
    } else if ("-list".equals(cmd)) {
      if (argv.length != 1 && !(argv.length == 2 && "all".equals(argv[1]))) {
        displayUsage(cmd);
        return exitCode;
      }
      if (argv.length == 2 && "all".equals(argv[1])) {
        listAllJobs = true;
      } else {
        listJobs = true;
      }
    } else if ("-kill-task".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      killTask = true;
      taskid = argv[1];
    } else if ("-fail-task".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      failTask = true;
      taskid = argv[1];
    } else if ("-list-active-trackers".equals(cmd)) {
      if (argv.length != 1) {
        displayUsage(cmd);
        return exitCode;
      }
      listActiveTrackers = true;
    } else if ("-list-blacklisted-trackers".equals(cmd)) {
      if (argv.length != 1) {
        displayUsage(cmd);
        return exitCode;
      }
      listBlacklistedTrackers = true;
    } else if ("-list-attempt-ids".equals(cmd)) {
      if (argv.length != 4) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      taskType = argv[2];
      taskState = argv[3];
      // displayTasks() resolves the type with TaskType.valueOf(); reject an
      // unknown type here so the user sees the usage text instead of an
      // IllegalArgumentException after the cluster connection is opened.
      try {
        TaskType.valueOf(taskType);
      } catch (IllegalArgumentException iae) {
        LOG.info(iae);
        displayUsage(cmd);
        return exitCode;
      }
      displayTasks = true;
    } else if ("-logs".equals(cmd)) {
      if (argv.length == 2 || argv.length == 3) {
        logs = true;
        jobid = argv[1];
        if (argv.length == 3) {
          taskid = argv[2];
        } else {
          taskid = null;
        }
      } else {
        displayUsage(cmd);
        return exitCode;
      }
    } else {
      displayUsage(cmd);
      return exitCode;
    }

    // initialize cluster
    cluster = new Cluster(getConf());

    // Submit the request
    try {
      if (submitJobFile != null) {
        Job job = Job.getInstance(new JobConf(submitJobFile));
        job.submit();
        System.out.println("Created job " + job.getJobID());
        exitCode = 0;
      } else if (getStatus) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          Counters counters = job.getCounters();
          System.out.println();
          System.out.println(job);
          if (counters != null) {
            System.out.println(counters);
          } else {
            System.out.println("Counters not available. Job is retired.");
          }
          exitCode = 0;
        }
      } else if (getCounter) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          Counters counters = job.getCounters();
          if (counters == null) {
            System.out.println("Counters not available for retired job " +
                jobid);
            exitCode = -1;
          } else {
            System.out.println(getCounter(counters,
                counterGroupName, counterName));
            exitCode = 0;
          }
        }
      } else if (killJob) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          job.killJob();
          System.out.println("Killed job " + jobid);
          exitCode = 0;
        }
      } else if (setJobPriority) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          job.setPriority(jp);
          System.out.println("Changed job priority.");
          exitCode = 0;
        }
      } else if (viewHistory) {
        viewHistory(historyFile, viewAllHistory);
        exitCode = 0;
      } else if (listEvents) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          listEvents(job, fromEvent, nEvents);
          exitCode = 0;
        }
      } else if (listJobs) {
        listJobs(cluster);
        exitCode = 0;
      } else if (listAllJobs) {
        listAllJobs(cluster);
        exitCode = 0;
      } else if (listActiveTrackers) {
        listActiveTrackers(cluster);
        exitCode = 0;
      } else if (listBlacklistedTrackers) {
        listBlacklistedTrackers(cluster);
        exitCode = 0;
      } else if (displayTasks) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          displayTasks(job, taskType, taskState);
          exitCode = 0;
        }
      } else if (killTask) {
        TaskAttemptID taskID = TaskAttemptID.forName(taskid);
        Job job = cluster.getJob(taskID.getJobID());
        if (job == null) {
          // jobid is never set on this path; report the id derived from the
          // task attempt instead.
          System.out.println("Could not find job " + taskID.getJobID());
        } else if (job.killTask(taskID)) {
          System.out.println("Killed task " + taskid);
          exitCode = 0;
        } else {
          System.out.println("Could not kill task " + taskid);
          exitCode = -1;
        }
      } else if (failTask) {
        TaskAttemptID taskID = TaskAttemptID.forName(taskid);
        Job job = cluster.getJob(taskID.getJobID());
        if (job == null) {
          // jobid is never set on this path; report the id derived from the
          // task attempt instead.
          System.out.println("Could not find job " + taskID.getJobID());
        } else if (job.failTask(taskID)) {
          System.out.println("Killed task " + taskID + " by failing it");
          exitCode = 0;
        } else {
          System.out.println("Could not fail task " + taskid);
          exitCode = -1;
        }
      } else if (logs) {
        // NOTE(review): exitCode stays -1 on this path even when the dump
        // completes. Whether dumpAContainersLogs reports a status that
        // should be propagated is not visible from this file - confirm
        // before changing.
        try {
          JobID jobID = JobID.forName(jobid);
          TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskid);
          LogParams logParams = cluster.getLogParams(jobID, taskAttemptID);
          LogDumper logDumper = new LogDumper();
          logDumper.setConf(getConf());
          logDumper.dumpAContainersLogs(logParams.getApplicationId(),
              logParams.getContainerId(), logParams.getNodeId(),
              logParams.getOwner());
        } catch (IOException e) {
          // Remote failures go to the outer handler so access-control
          // errors are reported uniformly; anything else is just printed.
          if (e instanceof RemoteException) {
            throw e;
          }
          System.out.println(e.getMessage());
        }
      }
    } catch (RemoteException re) {
      IOException unwrappedException = re.unwrapRemoteException();
      if (unwrappedException instanceof AccessControlException) {
        System.out.println(unwrappedException.getMessage());
      } else {
        throw re;
      }
    } finally {
      cluster.close();
    }
    return exitCode;
  }

  /**
   * @return the names of all {@link JobPriority} values, separated by single
   *         spaces
   */
  private String getJobPriorityNames() {
    StringBuilder sb = new StringBuilder();
    for (JobPriority p : JobPriority.values()) {
      if (sb.length() > 0) {
        sb.append(' ');
      }
      sb.append(p.name());
    }
    return sb.toString();
  }

  /**
   * @return the names of all {@link TaskType} values, separated by single
   *         spaces
   */
  private String getTaskTypeNames() {
    StringBuilder sb = new StringBuilder();
    for (TaskType t : TaskType.values()) {
      if (sb.length() > 0) {
        sb.append(' ');
      }
      sb.append(t.name());
    }
    return sb.toString();
  }

  /**
   * Display usage of the command-line tool on stderr. This method only
   * prints; the caller decides what to return.
   *
   * @param cmd the command whose usage should be shown; any unrecognized
   *        value (including the empty string) prints the full command list
   */
  private void displayUsage(String cmd) {
    String prefix = "Usage: CLI ";
    String jobPriorityValues = getJobPriorityNames();
    String taskTypes = getTaskTypeNames();
    String taskStates = "running, completed";
    if ("-submit".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " <job-file>]");
    } else if ("-status".equals(cmd) || "-kill".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " <job-id>]");
    } else if ("-counter".equals(cmd)) {
      System.err.println(prefix + "[" + cmd +
          " <job-id> <group-name> <counter-name>]");
    } else if ("-events".equals(cmd)) {
      System.err.println(prefix + "[" + cmd +
          " <job-id> <from-event-#> <#-of-events>]. Event #s start from 1.");
    } else if ("-history".equals(cmd)) {
      // run() also accepts "-history all <jobHistoryFile>".
      System.err.println(prefix + "[" + cmd + " [all] <jobHistoryFile>]");
    } else if ("-list".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " [all]]");
    } else if ("-kill-task".equals(cmd) || "-fail-task".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " <task-attempt-id>]");
    } else if ("-set-priority".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " <job-id> <priority>]. " +
          "Valid values for priorities are: "
          + jobPriorityValues);
    } else if ("-list-active-trackers".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + "]");
    } else if ("-list-blacklisted-trackers".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + "]");
    } else if ("-list-attempt-ids".equals(cmd)) {
      System.err.println(prefix + "[" + cmd +
          " <job-id> <task-type> <task-state>]. " +
          "Valid values for <task-type> are " + taskTypes + ". " +
          "Valid values for <task-state> are " + taskStates);
    } else if ("-logs".equals(cmd)) {
      System.err.println(prefix + "[" + cmd +
          " <job-id> <task-attempt-id>]. " +
          " <task-attempt-id> is optional to get task attempt logs.");
    } else {
      System.err.printf(prefix + "<command> <args>\n");
      System.err.printf("\t[-submit <job-file>]\n");
      System.err.printf("\t[-status <job-id>]\n");
      System.err.printf("\t[-counter <job-id> <group-name> <counter-name>]\n");
      System.err.printf("\t[-kill <job-id>]\n");
      System.err.printf("\t[-set-priority <job-id> <priority>]. " +
          "Valid values for priorities are: " + jobPriorityValues + "\n");
      System.err.printf("\t[-events <job-id> <from-event-#> <#-of-events>]\n");
      System.err.printf("\t[-history [all] <jobHistoryFile>]\n");
      System.err.printf("\t[-list [all]]\n");
      System.err.printf("\t[-list-active-trackers]\n");
      System.err.printf("\t[-list-blacklisted-trackers]\n");
      System.err.println("\t[-list-attempt-ids <job-id> <task-type> " +
          "<task-state>]. " +
          "Valid values for <task-type> are " + taskTypes + ". " +
          "Valid values for <task-state> are " + taskStates);
      System.err.printf("\t[-kill-task <task-attempt-id>]\n");
      System.err.printf("\t[-fail-task <task-attempt-id>]\n");
      System.err.printf("\t[-logs <job-id> <task-attempt-id>]\n\n");
      ToolRunner.printGenericCommandUsage(System.out);
    }
  }

  /**
   * Prints a job history file through a {@link HistoryViewer}.
   *
   * @param historyFile the history file argument given on the command line
   * @param all the "all" flag passed through to the viewer
   * @throws IOException if the viewer fails
   */
  private void viewHistory(String historyFile, boolean all)
      throws IOException {
    HistoryViewer historyViewer = new HistoryViewer(historyFile,
        getConf(), all);
    historyViewer.print();
  }

  /**
   * Looks up a single counter value.
   *
   * @param counters the counters to search
   * @param counterGroupName the counter group name
   * @param counterName the counter name within the group
   * @return the value of the located counter
   * @throws IOException declared for overriding implementations
   */
  protected long getCounter(Counters counters, String counterGroupName,
      String counterName) throws IOException {
    return counters.findCounter(counterGroupName, counterName).getValue();
  }

  /**
   * List the task completion events for the given job.
   *
   * @param job the job whose events are listed; must not be null
   * @param fromEventId the first event index to request
   * @param numEvents the maximum number of events to request
   * @throws IOException
   * @throws InterruptedException
   */
  private void listEvents(Job job, int fromEventId, int numEvents)
      throws IOException, InterruptedException {
    TaskCompletionEvent[] events = job.
        getTaskCompletionEvents(fromEventId, numEvents);
    System.out.println("Task completion events for " + job.getJobID());
    System.out.println("Number of events (from " + fromEventId + ") are: "
        + events.length);
    for (TaskCompletionEvent event : events) {
      System.out.println(event.getStatus() + " " +
          event.getTaskAttemptId() + " " +
          getTaskLogURL(event.getTaskAttemptId(), event.getTaskTrackerHttp()));
    }
  }

  /**
   * Builds the plain-text task log URL for a task attempt.
   *
   * @param taskId the task attempt id appended as the {@code attemptid}
   *        query parameter
   * @param baseUrl the URL prefix the {@code /tasklog} path is appended to
   * @return the concatenated URL string
   */
  protected static String getTaskLogURL(TaskAttemptID taskId, String baseUrl) {
    return (baseUrl + "/tasklog?plaintext=true&attemptid=" + taskId);
  }


  /**
   * Dump a list of the jobs that are not yet complete.
   *
   * @param cluster the cluster to query
   * @throws IOException
   * @throws InterruptedException
   */
  private void listJobs(Cluster cluster)
      throws IOException, InterruptedException {
    List<JobStatus> runningJobs = new ArrayList<JobStatus>();
    for (JobStatus job : cluster.getAllJobStatuses()) {
      if (!job.isJobComplete()) {
        runningJobs.add(job);
      }
    }
    displayJobList(runningJobs.toArray(new JobStatus[0]));
  }

  /**
   * Dump a list of all jobs submitted.
   *
   * @param cluster the cluster to query
   * @throws IOException
   * @throws InterruptedException
   */
  private void listAllJobs(Cluster cluster)
      throws IOException, InterruptedException {
    displayJobList(cluster.getAllJobStatuses());
  }

  /**
   * Display the list of active trackers, one tracker name per line.
   *
   * @param cluster the cluster to query
   */
  private void listActiveTrackers(Cluster cluster)
      throws IOException, InterruptedException {
    TaskTrackerInfo[] trackers = cluster.getActiveTaskTrackers();
    for (TaskTrackerInfo tracker : trackers) {
      System.out.println(tracker.getTaskTrackerName());
    }
  }

  /**
   * Display the list of blacklisted trackers with the reason for each. The
   * header line is printed only when at least one tracker is returned.
   *
   * @param cluster the cluster to query
   */
  private void listBlacklistedTrackers(Cluster cluster)
      throws IOException, InterruptedException {
    TaskTrackerInfo[] trackers = cluster.getBlackListedTaskTrackers();
    if (trackers.length > 0) {
      System.out.println("BlackListedNode \t Reason");
    }
    for (TaskTrackerInfo tracker : trackers) {
      System.out.println(tracker.getTaskTrackerName() + "\t" +
          tracker.getReasonForBlacklist());
    }
  }

  /**
   * Prints the attempt ids of a task: the successful attempt when the task
   * is COMPLETE, every running attempt when it is RUNNING, and nothing for
   * any other status.
   *
   * @param report the task report to print attempts for
   */
  private void printTaskAttempts(TaskReport report) {
    if (report.getCurrentStatus() == TIPStatus.COMPLETE) {
      System.out.println(report.getSuccessfulTaskAttemptId());
    } else if (report.getCurrentStatus() == TIPStatus.RUNNING) {
      for (TaskAttemptID t :
          report.getRunningTaskAttemptIds()) {
        System.out.println(t);
      }
    }
  }

  /**
   * Display the information about a job's tasks, of a particular type and
   * in a particular state
   *
   * @param job the job
   * @param type the type of the task; must be the name of a
   *        {@link TaskType} constant
   * @param state the state of the task
   * (pending/running/completed/failed/killed)
   * @throws IllegalArgumentException if {@code type} is not a
   *         {@link TaskType} name
   */
  protected void displayTasks(Job job, String type, String state)
      throws IOException, InterruptedException {
    TaskReport[] reports = job.getTaskReports(TaskType.valueOf(type));
    for (TaskReport report : reports) {
      TIPStatus status = report.getCurrentStatus();
      if ((state.equals("pending") && status == TIPStatus.PENDING) ||
          (state.equals("running") && status == TIPStatus.RUNNING) ||
          (state.equals("completed") && status == TIPStatus.COMPLETE) ||
          (state.equals("failed") && status == TIPStatus.FAILED) ||
          (state.equals("killed") && status == TIPStatus.KILLED)) {
        printTaskAttempts(report);
      }
    }
  }

  /**
   * Prints a tab-separated table describing the given jobs.
   *
   * <p>The map and reduce counts are obtained by looking each job up through
   * the {@code cluster} field, which {@link #run(String[])} initializes.
   *
   * @param jobs the job statuses to print
   * @throws IOException
   * @throws InterruptedException
   */
  public void displayJobList(JobStatus[] jobs)
      throws IOException, InterruptedException {
    System.out.println("Total jobs:" + jobs.length);
    System.out.println("JobId\tState\tStartTime\t" +
        "UserName\tQueue\tPriority\tMaps\tReduces\tUsedContainers\t" +
        "RsvdContainers\tUsedMem\tRsvdMem\tNeededMem\tAM info");
    for (JobStatus job : jobs) {
      // Look the job up once rather than once per task type. The lookup can
      // yield null (run() guards against that for every other command), in
      // which case the task counts are reported as 0 instead of failing the
      // whole listing.
      Job listedJob = cluster.getJob(job.getJobID());
      int numMaps = 0;
      int numReduces = 0;
      if (listedJob != null) {
        numMaps = listedJob.getTaskReports(TaskType.MAP).length;
        numReduces = listedJob.getTaskReports(TaskType.REDUCE).length;
      }

      System.out.printf("%s\t%s\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%dM\t%dM\t%dM\t%s\n",
          job.getJobID().toString(), job.getState(), job.getStartTime(),
          job.getUsername(), job.getQueue(),
          job.getPriority().name(),
          numMaps,
          numReduces,
          job.getNumUsedSlots(),
          job.getNumReservedSlots(),
          job.getUsedMem(),
          job.getReservedMem(),
          job.getNeededMem(),
          job.getSchedulingInfo());
    }
  }

  public static void main(String[] argv) throws Exception {
    int res = ToolRunner.run(new CLI(), argv);
    System.exit(res);
  }
}