001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.util;
016
017 import org.apache.hadoop.conf.Configuration;
018 import org.apache.oozie.service.ConfigurationService;
019
020 import java.util.ArrayList;
021 import java.util.Collection;
022 import java.util.Collections;
023 import java.util.HashMap;
024 import java.util.LinkedHashMap;
025 import java.util.LinkedHashSet;
026 import java.util.List;
027 import java.util.Map;
028 import java.util.Set;
029 import java.util.concurrent.ConcurrentHashMap;
030 import java.util.concurrent.ScheduledExecutorService;
031 import java.util.concurrent.TimeUnit;
032 import java.util.concurrent.atomic.AtomicLong;
033 import java.util.concurrent.locks.Lock;
034 import java.util.concurrent.locks.ReentrantLock;
035
036 /**
037 * Instrumentation framework that supports Timers, Counters, Variables and Sampler instrumentation elements. <p/> All
038 * instrumentation elements have a group and a name.
039 */
040 public class Instrumentation {
041 private ScheduledExecutorService scheduler;
042 private Lock counterLock;
043 private Lock timerLock;
044 private Lock variableLock;
045 private Lock samplerLock;
046 private Configuration configuration;
047 private Map<String, Map<String, Map<String, Object>>> all;
048 private Map<String, Map<String, Element<Long>>> counters;
049 private Map<String, Map<String, Element<Timer>>> timers;
050 private Map<String, Map<String, Element<Variable>>> variables;
051 private Map<String, Map<String, Element<Double>>> samplers;
052
053 /**
054 * Instrumentation constructor.
055 */
056 @SuppressWarnings("unchecked")
057 public Instrumentation() {
058 counterLock = new ReentrantLock();
059 timerLock = new ReentrantLock();
060 variableLock = new ReentrantLock();
061 samplerLock = new ReentrantLock();
062 all = new LinkedHashMap<String, Map<String, Map<String, Object>>>();
063 counters = new ConcurrentHashMap<String, Map<String, Element<Long>>>();
064 timers = new ConcurrentHashMap<String, Map<String, Element<Timer>>>();
065 variables = new ConcurrentHashMap<String, Map<String, Element<Variable>>>();
066 samplers = new ConcurrentHashMap<String, Map<String, Element<Double>>>();
067 all.put("variables", (Map<String, Map<String, Object>>) (Object) variables);
068 all.put("samplers", (Map<String, Map<String, Object>>) (Object) samplers);
069 all.put("counters", (Map<String, Map<String, Object>>) (Object) counters);
070 all.put("timers", (Map<String, Map<String, Object>>) (Object) timers);
071 }
072
073 /**
074 * Set the scheduler instance to handle the samplers.
075 *
076 * @param scheduler scheduler instance.
077 */
078 public void setScheduler(ScheduledExecutorService scheduler) {
079 this.scheduler = scheduler;
080 }
081
082 /**
083 * Cron is a stopwatch that can be started/stopped several times. <p/> This class is not thread safe, it does not
084 * need to be. <p/> It keeps track of the total time (first start to last stop) and the running time (total time
085 * minus the stopped intervals). <p/> Once a Cron is complete it must be added to the corresponding group/name in a
086 * Instrumentation instance.
087 */
088 public static class Cron {
089 private long start;
090 private long end;
091 private long lapStart;
092 private long own;
093 private long total;
094 private boolean running;
095
096 /**
097 * Creates new Cron, stopped, in zero.
098 */
099 public Cron() {
100 running = false;
101 }
102
103 /**
104 * Start the cron. It cannot be already started.
105 */
106 public void start() {
107 if (!running) {
108 if (lapStart == 0) {
109 lapStart = System.currentTimeMillis();
110 if (start == 0) {
111 start = lapStart;
112 end = start;
113 }
114 }
115 running = true;
116 }
117 }
118
119 /**
120 * Stops the cron. It cannot be already stopped.
121 */
122 public void stop() {
123 if (running) {
124 end = System.currentTimeMillis();
125 if (start == 0) {
126 start = end;
127 }
128 total = end - start;
129 if (lapStart > 0) {
130 own += end - lapStart;
131 lapStart = 0;
132 }
133 running = false;
134 }
135 }
136
137 /**
138 * Return the start time of the cron. It must be stopped.
139 *
140 * @return the start time of the cron.
141 */
142 public long getStart() {
143 if (running) {
144 throw new IllegalStateException("Timer running");
145 }
146 return start;
147 }
148
149 /**
150 * Return the end time of the cron. It must be stopped.
151 *
152 * @return the end time of the cron.
153 */
154 public long getEnd() {
155 if (running) {
156 throw new IllegalStateException("Timer running");
157 }
158 return end;
159 }
160
161 /**
162 * Return the total time of the cron. It must be stopped.
163 *
164 * @return the total time of the cron.
165 */
166 public long getTotal() {
167 if (running) {
168 throw new IllegalStateException("Timer running");
169 }
170 return total;
171 }
172
173 /**
174 * Return the own time of the cron. It must be stopped.
175 *
176 * @return the own time of the cron.
177 */
178 public long getOwn() {
179 if (running) {
180 throw new IllegalStateException("Timer running");
181 }
182 return own;
183 }
184
185 }
186
/**
 * Gives access to a snapshot of an Instrumentation element (Counter, Timer, Variable, Sampler). <p/>
 * Instrumentation elements are returned by the {@link Instrumentation#getCounters()},
 * {@link Instrumentation#getTimers()}, {@link Instrumentation#getVariables()} and
 * {@link Instrumentation#getSamplers()} methods.
 *
 * @param <T> type of the snapshot value.
 */
public interface Element<T> {

    /**
     * Return the snapshot value of the Instrumentation element.
     *
     * @return the snapshot value of the Instrumentation element.
     */
    T getValue();
}
200
201 /**
202 * Counter Instrumentation element.
203 */
204 private static class Counter extends AtomicLong implements Element<Long> {
205
206 /**
207 * Return the counter snapshot.
208 *
209 * @return the counter snapshot.
210 */
211 public Long getValue() {
212 return get();
213 }
214
215 /**
216 * Return the String representation of the counter value.
217 *
218 * @return the String representation of the counter value.
219 */
220 public String toString() {
221 return Long.toString(get());
222 }
223
224 }
225
226 /**
227 * Timer Instrumentation element.
228 */
229 public static class Timer implements Element<Timer> {
230 Lock lock = new ReentrantLock();
231 private long ownTime;
232 private long totalTime;
233 private long ticks;
234 private long ownSquareTime;
235 private long totalSquareTime;
236 private long ownMinTime;
237 private long ownMaxTime;
238 private long totalMinTime;
239 private long totalMaxTime;
240
241 /**
242 * Timer constructor. <p/> It is project private for test purposes.
243 */
244 Timer() {
245 }
246
247 /**
248 * Return the String representation of the timer value.
249 *
250 * @return the String representation of the timer value.
251 */
252 public String toString() {
253 return XLog.format("ticks[{0}] totalAvg[{1}] ownAvg[{2}]", ticks, getTotalAvg(), getOwnAvg());
254 }
255
256 /**
257 * Return the timer snapshot.
258 *
259 * @return the timer snapshot.
260 */
261 public Timer getValue() {
262 try {
263 lock.lock();
264 Timer timer = new Timer();
265 timer.ownTime = ownTime;
266 timer.totalTime = totalTime;
267 timer.ticks = ticks;
268 timer.ownSquareTime = ownSquareTime;
269 timer.totalSquareTime = totalSquareTime;
270 timer.ownMinTime = ownMinTime;
271 timer.ownMaxTime = ownMaxTime;
272 timer.totalMinTime = totalMinTime;
273 timer.totalMaxTime = totalMaxTime;
274 return timer;
275 }
276 finally {
277 lock.unlock();
278 }
279 }
280
281 /**
282 * Add a cron to a timer. <p/> It is project private for test purposes.
283 *
284 * @param cron Cron to add.
285 */
286 void addCron(Cron cron) {
287 try {
288 lock.lock();
289 long own = cron.getOwn();
290 long total = cron.getTotal();
291 ownTime += own;
292 totalTime += total;
293 ticks++;
294 ownSquareTime += own * own;
295 totalSquareTime += total * total;
296 if (ticks == 1) {
297 ownMinTime = own;
298 ownMaxTime = own;
299 totalMinTime = total;
300 totalMaxTime = total;
301 }
302 else {
303 ownMinTime = Math.min(ownMinTime, own);
304 ownMaxTime = Math.max(ownMaxTime, own);
305 totalMinTime = Math.min(totalMinTime, total);
306 totalMaxTime = Math.max(totalMaxTime, total);
307 }
308 }
309 finally {
310 lock.unlock();
311 }
312 }
313
314 /**
315 * Return the own accumulated computing time by the timer.
316 *
317 * @return own accumulated computing time by the timer.
318 */
319 public long getOwn() {
320 return ownTime;
321 }
322
323 /**
324 * Return the total accumulated computing time by the timer.
325 *
326 * @return total accumulated computing time by the timer.
327 */
328 public long getTotal() {
329 return totalTime;
330 }
331
332 /**
333 * Return the number of times a cron was added to the timer.
334 *
335 * @return the number of times a cron was added to the timer.
336 */
337 public long getTicks() {
338 return ticks;
339 }
340
341 /**
342 * Return the sum of the square own times. <p/> It can be used to calculate the standard deviation.
343 *
344 * @return the sum of the square own timer.
345 */
346 public long getOwnSquareSum() {
347 return ownSquareTime;
348 }
349
350 /**
351 * Return the sum of the square total times. <p/> It can be used to calculate the standard deviation.
352 *
353 * @return the sum of the square own timer.
354 */
355 public long getTotalSquareSum() {
356 return totalSquareTime;
357 }
358
359 /**
360 * Returns the own minimum time.
361 *
362 * @return the own minimum time.
363 */
364 public long getOwnMin() {
365 return ownMinTime;
366 }
367
368 /**
369 * Returns the own maximum time.
370 *
371 * @return the own maximum time.
372 */
373 public long getOwnMax() {
374 return ownMaxTime;
375 }
376
377 /**
378 * Returns the total minimum time.
379 *
380 * @return the total minimum time.
381 */
382 public long getTotalMin() {
383 return totalMinTime;
384 }
385
386 /**
387 * Returns the total maximum time.
388 *
389 * @return the total maximum time.
390 */
391 public long getTotalMax() {
392 return totalMaxTime;
393 }
394
395 /**
396 * Returns the own average time.
397 *
398 * @return the own average time.
399 */
400 public long getOwnAvg() {
401 return (ticks != 0) ? ownTime / ticks : 0;
402 }
403
404 /**
405 * Returns the total average time.
406 *
407 * @return the total average time.
408 */
409 public long getTotalAvg() {
410 return (ticks != 0) ? totalTime / ticks : 0;
411 }
412
413 /**
414 * Returns the total time standard deviation.
415 *
416 * @return the total time standard deviation.
417 */
418 public double getTotalStdDev() {
419 return evalStdDev(ticks, totalTime, totalSquareTime);
420 }
421
422 /**
423 * Returns the own time standard deviation.
424 *
425 * @return the own time standard deviation.
426 */
427 public double getOwnStdDev() {
428 return evalStdDev(ticks, ownTime, ownSquareTime);
429 }
430
431 private double evalStdDev(long n, long sn, long ssn) {
432 return (n < 2) ? -1 : Math.sqrt((n * ssn - sn * sn) / (n * (n - 1)));
433 }
434
435 }
436
437 /**
438 * Add a cron to an instrumentation timer. The timer is created if it does not exists. <p/> This method is thread
439 * safe.
440 *
441 * @param group timer group.
442 * @param name timer name.
443 * @param cron cron to add to the timer.
444 */
445 public void addCron(String group, String name, Cron cron) {
446 Map<String, Element<Timer>> map = timers.get(group);
447 if (map == null) {
448 try {
449 timerLock.lock();
450 map = timers.get(group);
451 if (map == null) {
452 map = new HashMap<String, Element<Timer>>();
453 timers.put(group, map);
454 }
455 }
456 finally {
457 timerLock.unlock();
458 }
459 }
460 Timer timer = (Timer) map.get(name);
461 if (timer == null) {
462 try {
463 timerLock.lock();
464 timer = (Timer) map.get(name);
465 if (timer == null) {
466 timer = new Timer();
467 map.put(name, timer);
468 }
469 }
470 finally {
471 timerLock.unlock();
472 }
473 }
474 timer.addCron(cron);
475 }
476
477 /**
478 * Increment an instrumentation counter. The counter is created if it does not exists. <p/> This method is thread
479 * safe.
480 *
481 * @param group counter group.
482 * @param name counter name.
483 * @param count increment to add to the counter.
484 */
485 public void incr(String group, String name, long count) {
486 Map<String, Element<Long>> map = counters.get(group);
487 if (map == null) {
488 try {
489 counterLock.lock();
490 map = counters.get(group);
491 if (map == null) {
492 map = new HashMap<String, Element<Long>>();
493 counters.put(group, map);
494 }
495 }
496 finally {
497 counterLock.unlock();
498 }
499 }
500 Counter counter = (Counter) map.get(name);
501 if (counter == null) {
502 try {
503 counterLock.lock();
504 counter = (Counter) map.get(name);
505 if (counter == null) {
506 counter = new Counter();
507 map.put(name, counter);
508 }
509 }
510 finally {
511 counterLock.unlock();
512 }
513 }
514 counter.addAndGet(count);
515 }
516
/**
 * Interface for instrumentation variables. <p/> For example, the database service could expose the number of
 * currently active connections.
 *
 * @param <T> type of the variable value.
 */
public interface Variable<T> extends Element<T> {
}
523
524 /**
525 * Add an instrumentation variable. The variable must not exist. <p/> This method is thread safe.
526 *
527 * @param group counter group.
528 * @param name counter name.
529 * @param variable variable to add.
530 */
531 @SuppressWarnings("unchecked")
532 public void addVariable(String group, String name, Variable variable) {
533 Map<String, Element<Variable>> map = variables.get(group);
534 if (map == null) {
535 try {
536 variableLock.lock();
537 map = variables.get(group);
538 if (map == null) {
539 map = new HashMap<String, Element<Variable>>();
540 variables.put(group, map);
541 }
542 }
543 finally {
544 variableLock.unlock();
545 }
546 }
547 if (map.containsKey(name)) {
548 throw new RuntimeException(XLog.format("Variable group=[{0}] name=[{1}] already defined", group, name));
549 }
550 map.put(name, variable);
551 }
552
553 /**
554 * Set the system configuration.
555 *
556 * @param configuration system configuration.
557 */
558 public void setConfiguration(Configuration configuration) {
559 this.configuration = configuration;
560 }
561
562 /**
563 * Return the JVM system properties.
564 *
565 * @return JVM system properties.
566 */
567 @SuppressWarnings("unchecked")
568 public Map<String, String> getJavaSystemProperties() {
569 return (Map<String, String>) (Object) System.getProperties();
570 }
571
572 /**
573 * Return the OS environment used to start Oozie.
574 *
575 * @return the OS environment used to start Oozie.
576 */
577 public Map<String, String> getOSEnv() {
578 return System.getenv();
579 }
580
581 /**
582 * Return the current system configuration as a Map<String,String>.
583 *
584 * @return the current system configuration as a Map<String,String>.
585 */
586 public Map<String, String> getConfiguration() {
587 final Configuration maskedConf = ConfigurationService.maskPasswords(configuration);
588
589 return new Map<String, String>() {
590 public int size() {
591 return maskedConf.size();
592 }
593
594 public boolean isEmpty() {
595 return maskedConf.size() == 0;
596 }
597
598 public boolean containsKey(Object o) {
599 return maskedConf.get((String) o) != null;
600 }
601
602 public boolean containsValue(Object o) {
603 throw new UnsupportedOperationException();
604 }
605
606 public String get(Object o) {
607 return maskedConf.get((String) o);
608 }
609
610 public String put(String s, String s1) {
611 throw new UnsupportedOperationException();
612 }
613
614 public String remove(Object o) {
615 throw new UnsupportedOperationException();
616 }
617
618 public void putAll(Map<? extends String, ? extends String> map) {
619 throw new UnsupportedOperationException();
620 }
621
622 public void clear() {
623 throw new UnsupportedOperationException();
624 }
625
626 public Set<String> keySet() {
627 Set<String> set = new LinkedHashSet<String>();
628 for (Entry<String, String> entry : maskedConf) {
629 set.add(entry.getKey());
630 }
631 return set;
632 }
633
634 public Collection<String> values() {
635 Set<String> set = new LinkedHashSet<String>();
636 for (Entry<String, String> entry : maskedConf) {
637 set.add(entry.getValue());
638 }
639 return set;
640 }
641
642 public Set<Entry<String, String>> entrySet() {
643 Set<Entry<String, String>> set = new LinkedHashSet<Entry<String, String>>();
644 for (Entry<String, String> entry : maskedConf) {
645 set.add(entry);
646 }
647 return set;
648 }
649 };
650 }
651
652 /**
653 * Return all the counters. <p/> This method is thread safe. <p/> The counters are live. The counter value is a
654 * snapshot at the time the {@link Instrumentation.Element#getValue()} is invoked.
655 *
656 * @return all counters.
657 */
658 public Map<String, Map<String, Element<Long>>> getCounters() {
659 return counters;
660 }
661
662 /**
663 * Return all the timers. <p/> This method is thread safe. <p/> The timers are live. Once a timer is obtained, all
664 * its values are consistent (they are snapshot at the time the {@link Instrumentation.Element#getValue()} is
665 * invoked.
666 *
667 * @return all counters.
668 */
669 public Map<String, Map<String, Element<Timer>>> getTimers() {
670 return timers;
671 }
672
673 /**
674 * Return all the variables. <p/> This method is thread safe. <p/> The variables are live. The variable value is a
675 * snapshot at the time the {@link Instrumentation.Element#getValue()} is invoked.
676 *
677 * @return all counters.
678 */
679 public Map<String, Map<String, Element<Variable>>> getVariables() {
680 return variables;
681 }
682
683 /**
684 * Return a map containing all variables, counters and timers.
685 *
686 * @return a map containing all variables, counters and timers.
687 */
688 public Map<String, Map<String, Map<String, Object>>> getAll() {
689 return all;
690 }
691
692 /**
693 * Return the string representation of the instrumentation.
694 *
695 * @return the string representation of the instrumentation.
696 */
697 public String toString() {
698 String E = System.getProperty("line.separator");
699 StringBuilder sb = new StringBuilder(4096);
700 for (String element : all.keySet()) {
701 sb.append(element).append(':').append(E);
702 List<String> groups = new ArrayList<String>(all.get(element).keySet());
703 Collections.sort(groups);
704 for (String group : groups) {
705 sb.append(" ").append(group).append(':').append(E);
706 List<String> names = new ArrayList<String>(all.get(element).get(group).keySet());
707 Collections.sort(names);
708 for (String name : names) {
709 sb.append(" ").append(name).append(": ").append(((Element) all.get(element).
710 get(group).get(name)).getValue()).append(E);
711 }
712 }
713 }
714 return sb.toString();
715 }
716
717 private static class Sampler implements Element<Double>, Runnable {
718 private Lock lock = new ReentrantLock();
719 private int samplingInterval;
720 private Variable<Long> variable;
721 private long[] values;
722 private int current;
723 private long valuesSum;
724 private double rate;
725
726 public Sampler(int samplingPeriod, int samplingInterval, Variable<Long> variable) {
727 this.samplingInterval = samplingInterval;
728 this.variable = variable;
729 values = new long[samplingPeriod / samplingInterval];
730 valuesSum = 0;
731 current = -1;
732 }
733
734 public int getSamplingInterval() {
735 return samplingInterval;
736 }
737
738 public void run() {
739 try {
740 lock.lock();
741 long newValue = variable.getValue();
742 if (current == -1) {
743 valuesSum = newValue;
744 current = 0;
745 values[current] = newValue;
746 }
747 else {
748 current = (current + 1) % values.length;
749 valuesSum = valuesSum - values[current] + newValue;
750 values[current] = newValue;
751 }
752 rate = ((double) valuesSum) / values.length;
753 }
754 finally {
755 lock.unlock();
756 }
757 }
758
759 public Double getValue() {
760 return rate;
761 }
762 }
763
764 /**
765 * Add a sampling variable. <p/> This method is thread safe.
766 *
767 * @param group timer group.
768 * @param name timer name.
769 * @param period sampling period to compute rate.
770 * @param interval sampling frequency, how often the variable is probed.
771 * @param variable variable to sample.
772 */
773 public void addSampler(String group, String name, int period, int interval, Variable<Long> variable) {
774 if (scheduler == null) {
775 throw new IllegalStateException("scheduler not set, cannot sample");
776 }
777 try {
778 samplerLock.lock();
779 Map<String, Element<Double>> map = samplers.get(group);
780 if (map == null) {
781 map = samplers.get(group);
782 if (map == null) {
783 map = new HashMap<String, Element<Double>>();
784 samplers.put(group, map);
785 }
786 }
787 if (map.containsKey(name)) {
788 throw new RuntimeException(XLog.format("Sampler group=[{0}] name=[{1}] already defined", group, name));
789 }
790 Sampler sampler = new Sampler(period, interval, variable);
791 map.put(name, sampler);
792 scheduler.scheduleAtFixedRate(sampler, 0, sampler.getSamplingInterval(), TimeUnit.SECONDS);
793 }
794 finally {
795 samplerLock.unlock();
796 }
797 }
798
799 /**
800 * Return all the samplers. <p/> This method is thread safe. <p/> The samplers are live. The sampler value is a
801 * snapshot at the time the {@link Instrumentation.Element#getValue()} is invoked.
802 *
803 * @return all counters.
804 */
805 public Map<String, Map<String, Element<Double>>> getSamplers() {
806 return samplers;
807 }
808
809 }