Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / Documentation / trace / postprocess / trace-pagealloc-postprocess.pl
diff --git a/kernel/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/kernel/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
new file mode 100644 (file)
index 0000000..0a120aa
--- /dev/null
@@ -0,0 +1,418 @@
+#!/usr/bin/perl
+# This is a POC (proof of concept or piece of crap, take your pick) for reading the
+# text representation of trace output related to page allocation. It makes an attempt
+# to extract some high-level information on what is going on. The accuracy of the parser
+# may vary considerably
+#
+# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
+# other options
+#   --prepend-parent   Report on the parent proc and PID
+#   --read-procstat    If the trace lacks process info, get it from /proc
+#   --ignore-pid       Aggregate processes of the same name together
+#
+# Copyright (c) IBM Corporation 2009
+# Author: Mel Gorman <mel@csn.ul.ie>
+use strict;
+use Getopt::Long;
+
+# Identifiers for the raw tracepoint events counted per process.
+# Note: where this file writes these names as barewords inside a hash
+# subscript (e.g. ->{MM_PAGE_ALLOC}), Perl treats the bareword as a string,
+# so the counters are keyed by name rather than by the numbers below.
+use constant {
+       MM_PAGE_ALLOC                   => 1,
+       MM_PAGE_FREE                    => 2,
+       MM_PAGE_FREE_BATCHED            => 3,
+       MM_PAGE_PCPU_DRAIN              => 4,
+       MM_PAGE_ALLOC_ZONE_LOCKED       => 5,
+       MM_PAGE_ALLOC_EXTFRAG           => 6,
+       EVENT_UNKNOWN                   => 7,
+};
+
+# Identifiers for the transient state tracked between consecutive events
+use constant {
+       STATE_PCPU_PAGES_DRAINED        => 8,
+       STATE_PCPU_PAGES_REFILLED       => 9,
+};
+
+# Identifiers for the high-level events derived from the tracepoints
+use constant {
+       HIGH_PCPU_DRAINS                => 10,
+       HIGH_PCPU_REFILLS               => 11,
+       HIGH_EXT_FRAGMENT               => 12,
+       HIGH_EXT_FRAGMENT_SEVERE        => 13,
+       HIGH_EXT_FRAGMENT_MODERATE      => 14,
+       HIGH_EXT_FRAGMENT_CHANGED       => 15,
+};
+
+# Event counters, keyed by the process identifier string that
+# process_events() builds for each trace line
+my %perprocesspid;
+# The same counters summed per process name; rebuilt from %perprocesspid
+# by aggregate_perprocesspid()
+my %perprocess;
+# Command line flags filled in by GetOptions():
+# --ignore-pid, --read-procstat and --prepend-parent respectively
+my $opt_ignorepid;
+my $opt_read_procstat;
+my $opt_prepend_parent;
+
+# Catch sigint and exit on request
+# $sigint_report   - set by the handler when a report has been requested
+# $sigint_exit     - count of SIGINTs that arrived shortly after a previous one
+# $sigint_pending  - set on every SIGINT; makes process_events() stop reading
+# $sigint_received - value of time() when the most recent SIGINT was handled
+my $sigint_report = 0;
+my $sigint_exit = 0;
+my $sigint_pending = 0;
+my $sigint_received = 0;
+# SIGINT handler. A SIGINT arriving more than two seconds after the previous
+# one requests a report; one arriving sooner counts towards exiting. Once
+# more than three quick SIGINTs have been counted the script exits at once.
+# Every invocation records the arrival time and flags a pending signal.
+sub sigint_handler {
+       my $now = time;
+       my $quick_repeat = !($now - 2 > $sigint_received);
+
+       if ($quick_repeat) {
+               # Only announce the first of a run of quick repeats
+               print "Second SIGINT received quickly, exiting\n" unless $sigint_exit;
+               $sigint_exit++;
+       } else {
+               print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
+               $sigint_report = 1;
+       }
+
+       if ($sigint_exit > 3) {
+               print "Many SIGINTs received, exiting now without report\n";
+               exit;
+       }
+
+       $sigint_received = $now;
+       $sigint_pending = 1;
+}
+# Install the SIGINT handler by name
+$SIG{INT} = "sigint_handler";
+
+# Parse command line options
+GetOptions(
+       'ignore-pid'     =>     \$opt_ignorepid,
+       'read-procstat'  =>     \$opt_read_procstat,
+       'prepend-parent' =>     \$opt_prepend_parent,
+);
+
+# Defaults for dynamically discovered regexes. Used when the event's format
+# file cannot be opened by generate_traceevent_regex()
+my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';
+
+# Dynamically discovered regex
+my $regex_fragdetails;
+
+# Static regex used. Specified like this for readability and for use with /o
+#                      (process_pid)     (cpus      )   ( time  )   (tpoint    ) (details)
+my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
+# Captures the text between the parentheses of a stat line
+my $regex_statname = '[-0-9]*\s\((.*)\).*';
+# Captures the number that follows the single-letter field after the
+# parenthesised name of a stat line
+my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
+
+# Build the regex used to parse the details of one trace event.
+#
+# Arguments:
+#   $event    - event path below /sys/kernel/debug/tracing/events/
+#   $default  - regex string used when the event's format file cannot
+#               be opened
+#   remaining - the field names, in the order they are expected to appear
+#
+# When the format file can be read, the regex is derived from its
+# "print fmt:" line with %p, %d and %lu replaced by capture groups.
+#
+# Returns the regex as a string. Each space-separated "key=..." tuple is
+# compared with the next expected field name; on a mismatch a warning is
+# printed and the capture group of that tuple is dropped. Dies if expected
+# field names remain once every tuple has been checked.
+sub generate_traceevent_regex {
+       my $event = shift;
+       my $default = shift;
+       my $regex;
+
+       # Read the event format or use the default
+       if (open(my $format_fh, '<', "/sys/kernel/debug/tracing/events/$event/format")) {
+               while (my $line = <$format_fh>) {
+                       if ($line =~ /^print fmt:\s"(.*)",.*/) {
+                               $regex = $1;
+                               $regex =~ s/%p/\([0-9a-f]*\)/g;
+                               $regex =~ s/%d/\([-0-9]*\)/g;
+                               $regex =~ s/%lu/\([0-9]*\)/g;
+                       }
+               }
+               # Release the handle; nothing else reads from it
+               close($format_fh);
+       } else {
+               $regex = $default;
+       }
+
+       # Verify fields are in the right order
+       foreach my $tuple (split /\s/, $regex) {
+               my ($key) = split(/=/, $tuple);
+               my $expected = shift @_;
+               if ($key ne $expected) {
+                       print("WARNING: Format not as expected '$key' != '$expected'\n");
+                       # Drop only this key's own group. A greedy match would
+                       # run to the last ')' of the regex and leave the
+                       # remaining groups unbalanced.
+                       $regex =~ s/\Q$key\E=\(([^)]*)\)/$key=$1/;
+               }
+       }
+
+       if (defined shift @_) {
+               die("Fewer fields than expected in format");
+       }
+
+       return $regex;
+}
+# Build the regex for the details of mm_page_alloc_extfrag events. The
+# trailing arguments are the field names expected, in order
+$regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag",
+                       $regex_fragdetails_default,
+                       "page", "pfn",
+                       "alloc_order", "fallback_order", "pageblock_order",
+                       "alloc_migratetype", "fallback_migratetype",
+                       "fragmenting", "change_ownership");
+
+# Return the first line of /proc/<pid>/stat.
+#
+# Argument: the pid whose stat file should be read.
+# Returns the line as read, or the placeholder
+# "-1 (UNKNOWN_PROCESS_NAME) R 0" when the file cannot be opened or
+# yields nothing.
+sub read_statline($) {
+       my $pid = $_[0];
+       my $statline;
+
+       # Lexical handle and explicit read mode: nothing leaks into a global
+       # filehandle and the path is never interpreted as an open mode
+       if (open(my $stat_fh, '<', "/proc/$pid/stat")) {
+               $statline = <$stat_fh>;
+               close($stat_fh);
+       }
+
+       if (!defined($statline) || $statline eq '') {
+               $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
+       }
+
+       return $statline;
+}
+
+# Build a "name-pid" identifier from a pid and its stat line.
+#
+# Arguments: the pid, and a stat line as returned by read_statline().
+# Returns "swapper-0" when the pid is numerically 0, otherwise the text
+# captured by $regex_statname followed by "-" and the pid.
+# Dies if the stat line does not match $regex_statname.
+sub guess_process_pid($$) {
+       my ($pid, $statline) = @_;
+
+       if ($pid == 0) {
+               return "swapper-0";
+       }
+
+       if ($statline !~ /$regex_statname/o) {
+               die("Failed to match stat line for process name :: $statline");
+       }
+       return "$1-$pid";
+}
+
+# Describe the parent of a process as a "name-pid" string.
+#
+# Arguments: the pid of the child and the child's stat line.
+# Returns "NOPARENT-0" when the pid is numerically 0; otherwise the parent
+# pid is taken from the stat line via $regex_statppid and described with
+# guess_process_pid(). Dies if the stat line does not match.
+sub parent_info($$) {
+       my ($pid, $statline) = @_;
+
+       return "NOPARENT-0" if ($pid == 0);
+
+       unless ($statline =~ /$regex_statppid/o) {
+               die("Failed to match stat line process ppid:: $statline");
+       }
+
+       # Look up the parent's own stat line to learn its name
+       my $parent_pid = $1;
+       my $parent_statline = read_statline($parent_pid);
+       return guess_process_pid($parent_pid, $parent_statline);
+}
+
+# Read trace lines from STDIN and update the counters in %perprocesspid.
+#
+# Each line is matched against $regex_traceevent; lines that do not match
+# are skipped. The loop ends at end of input, or after the current line
+# when $sigint_pending is set.
+#
+# Note: the barewords inside the {...} subscripts below are string hash
+# keys. HIGH_EXT_FRAG has no constant of that exact name, but the same
+# key is read back by dump_stats() and aggregate_perprocesspid().
+sub process_events {
+       my $traceevent;
+       my $process_pid;
+       my $cpus;
+       my $timestamp;
+       my $tracepoint;
+       my $details;
+       my $statline;
+
+       # Read each line of the event log
+EVENT_PROCESS:
+       while ($traceevent = <STDIN>) {
+               if ($traceevent =~ /$regex_traceevent/o) {
+                       $process_pid = $1;
+                       $tracepoint = $4;
+
+                       if ($opt_read_procstat || $opt_prepend_parent) {
+                               # Split "name-pid" into its two parts.
+                               # NOTE(review): the result of this match is not
+                               # checked; if the identifier has no "-<digits>"
+                               # suffix, $1 and $2 do not come from this
+                               # match - confirm this cannot happen
+                               $process_pid =~ /(.*)-([0-9]*)$/;
+                               my $process = $1;
+                               my $pid = $2;
+
+                               $statline = read_statline($pid);
+
+                               # Only consult the stat line for a name when the
+                               # trace did not supply one
+                               if ($opt_read_procstat && $process eq '') {
+                                       $process_pid = guess_process_pid($pid, $statline);
+                               }
+
+                               if ($opt_prepend_parent) {
+                                       $process_pid = parent_info($pid, $statline) . " :: $process_pid";
+                               }
+                       }
+
+                       # Unnecessary in this script. Uncomment if required
+                       # $cpus = $2;
+                       # $timestamp = $3;
+               } else {
+                       next;
+               }
+
+               # Dispatch on the tracepoint name with an if/elsif chain
+               if ($tracepoint eq "mm_page_alloc") {
+                       $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
+               } elsif ($tracepoint eq "mm_page_free") {
+                       $perprocesspid{$process_pid}->{MM_PAGE_FREE}++
+               } elsif ($tracepoint eq "mm_page_free_batched") {
+                       $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++;
+               } elsif ($tracepoint eq "mm_page_pcpu_drain") {
+                       # Count the event and remember that a drain is in progress
+                       $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
+                       $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
+               } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
+                       # Count the event and remember that a refill is in progress
+                       $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
+                       $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
+               } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
+
+                       # Extract the details of the event now
+                       # NOTE(review): presumably $5 is still the (details)
+                       # group of the $regex_traceevent match above - confirm
+                       $details = $5;
+
+                       my ($page, $pfn);
+                       my ($alloc_order, $fallback_order, $pageblock_order);
+                       my ($alloc_migratetype, $fallback_migratetype);
+                       my ($fragmenting, $change_ownership);
+
+                       # This "next" also skips the drain/refill checks and the
+                       # pending-signal check at the bottom of the loop
+                       if ($details !~ /$regex_fragdetails/o) {
+                               print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
+                               next;
+                       }
+
+                       $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;
+                       $page = $1;
+                       $pfn = $2;
+                       $alloc_order = $3;
+                       $fallback_order = $4;
+                       $pageblock_order = $5;
+                       $alloc_migratetype = $6;
+                       $fallback_migratetype = $7;
+                       $fragmenting = $8;
+                       $change_ownership = $9;
+
+                       # A fragmenting fallback is counted as severe when the
+                       # fallback order is 3 or less, otherwise as moderate
+                       if ($fragmenting) {
+                               $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
+                               if ($fallback_order <= 3) {
+                                       $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
+                               } else {
+                                       $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
+                               }
+                       }
+                       if ($change_ownership) {
+                               $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
+                       }
+               } else {
+                       $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
+               }
+
+               # Catch a full pcpu drain event: a run of drain tracepoints for
+               # this process ends at the first event of another kind
+               if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
+                               $tracepoint ne "mm_page_pcpu_drain") {
+
+                       $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
+                       $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
+               }
+
+               # Catch a full pcpu refill event: a run of zone-locked
+               # allocations ends at the first event of another kind
+               if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
+                               $tracepoint ne "mm_page_alloc_zone_locked") {
+                       $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
+                       $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
+               }
+
+               # Hand control back to signal_loop() once a SIGINT has arrived
+               if ($sigint_pending) {
+                       last EVENT_PROCESS;
+               }
+       }
+}
+
+# Print a table of counters, one row per key of the given hash.
+#
+# Argument: reference to a hash mapping a process identifier to a hash of
+# counters (as built in %perprocesspid or %perprocess).
+#
+# Before a row is printed, a drain or refill still marked as in progress
+# is counted as one more HIGH_PCPU_DRAINS / HIGH_PCPU_REFILLS and the
+# marker is cleared. The top-level hash is copied but the per-process
+# hashes are shared with the caller, so this adjustment is visible there.
+sub dump_stats {
+       my $hashref = shift;
+       my %stats = %$hashref;
+
+       # Size the first column from the names actually being printed, so the
+       # table lines up whichever hash was passed in
+       my $max_strlen = 0;
+       foreach my $name (keys %stats) {
+               my $len = length($name);
+               if ($len > $max_strlen) {
+                       $max_strlen = $len;
+               }
+       }
+       $max_strlen += 2;
+
+       my $header_format = "%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n";
+       my $row_format    = "%-" . $max_strlen . "s %8d %10d   %8d %8d   %8d %8d %8d   %8d %8d %8d %8d %8d %8d\n";
+
+       # Three header lines; each column heading reads top to bottom
+       printf("\n");
+       printf($header_format,
+               "Process", "Pages",  "Pages",      "Pages", "Pages", "PCPU",  "PCPU",   "PCPU",    "Fragment",  "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
+       printf($header_format,
+               "details", "allocd", "allocd",     "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing",   "Changed", "Severe", "Moderate", "");
+       printf($header_format,
+               "",        "",       "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");
+
+       foreach my $process_pid (keys %stats) {
+               my $counters = $stats{$process_pid};
+
+               # Dump final aggregates
+               if ($counters->{STATE_PCPU_PAGES_DRAINED}) {
+                       $counters->{HIGH_PCPU_DRAINS}++;
+                       $counters->{STATE_PCPU_PAGES_DRAINED} = 0;
+               }
+               if ($counters->{STATE_PCPU_PAGES_REFILLED}) {
+                       $counters->{HIGH_PCPU_REFILLS}++;
+                       $counters->{STATE_PCPU_PAGES_REFILLED} = 0;
+               }
+
+               printf($row_format,
+                       $process_pid,
+                       $counters->{MM_PAGE_ALLOC},
+                       $counters->{MM_PAGE_ALLOC_ZONE_LOCKED},
+                       $counters->{MM_PAGE_FREE},
+                       $counters->{MM_PAGE_FREE_BATCHED},
+                       $counters->{MM_PAGE_PCPU_DRAIN},
+                       $counters->{HIGH_PCPU_DRAINS},
+                       $counters->{HIGH_PCPU_REFILLS},
+                       $counters->{MM_PAGE_ALLOC_EXTFRAG},
+                       $counters->{HIGH_EXT_FRAG},
+                       $counters->{HIGH_EXT_FRAGMENT_CHANGED},
+                       $counters->{HIGH_EXT_FRAGMENT_SEVERE},
+                       $counters->{HIGH_EXT_FRAGMENT_MODERATE},
+                       $counters->{EVENT_UNKNOWN});
+       }
+}
+
+# Rebuild %perprocess from %perprocesspid, summing the counters of all
+# entries whose key is the same once a trailing "-<digits>" is removed.
+# Entries left with an empty name are collected under "NO_PROCESS_NAME".
+sub aggregate_perprocesspid() {
+       # Counters carried over into the aggregate
+       my @counters = qw(
+               MM_PAGE_ALLOC
+               MM_PAGE_ALLOC_ZONE_LOCKED
+               MM_PAGE_FREE
+               MM_PAGE_FREE_BATCHED
+               MM_PAGE_PCPU_DRAIN
+               HIGH_PCPU_DRAINS
+               HIGH_PCPU_REFILLS
+               MM_PAGE_ALLOC_EXTFRAG
+               HIGH_EXT_FRAG
+               HIGH_EXT_FRAGMENT_CHANGED
+               HIGH_EXT_FRAGMENT_SEVERE
+               HIGH_EXT_FRAGMENT_MODERATE
+               EVENT_UNKNOWN
+       );
+
+       undef %perprocess;
+
+       foreach my $with_pid (keys %perprocesspid) {
+               (my $name = $with_pid) =~ s/-([0-9])*$//;
+               $name = "NO_PROCESS_NAME" if ($name eq '');
+
+               foreach my $counter (@counters) {
+                       $perprocess{$name}->{$counter} += $perprocesspid{$with_pid}->{$counter};
+               }
+       }
+}
+
+# Print the statistics table: per process identifier by default, or
+# aggregated by process name when --ignore-pid was given.
+sub report() {
+       my $stats_ref = \%perprocesspid;
+
+       if ($opt_ignorepid) {
+               aggregate_perprocesspid();
+               $stats_ref = \%perprocess;
+       }
+
+       dump_stats($stats_ref);
+}
+
+# Alternate between processing events and servicing SIGINT requests until
+# a pass finishes with no signal pending and no report just printed.
+sub signal_loop() {
+       while (1) {
+               my $report_printed = 0;
+               process_events();
+
+               # Handle pending signals if any
+               if ($sigint_pending) {
+                       my $now = time;
+
+                       if ($sigint_exit) {
+                               print "Received exit signal\n";
+                               $sigint_pending = 0;
+                       }
+                       # A requested report is printed only once at least two
+                       # seconds have passed since the last SIGINT
+                       if ($sigint_report && $now >= $sigint_received + 2) {
+                               report();
+                               $sigint_report = 0;
+                               $sigint_pending = 0;
+                               $report_printed = 1;
+                       }
+               }
+
+               last unless ($sigint_pending || $report_printed);
+       }
+}
+
+# Process events until signal_loop() returns, then print a final report
+signal_loop();
+report();