#!/usr/bin/perl
## -*- mode: Perl -*-
##
## Copyright (c) 2012 The University of Utah
## All rights reserved.
##
## This file is distributed under the University of Illinois Open Source
## License.  See the file COPYING for details.

######################################################################
#
# This is a generic Delta debugger that is parameterized by an
# interestingness test implemented as a shell script and a collection
# of transformation operators implemented as Perl modules.
#
####################################################################

## TODO: 

# rig each pass so that it can speculate optimistically,
# pessimistically, or not at all

# or there may be better ways to combine these...

# turn off parallel execution when interestingness test gets fast

# autodetect number of processors but permit over-riding from command
# line

# add back in the deterministic mode based on waitpid

use strict;
use warnings;
require 5.10.0;

use FindBin;
use lib $FindBin::Bin, '/usr/share/creduce/perl';
use Exporter::Lite;
use File::Basename;
use File::Which;
use POSIX;
use Regexp::Common;
use Benchmark::Timer;
use Cwd;
use File::Temp;
use File::Copy;
use Sys::CPU;

use creduce_config qw(PACKAGE_STRING);
use creduce_utils;

######################################################################

my $NPROCS = Sys::CPU::cpu_count();    
#my $NPROCS = 1;

# if set, ensure the delta test succeeds before starting each pass
my $SANITY = 0;

# if set, show a nice ascii spinner that tells us how quickly failing
# delta tests are happening (FIXME: currently not working)
my $SPINNER = 0;

# if set, cache results (FIXME: currently not working)
my $CACHE = 0;

######################################################################

my $orig_file_size;

sub print_pct ($) {
    (my $l) = @_;
    my $pct = 100 - ($l*100.0/$orig_file_size);
    printf "(%.1f %%, $l bytes)\n", $pct;
}

# these are set at startup time and never change
my $test;
my $trial_num = 0;   

my $toreduce;
my $toreduce_orig;
my $toreduce_base;
my $dir_base;
my $suffix;

my $ORIG_DIR;

# global invariant: this filename always points to the best delta
# variant we've seen so far
my $toreduce_best;

######################################################################

my $dircounter=0;
sub make_tmpdir () {
    if (1) {
	return File::Temp::tempdir( CLEANUP => 1);
    } else {
	$dircounter++;
	my $dir = "/tmp/_tmp_".getpid()."_".$dircounter;
	mkdir($dir);
	return $dir;
    }
}


my @suffixes = (".c", ".C", ".cc", ".cpp", ".CPP", ".c++", ".cp", ".cxx");

sub run_test ($) {
    (my $fn) = @_;
    my $res = runit "$test $fn >/dev/null 2>&1";
    # my $res = runit "$test $fn";
    return ($res == 0);
}

my $good_cnt;
my $bad_cnt;
my $pass_num = 0;
my %method_worked = ();
my %method_failed = ();
my $old_size = 1000000000;

sub sanity_check () {
    print "sanity check... " if $VERBOSE;

    my $tmpdir = make_tmpdir();
    chdir $tmpdir or die;
    File::Copy::copy($toreduce_best,$toreduce) or die;

    my $res = run_test ($toreduce);
    if (!$res) {
	die "test (and sanity check) fails";
    }
    print "successful\n" if $VERBOSE;
    chdir $ORIG_DIR or die;
    File::Path::rmtree($tmpdir);
}

my %cache = ();
my $cache_hits = 0;
my $test_cnt = 0;

my $cur_key = 0;
sub spinner() {
    my @chars = ("-", "\\", "|", "/");
    my $backsp = "\b";
    if ($cur_key == @chars) {
	$cur_key = 0;
    }
    print $backsp . $chars[$cur_key];
    $cur_key++;
}

sub delta_test ($$$$) {
    (my $method, my $arg, my $state, my $fn) = @_;
    my $prog = read_file($fn);
    my $len = length ($prog);

    my $result;
    if ($CACHE) {
	$result = $cache{$len}{$prog};
    }

    if (defined($result)) {
	$cache_hits++;
    } else {    
	$result = run_test ($fn);
	if ($CACHE) {
	    $cache{$len}{$prog} = $result;
	}
    }

    $test_cnt++;
    
    if ($result) {
	my $size = length ($prog);
	if ($CACHE && ($size < $old_size)) {
	    foreach my $k (keys %cache) {
		if ($k > ($size + 5000)) {
		    $cache{$k} = ();
		}
	    }
	}
	$old_size = $size;
	return 1;
    } else {
	return 0;
    }
}

sub call_prereq_check ($) {
    (my $method) = @_;
    my $str = $method."::check_prereqs";
    no strict "refs";
    if (!(&${str}())) {
	die "prereqs not found for pass $method";
    }
    print "successfully checked prereqs for $method\n" if $VERBOSE;
}

sub call_new ($$$) {
    (my $method,my $fn,my $arg) = @_;    
    my $str = $method."::new";
    no strict "refs";
    return &${str}($fn,$arg);
}

sub call_advance ($$$$) {
    (my $method,my $fn,my $arg,my $state) = @_;    
    my $str = $method."::advance";
    no strict "refs";
    return &${str}($fn,$arg,$state);
}

sub call_transform ($$$$) {
    (my $method,my $fn,my $arg,my $state) = @_;    
    my $str = $method."::transform";
    no strict "refs";
    return &${str}($fn,$arg,$state);
}

my @kids = ();

sub killem () {
    while (1) {
	return if (scalar(@kids) == 0);
	my $kidref = shift @kids;
	(my $pid, my $newsh, my $tmpdir, my $tmpfn) = @{$kidref};
	kill ('TERM', $pid);
	waitpid ($pid, 0);	
	File::Path::rmtree ($tmpdir);	
    }
}

# invariant: parallel execution does not escape this function
sub delta_pass ($) {
    (my $mref) = @_;    
    my $delta_method = ${$mref}{"name"};
    my $delta_arg = ${$mref}{"arg"};
    $good_cnt = 0;
    $bad_cnt = 0;

    die unless (scalar(@kids)==0);

    print "\n" if $VERBOSE;
    print "===< $delta_method :: $delta_arg >===\n";

    my $orig_tmpfn = $toreduce;
    File::Copy::copy($toreduce_best,$orig_tmpfn) or die;
    my $state = call_new ($delta_method,$orig_tmpfn,$delta_arg);

    if ($SANITY) {
	sanity_check();
    }

    my $stopped = 0;
  AGAIN:

    # create child processes until either we've created enough or we get a STOP
    while (!$stopped && scalar(@kids) < $NPROCS) {
	my $tmpdir = make_tmpdir();
	chdir $tmpdir or die;
	my $tmpfn = Cwd::abs_path($orig_tmpfn);
	File::Copy::copy($toreduce_best,$tmpfn) or die;
	(my $delta_res, $state) = call_transform ($delta_method,$tmpfn,$delta_arg,$state);
	die unless ($delta_res == $OK || $delta_res == $STOP);
	if ($delta_res == $STOP) {
	    chdir $ORIG_DIR or die;
	    File::Path::rmtree ($tmpdir);	
	    $stopped = 1;
	} else {

	    # FIXME check the cache here

	    my $pid = fork();
	    die unless ($pid >= 0);
	    my @l = ($pid, $state, $tmpdir, $tmpfn);
	    $state = call_advance ($delta_method,$tmpfn,$delta_arg,$state);
	    push @kids, \@l;
	    my $delta_result;
	    # print "[${pass_num} ${delta_method} :: ${delta_arg} s:$good_cnt f:$bad_cnt] " if $VERBOSE;
	    if ($pid==0) {
		$delta_result = delta_test ($delta_method,$delta_arg,$state,$tmpfn);
		exit ($delta_result);
	    }
	    #print "just forked $pid\n";
	    chdir $ORIG_DIR or die;
	}
    }

    # at this point wait if there's anyone to wait for
    if (scalar(@kids)>0) {	
	my $xpid = wait();
	die if ($xpid==-1);
	my $ret = $?;
	my $delta_result = $ret >> 8;	    

	my $found = 0;
	my $pid;
	my $newsh;
	my $tmpdir;
	my $tmpfn;
	for (my $i=0; $i<scalar(@kids); $i++) {
	    my $kidref = $kids[$i];
	    die unless (scalar(@{$kidref})==4);
	    ($pid, $newsh, $tmpdir, $tmpfn) = @{$kidref};
	    if ($xpid==$pid) {
		$found = 1;
		splice (@kids, $i, 1);
		last;
	    }
	}
	die unless $found;

	# FIXME update the cache here

	if ($delta_result) { 
	    # now that the delta test succeeded, this becomes our new
	    # best version--this has to be done in the parent process
	    killem ();
	    $good_cnt++;
	    $method_worked{$delta_method}{$delta_arg}++;
	    $state = $newsh;
	    $stopped = 0;
	    File::Copy::copy($tmpfn,$toreduce_best) or die;
	    print "success " if $VERBOSE;
	    print_pct(-s $toreduce_best);
	} else {
	    print "failure\n" if $VERBOSE;
	    $bad_cnt++;
	    $method_failed{$delta_method}{$delta_arg}++;
	}
	
	File::Path::rmtree ($tmpdir);

    }

    # pass termination condition
    return if ($stopped && scalar(@kids)==0);

    goto AGAIN;
}

sub usage() {
    print "usage: creduce test_script.sh file.c\n";
    die;
}

my @all_methods = (
    { "name" => "pass_blank",    "arg" => "0",                                     "first_pass_pri" =>   1, },

    { "name" => "pass_lines",    "arg" => "0",                      "pri" => 410,  "first_pass_pri" =>  20,   "last_pass_pri" => 999, },
    { "name" => "pass_lines",    "arg" => "0",                                     "first_pass_pri" =>  21, },
    { "name" => "pass_lines",    "arg" => "0",                                     "first_pass_pri" =>  22, },
    { "name" => "pass_lines",    "arg" => "1",                      "pri" => 411,  "first_pass_pri" =>  23, },
    { "name" => "pass_lines",    "arg" => "1",                                     "first_pass_pri" =>  24, },
    { "name" => "pass_lines",    "arg" => "1",                                     "first_pass_pri" =>  25, },
    { "name" => "pass_lines",    "arg" => "2",                      "pri" => 412,  "first_pass_pri" =>  27, },
    { "name" => "pass_lines",    "arg" => "2",                                     "first_pass_pri" =>  28, },
    { "name" => "pass_lines",    "arg" => "2",                                     "first_pass_pri" =>  29, },
    { "name" => "pass_lines",    "arg" => "10",                     "pri" => 413,  "first_pass_pri" =>  30, },
    { "name" => "pass_lines",    "arg" => "10",                                    "first_pass_pri" =>  31, },
    { "name" => "pass_lines",    "arg" => "10",                                    "first_pass_pri" =>  32, },

    { "name" => "pass_crc",      "arg" => "",                                      "first_pass_pri" => 110, },
    { "name" => "pass_ternary",  "arg" => "b",                      "pri" => 104,  },
    { "name" => "pass_ternary",  "arg" => "c",                      "pri" => 105,  },
    { "name" => "pass_balanced", "arg" => "curly",                  "pri" => 110,  "first_pass_pri" =>  35, },
    { "name" => "pass_balanced", "arg" => "curly2",                 "pri" => 111,  "first_pass_pri" =>  36, },
    { "name" => "pass_balanced", "arg" => "curly3",                 "pri" => 112,  "first_pass_pri" =>  37, },
    { "name" => "pass_balanced", "arg" => "parens",                 "pri" => 113,  },
    { "name" => "pass_balanced", "arg" => "angles",                 "pri" => 114,  },
    { "name" => "pass_balanced", "arg" => "curly-only",             "pri" => 150,  },
    { "name" => "pass_balanced", "arg" => "parens-only",            "pri" => 151,  },
    { "name" => "pass_balanced", "arg" => "angles-only",            "pri" => 152,  },
    { "name" => "pass_clang",    "arg" => "remove-namespace",       "pri" => 200,  },
    { "name" => "pass_clang",    "arg" => "aggregate-to-scalar",    "pri" => 201,  },
   #{ "name" => "pass_clang",    "arg" => "binop-simplification",   "pri" => 201,  },
    { "name" => "pass_clang",    "arg" => "local-to-global",        "pri" => 202,  },
    { "name" => "pass_clang",    "arg" => "param-to-global",        "pri" => 203,  },
    { "name" => "pass_clang",    "arg" => "param-to-local",         "pri" => 204,  },
    { "name" => "pass_clang",    "arg" => "remove-nested-function", "pri" => 205,  },
    { "name" => "pass_clang",    "arg" => "rename-fun",                            "last_pass_pri" => 207,  },
    { "name" => "pass_clang",    "arg" => "union-to-struct",        "pri" => 208,  },
    { "name" => "pass_clang",    "arg" => "rename-param",                          "last_pass_pri" => 209,  },
    { "name" => "pass_clang",    "arg" => "rename-var",                            "last_pass_pri" => 210,  },
    { "name" => "pass_clang",    "arg" => "rename-class",                          "last_pass_pri" => 211,  },
    { "name" => "pass_clang",    "arg" => "return-void",            "pri" => 212,  },
    { "name" => "pass_clang",    "arg" => "simple-inliner",         "pri" => 213,  },
    { "name" => "pass_clang",    "arg" => "reduce-pointer-level",   "pri" => 214,  },
    { "name" => "pass_clang",    "arg" => "lift-assignment-expr",   "pri" => 215,  },
    { "name" => "pass_clang",    "arg" => "copy-propagation",       "pri" => 216,  },
    { "name" => "pass_clang",    "arg" => "callexpr-to-value",      "pri" => 217,  "first_pass_pri" => 49, },
    { "name" => "pass_clang",    "arg" => "replace-callexpr",       "pri" => 218,  "first_pass_pri" => 50, },
    { "name" => "pass_clang",    "arg" => "simplify-callexpr",      "pri" => 219,  "first_pass_pri" => 51, },
    { "name" => "pass_clang",    "arg" => "remove-unused-function", "pri" => 220,  "first_pass_pri" => 33, },
    { "name" => "pass_clang",    "arg" => "remove-unused-enum-member", "pri" => 221, "first_pass_pri" => 51, },
    { "name" => "pass_clang",    "arg" => "remove-enum-member-value", "pri" => 222, "first_pass_pri" => 52, },
    { "name" => "pass_clang",    "arg" => "remove-unused-var",      "pri" => 223,  "first_pass_pri" => 53, },
    { "name" => "pass_clang",    "arg" => "simplify-if",            "pri" => 224,  },
    { "name" => "pass_clang",    "arg" => "reduce-array-dim",       "pri" => 225,  },
    { "name" => "pass_clang",    "arg" => "reduce-array-size",      "pri" => 226,  },
    { "name" => "pass_clang",    "arg" => "move-function-body",     "pri" => 227,  },
    { "name" => "pass_clang",    "arg" => "simplify-comma-expr",    "pri" => 228,  },
    { "name" => "pass_clang",    "arg" => "simplify-dependent-typedef",   "pri" => 229,  },
    { "name" => "pass_clang",    "arg" => "replace-simple-typedef", "pri" => 230,  },
    { "name" => "pass_clang",    "arg" => "remove-unused-field",    "pri" => 231,  },
    { "name" => "pass_clang",    "arg" => "reduce-class-template-param",  "pri" => 232,  },
    { "name" => "pass_clang",    "arg" => "remove-trivial-base-template", "pri" => 233,  },
    { "name" => "pass_clang",    "arg" => "class-template-to-class",      "pri" => 234,  },
    { "name" => "pass_clang",    "arg" => "remove-base-class",      "pri" => 235,  },
    { "name" => "pass_clang",    "arg" => "replace-derived-class",  "pri" => 236,  },
    { "name" => "pass_clang",    "arg" => "remove-unresolved-base", "pri" => 237,  },
    { "name" => "pass_clang",    "arg" => "remove-ctor-initializer","pri" => 238,  },
    { "name" => "pass_clang",    "arg" => "empty-struct-to-int",    "pri" => 239,  },
    { "name" => "pass_clang",    "arg" => "remove-pointer",         "pri" => 240,  },
    { "name" => "pass_clang",    "arg" => "remove-array",           "pri" => 241,  },
    { "name" => "pass_clang",    "arg" => "remove-addr-taken",      "pri" => 242,  },
    { "name" => "pass_clang",    "arg" => "combine-global-var",                    "last_pass_pri" => 990, },
    { "name" => "pass_clang",    "arg" => "combine-local-var",                     "last_pass_pri" => 991, },
    { "name" => "pass_clang",    "arg" => "simplify-struct-union-decl",            "last_pass_pri" => 992, },
    { "name" => "pass_clang",    "arg" => "move-global-var",                       "last_pass_pri" => 993, },
    { "name" => "pass_clang",    "arg" => "unify-function-decl",                   "last_pass_pri" => 994, },
    { "name" => "pass_peep",     "arg" => "a",                      "pri" => 500,  },
    { "name" => "pass_peep",     "arg" => "b",                      "pri" => 501,  },
    { "name" => "pass_ints",     "arg" => "a",                      "pri" => 600,  },
    { "name" => "pass_ints",     "arg" => "b",                      "pri" => 601,  },
    { "name" => "pass_ints",     "arg" => "c",                      "pri" => 602,  },
    { "name" => "pass_ints",     "arg" => "d",                      "pri" => 603,  },
    { "name" => "pass_ints",     "arg" => "e",                      "pri" => 603,  },
    { "name" => "pass_indent",   "arg" => "regular",                "pri" => 1000, },
    { "name" => "pass_indent",   "arg" => "final",                                 "last_pass_pri" => 1000, },
    );

sub pass_iterator ($) {
    (my $which) = @_;
    my $last_pri = -999999;
    my $done = 0;

    return sub {
	my $next;
	foreach my $href (@all_methods) {
	    my %h = %{$href};
	    my $p = $h{$which};
	    next unless defined($p);
	    next unless ($p>$last_pri);
	    if (!defined($next)) {
		$next = $href;
		next;
	    }
	    my %nh = %{$next};
	    if ($p < $nh{$which}) {
		$next = $href;
		next
	    }
	}
	if (defined($next)) {
	    my %nh = %{$next};
	    $last_pri = $nh{$which};
	}
        return $next;
    };
}

############################### main #################################

my $timer = Benchmark::Timer->new();
$timer->start();

my %prereqs_checked;
foreach my $mref (@all_methods) {
    my %method = %{$mref};
    my $mname = $method{"name"};
    die unless defined ($mname);
    next if defined ($prereqs_checked{$mname});
    $prereqs_checked{$mname} = 1;
    eval "require $mname";
    call_prereq_check($mname);
}
print "\n" if $VERBOSE;

$test = Cwd::abs_path(shift @ARGV);
usage unless defined($test);
if (!(-e $test)) {
    print "test script '$test' not found\n";
    usage();
}
if (!(-f $test)) {
    print "test script '$test' is not a plain file\n";
    usage();
}
if (!(-r $test)) {
    print "test script '$test' is not readable\n";
    usage();
}
if (!(-x $test)) {
    print "test script '$test' is not executable\n";
    usage();
}

$toreduce = shift @ARGV;
usage unless defined($toreduce);
if (!(-e $toreduce)) {
    print "'$toreduce' file not found\n";
    usage();
}
if (!(-f $toreduce)) {
    print "'$toreduce' is not a plain file\n";
    usage();
}
if (!(-r $toreduce)) {
    print "'$toreduce' is not readable\n";
    usage();
}
if (!(-w $toreduce)) {
    print "'$toreduce' is not writable\n";
    usage();
}

while (my $arg = shift @ARGV) {
    if ($arg eq "-n") {
	$NPROCS = shift @ARGV;
	die unless defined $NPROCS;
    }
}

print "running $NPROCS interestingness tests in parallel\n";

# Put scratch files ($toreduce_best, $toreduce_orig) in the current
# working directory.
($toreduce_base, $dir_base, $suffix) = fileparse($toreduce,@suffixes);

$ORIG_DIR = getcwd();

# absolute path so we can refer to this file from temporary working
# dirs
$toreduce_best  = Cwd::abs_path("$toreduce_base.best");
$toreduce_orig = "$toreduce_base.orig";

File::Copy::copy($toreduce,$toreduce_orig) or die;
File::Copy::copy($toreduce,$toreduce_best) or die;

my $file_size = -s $toreduce;
$orig_file_size = $file_size;

# unconditionally do this just once since otherwise output is
# confusing when the initial test fails
sanity_check();

# some passes we run first since they often make good headway quickly
print "INITIAL PASS\n" if $VERBOSE;
{
    my $next = pass_iterator("first_pass_pri");
    while (my $item = $next->()) {
	delta_pass ($item);
    }
}

# iterate to global fixpoint
print "MAIN PASSES\n" if $VERBOSE;
$file_size = -s $toreduce;

while (1) {
    my $next = pass_iterator("pri");
    while (my $item = $next->()) {
	delta_pass ($item);
    }
    $pass_num++;
    my $s = -s $toreduce_best;
    print "Termination check: size was $file_size; now $s\n";
    last if ($s >= $file_size);
    $file_size = $s;
}

# some passes we run last since they work best as cleanup
print "CLEANUP PASS\n" if $VERBOSE;
{
    my $next = pass_iterator("last_pass_pri");
    while (my $item = $next->()) {
	delta_pass ($item);
    }
}

print "===================== done ====================\n";

print "\n";
print "pass statistics:\n";
foreach my $mref (sort @all_methods) {
    my $method = ${$mref}{"name"};
    my $arg = ${$mref}{"arg"};
    my $w = $method_worked{$method}{$arg};
    $w=0 unless defined($w);
    my $f = $method_failed{$method}{$arg};
    $f=0 unless defined($f);
    print "  method $method :: $arg worked $w times and failed $f times\n";
}

print "\n";

# this should be the only time we touch the original file
File::Copy::copy($toreduce_best,$toreduce) or die;

print "reduced test case:\n\n";
open INF, "<$toreduce" or die;
while (<INF>) {
    print;
}
close INF;
print "\n";

$timer->stop();
my $time = int($timer->result());
print "elapsed time: $time seconds\n";

######################################################################
