+Interpolate histo bin values as a function of the parameter space by loading
+from the usual data directory structure $datadir/mc/{rundirs}
+
+TODO:
+ * Use weight file position matches to exclude some bins, as well as path matching
+ * Handle run combination file/string (write a hash of the run list into the ipol filename?)
+ * Support asymm error parameterisation
+"""
+
+import optparse, os, sys
+op = optparse.OptionParser(usage=__doc__)
+op.add_option("--pname", "--pfile", dest="PNAME", default="params.dat", help="Name of the params file to be found in each run directory (default: %default)")
+op.add_option("--wfile", dest="WFILE", default=None, help="Path to a weight file, used to restrict ipol building to a subset of bins (default: %default)")
+op.add_option("--ierr", dest="IERR", default="median", help="Whether to interpolate MC errors: none, mean, median, symm (default: %default)") #< add rel, asymm
+ print "Filtered observables by path, %d remaining" % len(HISTOS)
+HNAMES = HISTOS.keys()
+
+## If there's nothing left to interpolate, exit!
+if not HNAMES:
+ print "No observables remaining... exiting"
+ sys.exit(1)
+
+
+## Robustness tests and cleaning: only retain runs that contain every histo
+# TODO: combine with weights histo vetoing -- should we explicitly unload unused run data, or keep it for the next combination to use? Or do we now leave runcombs to the user?
+bad, badnum = [], []
+for irun, run in enumerate(RUNS):
+ for hn in HNAMES:
+ if not HISTOS[hn].has_key(run):
+ bad.append(run)
+ badnum.append(irun)
+ break
+if bad:
+ print "Found %d bad runs in %d total... removing" % (len(bad), len(RUNS))
+ goodr, goodp = [], []
+ for irun, run in enumerate(RUNS):
+ if not irun in badnum:
+ goodr.append(run)
+ goodp.append(PARAMSLIST[irun])
+ RUNS = goodr
+ PARAMSLIST = goodp
+
+## If there's nothing left to interpolate, exit!
+if not RUNS:
+ print "No valid runs remaining... exiting"
+ sys.exit(1)
+
+
+IHISTOS = {}
+
+from professor2.ml import *
+
+def worker(q, rdict):
+ "Function to make bin ipols and store ipol persistency strings for each histo"
+ while True:
+ if q.empty():
+ break
+ hn = q.get()
+ histos = HISTOS[hn]
+ ih = mk_MLHisto(histos, RUNS, PARAMSLIST)
+ del HISTOS[hn] #< pro-actively clear up memory
+ rdict[hn] = ih#zlib.compress(s, 9) #< save some memory
+ del histos
+
+
+print "\n\nParametrising...\n"
+import time, multiprocessing
+time1 = time.time()
+
+## A shared memory object is required for coefficient retrieval
+from multiprocessing import Manager
+manager = Manager()
+tempDict = manager.dict()
+
+## The job queue
+q = multiprocessing.Queue()
+map(lambda x:q.put(x), HNAMES)
+
+## Fire away
+workers = [multiprocessing.Process(target=worker, args=(q, tempDict)) for i in range(opts.MULTI)]
+map(lambda x:x.start(), workers)
+map(lambda x:x.join(), workers)
+
+# ## Finally copy the result dictionary into the object itself
+Write out interpolated histograms at a given parameter point.
+
+"""
+
+import optparse, os, sys
+op = optparse.OptionParser(usage=__doc__)
+op.add_option("--wfile", dest="WFILE", default=None, help="Path to a weight file, used to restrict output to a subset of histograms (default: %default)")
+op.add_option("-o", dest="OUTPUT", default="mlprediction.yoda", help="output file name (default: %default)")
+op.add_option("-v", "--debug", dest="DEBUG", action="store_true", default=False, help="Turn on some debug messages")
+op.add_option("-q", "--quiet", dest="QUIET", action="store_true", default=False, help="Turn off messages")
+opts, args = op.parse_args()
+
+
+try:
+ import yoda
+except ImportError:
+ "YODA not found, exiting"
+ import sys
+ sys.exit(1)
+
+
+
+## Get mandatory arguments
+if len(args) < 1:
+ print "Argument missing... exiting\n\n"
+ op.print_usage()
+ sys.exit(1)
+PARAMSTR = args[0]
+IFILE = "ipol.dat"
+if len(args) >= 2:
+ IFILE = args[1]
+
+
+import professor2 as prof
+if not opts.QUIET:
+ print prof.logo
+
+## Parse the param point argument
+PARAMS = None
+if os.path.exists(PARAMSTR):
+ with open(args[0]) as f:
+ PARAMS = [float(l.strip().split()[-1]) for l in f if not l.startswith("#")]
+else:
+ print "No parameter file given or specified param file does not exist, exiting..."
+ sys.exit(1)
+
+## Read ML histos
+
+import cPickle
+MLHISTOS=cPickle.load(open(IFILE, 'rb'))
+
+## Weight file parsing
+if opts.WFILE:
+ matchers = prof.read_pointmatchers(opts.WFILE)
+ for hn in MLHISTOS.keys():
+ if not any(m.match_path(hn) for m in matchers.keys()):
+ del MLHISTOS[hn]
+ if len(MLHISTOS.keys())==0:
+ print "Nothing left after weight file parsing, exiting"
+ sys.exit(0)
+
+
+## Write out ipolhistos
+ofile = opts.OUTPUT
+scatters=[MLHISTOS[k].toDataHisto(PARAMS).toScatter2D() for k in sorted(MLHISTOS.keys())]
+Compute and plot the relative deviations of the given interpolation from the
+corresponding sample points in the given <runsdir>, for both bin values and bin
+errors. Histograms are made for each histogram independently (summed over bins and MC
+points), and for the sum over all histos and bins, and are written to PDFs and
+to a res.yoda data file.
+
+This can be useful for determining whether the interpolation is providing a
+sufficiently faithful approximation to the true observables, and choosing the
+most appropriate order of fit polynomial. It can also be useful to detect
+overfitting, by comparing residuals distributions between the set of runs used
+to make the fit (via prof-ipol) and an equivalent set not included in the fit.
+
+TODO:
+ * Allow control over the output filenames / directory.
+ * Add an option for value-only (no err) residuals
+ * Support runcomb filtering
+"""
+
+from __future__ import print_function
+
+import optparse, os, sys
+op = optparse.OptionParser(usage=__doc__)
+#op.add_option("--ifile", dest="IFILE", default="ipol.dat", help="File from which to read the bin interpolations (default: %default)")
+op.add_option("--pname", "--pfile", dest="PNAME", default="params.dat", help="name of the params file to be found in each run directory (default: %default)")
+op.add_option("--xrange", "--rerange", dest="XRANGE", default=0.1, type=float, help="the +- range of relative deviation to display (default: %default)")
+op.add_option("--nbins", dest="NBINS", default=30, type=int, help="number of bins in relative residuals histograms (default: %default)")
+op.add_option("--tot-only", dest="ONLY_TOT", action="store_true", default=False, help="only make total residuals histograms, not per-observable (default: %default)")
+op.add_option("--logx", dest="LOG_BINS", action="store_true", default=False, help="use a symmetric log binning for better resolution around 0 (default: %default)")
+op.add_option("-v", "--debug", dest="DEBUG", action="store_true", default=False, help="turn on some debug messages")
+op.add_option("-q", "--quiet", dest="QUIET", action="store_true", default=False, help="turn off messages")
+Use the interpolations stored in <ipolfile> to find optimised parameters with
+the reference histograms found in the <refdir> as the optimisation target.
+
+The <runsdir> is used to calculate the maximum error value seen for each bin,
+to regularise interpolated errors which could otherwise blow up, leading to
+an unrepresentative small chi2 (and hence fit result) outside the sampled ranges.
+
+TODO:
+ * Handle run combination file/string (write a hash of the run list into the ipol filename?)
+"""
+
+import optparse, os, sys
+op = optparse.OptionParser(usage=__doc__)
+op.add_option("--wfile", dest="WFILE", default=None, help="Path to a weight file to specify unequal chi2 weights of each bin in the fit (default: %default)")
+op.add_option("--output", dest="OUTPUT", default="mltunes", help="Prefix for outputs (default: %default)")
+op.add_option("--limits", dest="LIMITS", default=None, help="Simple text file with parameter limits and fixed parameters")
+op.add_option("--scan", dest="SCAN", default=None, type=int, help="Perform a brute force scan of SCAN points to find minimiser start point")