
diff --git a/bin/prof2-ipol b/bin/prof2-ipol
--- a/bin/prof2-ipol
+++ b/bin/prof2-ipol
@@ -1,309 +1,238 @@
#! /usr/bin/env python
"""\
%prog <runsdir> [<ipolfile>=ipol.dat] [opts]

Interpolate histo bin values as a function of the parameter space by loading
run data and parameter lists from run directories in $runsdir (often "mc").

TODO:
* Use weight file position matches to exclude some bins, as well as path matching
* Handle run combination file/string (write a hash of the run list into the ipol filename?)
* Support asymm error parameterisation
"""
from __future__ import division
import optparse, os, sys
op = optparse.OptionParser(usage=__doc__)
op.add_option("--pname", "--pfile", dest="PNAME", default="params.dat", help="Name of the params file to be found in each run directory (default: %default)")
op.add_option("--limits", dest="LIMITS", default=None, help="Simple text file with parameter limits and fixed parameters")
op.add_option("--wfile", dest="WFILE", default=None, help="Path to a weight file, used to restrict ipol building to a subset of bins (default: %default)")
op.add_option("--order", dest="ORDER", default=3, type=int, help="Global order of polynomials for interpolation (default: %default)")
op.add_option("--ierr", dest="IERR", default="symm", help="Whether to interpolate MC errors: none, mean, median, symm (default: %default)") #< add rel, asymm
op.add_option("--eorder", dest="ERR_ORDER", default=None, type=int, help="Global order of polynomials for uncertainty interpolation (default: same as from --order)")
op.add_option("--rc", dest="RUNCOMBS", default=None, help="Run-combination file")
# TODO: Add a no-scale option
# TODO: Change to instead (optionally) specify the max number of parallel threads/procs. Use by default if ncores > 1?
op.add_option("-j", dest="MULTI", type=int, default=1, help="Number of threads to use")
op.add_option("--summ", dest="SUMMARY", default=None, help="Summary description to be written to the ipol output file")
op.add_option("-v", "--debug", dest="DEBUG", action="store_true", default=False, help="Turn on some debug messages")
op.add_option("-q", "--quiet", dest="QUIET", action="store_true", default=False, help="Turn off messages")
op.add_option("--split", dest="SPLIT", default=None, help="Can incorporate a linear split in parameter space. Provide \"ParamName1 ParamName2 Value1 Value2 Gradient (these describe the line down which to split the plot. Value1 and Value2 form the coords of a point on the line.) ABOVE/BELOW (ABOVE => use runs above the line etc)\"")
opts, args = op.parse_args()
## Get mandatory arguments
if len(args) < 1:
print "Argument missing... exiting\n\n"
op.print_usage()
sys.exit(1)
RUNSDIR = args[0]
IFILE = "ipol.dat"
if len(args) >= 2:
IFILE = args[1]
## By default, use the same ipol order for errors as for values
if opts.ERR_ORDER is None:
opts.ERR_ORDER = opts.ORDER
## Load the Professor machinery
import professor2 as prof
if not opts.QUIET:
print prof.logo
# Read details of split in parameter space if necessary
if opts.SPLIT != None:
Split = map(str, opts.SPLIT.split(' '))
splitParam1 = Split[0]
splitParam2 = Split[1]
splitVal1 = float(Split[2])
splitVal2 = float(Split[3])
splitGrad = float(Split[4])
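## The split line passes through (splitVal1, splitVal2) in the
## (splitParam1, splitParam2) plane with gradient splitGrad; the run
## filtering below keeps runs for which
##   splitParam2 - splitGrad*(splitParam1 - splitVal1)
## is above (ABOVE) or below (BELOW) splitVal2.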
## Load MC run histos and params
try:
if RUNSDIR.endswith(".yaml"):
# TODO: document this
try:
import yaml
except ImportError, e:
print "Unable to import YAML:", e
import sys
sys.exit(1)
PARAMS, HISTOS = prof.read_all_rundata_yaml(RUNSDIR)
else:
PARAMS, HISTOS = prof.read_all_rundata(RUNSDIR, opts.PNAME)
RUNS, PARAMNAMES, PARAMSLIST = prof.mk_ipolinputs(PARAMS)
except Exception, e:
print e
sys.exit(1)
## Handle run combns spec
## e.g. --rc runcombs.dat:4 would use the line at (zero-based) index 4 of the runcombs.dat file
if opts.RUNCOMBS is not None:
f_rc, line = opts.RUNCOMBS.split(":")
with open(f_rc) as f:
temp = [l for l in f]
thisRC = temp[int(line)].split()
# Filtering and overwriting
thisRUNS, thisPARAMSLIST = [], []
for num, r in enumerate(RUNS):
if r in thisRC:
thisRUNS.append(r)
thisPARAMSLIST.append(PARAMSLIST[num])
RUNS = thisRUNS
PARAMSLIST = thisPARAMSLIST
## Some useful announcements about the data loaded and the interpolation planned
if not opts.QUIET:
minnruns = prof.numCoeffs(len(PARAMNAMES), opts.ORDER)
if (len(RUNS) < minnruns):
print "Not enough runs for order %i polynomials --- would require %i" % (opts.ORDER, minnruns)
for i in xrange(1, opts.ORDER):
if (prof.numCoeffs(len(PARAMNAMES), opts.ORDER -i) <= len(RUNS)):
print "Try order %i (min %i runs)" % (opts.ORDER -i, prof.numCoeffs(len(PARAMNAMES), opts.ORDER -i))
import sys
sys.exit(1)
else:
print "Building %dD interpolations in %d params: require at least %d runs" % \
(opts.ORDER, len(PARAMNAMES), prof.numCoeffs(len(PARAMNAMES), opts.ORDER))
print "Loaded %d distinct observables (%d bins) from %d runs" % \
(len(HISTOS), sum(hs[RUNS[0]].nbins for hs in HISTOS.values()), len(RUNS))
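## (numCoeffs(p, n) should equal binomial(p+n, n), the number of coefficients
## of a general order-n polynomial in p parameters; each coefficient needs at
## least one anchor run to be determined.)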
## Weight file parsing to select a histos subset
if opts.WFILE:
matchers = prof.read_pointmatchers(opts.WFILE)
for hn in HISTOS.keys():
if not any(m.match_path(hn) for m in matchers.keys()):
del HISTOS[hn]
elif opts.DEBUG:
print "Observable %s passed weight file path filter" % hn
print "Filtered observables by path, %d remaining" % len(HISTOS)
HNAMES = HISTOS.keys()
-
-
## If there's nothing left to interpolate, exit!
if not HNAMES:
print "No observables remaining... exiting"
sys.exit(1)
## Robustness tests and cleaning: only retain runs that contain every histo
# TODO: combine with weights histo vetoing -- should we explicitly unload unused run data, or keep it for the next combination to use? Or do we now leave runcombs to the user?
bad, badnum = [], []
for irun, run in enumerate(RUNS):
for hn in HNAMES:
if not HISTOS[hn].has_key(run):
bad.append(run)
badnum.append(irun)
break
if opts.LIMITS != None:
limits, fixed = prof.read_limitsandfixed(opts.LIMITS)
for irun, run in enumerate(RUNS):
for inam, nam in enumerate(PARAMNAMES):
if PARAMSLIST[irun][inam] <= limits[nam][0] or PARAMSLIST[irun][inam] >= limits[nam][1]:
bad.append(run)
badnum.append(irun)
break
if opts.SPLIT != None:
ip1 = PARAMNAMES.index(splitParam1)
ip2 = PARAMNAMES.index(splitParam2)
for irun, run in enumerate(RUNS):
ycheck = PARAMSLIST[irun][ip2]-splitGrad*(PARAMSLIST[irun][ip1]-splitVal1)
if Split[5] == "ABOVE": #if taking values above the split, remove those below
if ycheck <= splitVal2:
bad.append(run)
badnum.append(irun)
elif Split[5] == "BELOW": #if taking values below the split, remove those above
if ycheck >= splitVal2:
bad.append(run)
badnum.append(irun)
else:
print "Error: must specify either ABOVE or BELOW the split"
sys.exit(1)
if bad:
print "Found %d bad runs in %d total... removing" % (len(bad), len(RUNS))
goodr, goodp = [], []
for irun, run in enumerate(RUNS):
if not irun in badnum:
goodr.append(run)
goodp.append(PARAMSLIST[irun])
RUNS = goodr
PARAMSLIST = goodp
## If there's nothing left to interpolate, exit!
if not RUNS:
print "No valid runs remaining... exiting"
sys.exit(1)
+
+## Make interpolations
+# TODO: Split this up for parallelisation & CLI reporting
CONFIG = {"MULTI":opts.MULTI, "ORDER":opts.ORDER, "IERR":opts.IERR, "ERR_ORDER":opts.ERR_ORDER}
tempDict = prof.mkStandardIpols(HISTOS, HNAMES, RUNS, PARAMSLIST, CONFIG)
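## tempDict maps (histo_name, bin_index) -> persistency string for that
## bin's interpolation, to be written out below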
-# BNAMES = []
-# for hn in HNAMES:
- # histos = HISTOS[hn]
- # nbins = histos.values()[0].nbins
- # for n in xrange(nbins):
- # BNAMES.append([hn, n])
-
-# NBINS=len(BNAMES)
-
-# import zlib
-# def worker(q, rdict, counter):
- # "Function to make bin ipols and store ipol persistency strings for each histo"
- # while True:
- # if q.empty():
- # break
- # temp = q.get()
- # hn=temp[0]
- # histos = HISTOS[hn]
- # n = temp[1]
- # xmax = histos.values()[0].bins[n].xmax
- # xmin = histos.values()[0].bins[n].xmin
- # vals = [histos[run].bins[n].val for run in RUNS]
- # errs = [histos[run].bins[n].err for run in RUNS]
- # ib = prof.mk_ipolbin(PARAMSLIST, vals, errs, xmin, xmax, opts.ORDER, opts.IERR, opts.ERR_ORDER)
- # if ib is None:
- # print "Ignoring", hn, "Bin number", n
- # else:
- # s = ""
- # s += "%s#%d %.5e %.5e\n" % (hn, n, ib.xmin, ib.xmax)
- # s += " " + ib.ival.toString("val") + "\n"
- # if ib.ierrs:
- # s += " " + ib.ierrs.toString("err") + "\n"
- # # rdict[(hn,n)] = zlib.compress(s, 9) #< save some memory
- # rdict[(hn,n)] = s #< save some memory
- # del s
- # del ib #< pro-actively clear up memory
- # counter.value+=1
- # if counter.value==500:
- # counter.value=0
- # sys.stderr.write('\rProgress: {0:.1%}'.format(len(rdict.keys())/NBINS))
-
-
-
-# print "\n\nParametrising %i objects...\n"%len(BNAMES)
-# import time, multiprocessing
-# time1 = time.time()
-
-# ## A shared memory object is required for coefficient retrieval
-# from multiprocessing import Manager, Value
-# manager = Manager()
-# tempDict = manager.dict()
-
-# # This for the status --- modululs is too expensive
-# ndone=Value('i', 0)
-
-# ## The job queue
-# q = multiprocessing.Queue()
-
-# map(lambda x:q.put(x), BNAMES)
-
-
-
-# ## Fire away
-# workers = [multiprocessing.Process(target=worker, args=(q, tempDict, ndone)) for i in xrange(opts.MULTI)]
-# map(lambda x:x.start(), workers)
-# map(lambda x:x.join(), workers)
-# # map(lambda x:x.terminate(), workers)
-
-
-# ## Timing
-# time2 = time.time()
-# sys.stderr.write('\rParametrisation took %0.2fs.\nWriting output...' % ((time2-time1)))
## Active memory clean-up
del HISTOS
## Write out meta info
# TODO: Move the format writing into the prof2 Python library
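## The resulting header looks like this (values are illustrative):
##   Summary: <summary text>
##   DataDir: /abs/path/to/mc
##   ProfVersion: <version>
##   Date: <timestamp>
##   DataFormat: binned 2
##   ParamNames: PAR1 PAR2
##   Dimension: 2
##   MinParamVals: 0.100000 1.000000
##   MaxParamVals: 0.900000 9.000000
##   DoParamScaling: 1
##   NumInputs: 100
##   Runs: 0000 0001 ...
##   ---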
with open(IFILE, "w") as f:
if opts.SUMMARY is not None:
f.write("Summary: %s\n" % opts.SUMMARY)
f.write("DataDir: %s\n" % os.path.abspath(RUNSDIR))
f.write("ProfVersion: %s\n" % prof.version())
f.write("Date: %s\n" % prof.mk_timestamp())
f.write("DataFormat: binned 2\n") # This tells the reader how to treat the coefficients that follow
# Format and write out parameter names
pstring = "ParamNames:"
for p in PARAMNAMES:
pstring += " %s" % p
f.write(pstring + "\n")
# Dimension (consistency check)
f.write("Dimension: %i\n" % len(PARAMNAMES))
# Interpolation validity (hypercube edges)
minstring = "MinParamVals:"
for v in prof.mk_minvals(PARAMSLIST):
minstring += " %f" % v
f.write(minstring + "\n")
maxstring = "MaxParamVals:"
for v in prof.mk_maxvals(PARAMSLIST):
maxstring += " %f" % v
f.write(maxstring + "\n")
f.write("DoParamScaling: 1\n")
# Number of inputs per bin
f.write("NumInputs: %i\n" % len(PARAMSLIST))
s_runs = "Runs:"
for r in RUNS:
s_runs +=" %s"%r
f.write("%s\n"%s_runs)
f.write("---\n")
## Write out numerical data for all interpolations
s = ""
for hn in sorted(HNAMES):
- thisbins=sorted(filter(lambda x: x[0]==hn,tempDict.keys()))
+ thisbins = sorted(filter(lambda x: x[0]==hn, tempDict.keys()))
for ipolstring in [tempDict[x] for x in thisbins]:
- s+=ipolstring
+ s += ipolstring
# Open file for write/append
with open(IFILE, "a") as f:
f.write(s)
print "\nOutput written to %s" % IFILE
diff --git a/pyext/professor2/ipol.py b/pyext/professor2/ipol.py
--- a/pyext/professor2/ipol.py
+++ b/pyext/professor2/ipol.py
@@ -1,170 +1,169 @@
# -*- python -*-
+from __future__ import division
from professor2.core import *
from professor2.histos import *
def mk_ipolinputs(params):
"""
Make sorted run name and parameter name & value lists, suitable for passing to prof.Ipol
params is a dict (actually, prefer OrderedDict) of run_names -> param_vals,
as returned from read_rundata
"""
runs = sorted(params.keys())
if not runs:
return runs, [], [[]]
paramnames = params[runs[0]].keys()
paramslist = [[params[run][pn] for pn in paramnames] for run in runs]
return runs, paramnames, paramslist
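# E.g. (hypothetical run and parameter names):
#   params = OrderedDict([("0000", OrderedDict([("PAR1", 0.1), ("PAR2", 2.0)])),
#                         ("0001", OrderedDict([("PAR1", 0.3), ("PAR2", 1.5)]))])
#   -> runs = ["0000", "0001"], paramnames = ["PAR1", "PAR2"],
#      paramslist = [[0.1, 2.0], [0.3, 1.5]]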
def mk_ipolbin(P, V, E, xmin, xmax, order, errmode, errorder):
valipol = Ipol(P, V, order)
# nan check in coeffs
import math
if any([math.isnan(x) for x in valipol.coeffs]):
print "Warning: nan coefficient encountered in value ipol for %s"%histos.values()[0].path
return None
## Build the error interpolation(s)
if not errmode or errmode == "none":
erripols = None
## Build the error interpolation(s)
elif errmode == "mean":
meanerr = sum(E) / float(len(E))
erripols = Ipol(P, [meanerr], 0) #< const 0th order interpolation
elif errmode == "median":
medianerr = E[len(E)//2]
erripols = Ipol(P, [medianerr], 0) #< const 0th order interpolation
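# NB. E[len(E)//2] picks the middle element of E in run order: this is the
# true median only if E happens to be sorted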
elif errmode == "symm":
erripols = Ipol(P, E, order)
elif errmode == "asymm":
raise Exception("Error interpolation mode 'asymm' not yet supported")
# errs0 = [histos[run].bins[n].errs[0] for run in runs]
# erripol0 = Ipol(paramslist, errs0, order)
# errs1 = [histos[run].bins[n].errs[1] for run in runs]
# erripol1 = Ipol(paramslist, errs1, order)
# erripols = [erripol0, erripol1]
else:
raise Exception("Unknown error interpolation mode '%s'" % errmode)
if erripols is not None:
if any([math.isnan(x) for x in erripols.coeffs]):
print "Warning: nan coefficient encountered in error ipol for %s"%histos.values()[0].path
return None
return IpolBin(xmin, xmax, valipol, erripols)
# Keep this for backward compatibility
def mk_ipolhisto(histos, runs, paramslist, order, errmode=None, errorder=None):
"""\
Make a prof.IpolHisto from a dict of prof.DataHistos and the corresponding
runs and params lists, at the given polynomial order.
Whether and how the data histo errors are interpolated is controlled by
errmode: if it is None or 'none', uncertainties will not be parameterised and
will return 0 if queried; 'mean' and 'median' will use fixed values derived
from the anchor points; 'symm' will parameterise the average of the + and -
errors of each bin at the polynomial order given by errorder. If errorder is
None, the same order as for the value parameterisation will be used.
Parameter range scaling will be applied, so a DoParamScaling=true flag will
need to be written to the metadata when persisting the resulting IpolHisto.
"""
if errmode is None:
errmode = "none"
if errorder is None:
errorder = order
#
nbins = len(histos.itervalues().next().bins)
ibins = []
for n in xrange(nbins):
## Check that the bin edges are consistent and extract their values
# TODO: move bin edge consistency checking into the Histo base class
xmax = histos.values()[0].bins[n].xmax
xmin = histos.values()[0].bins[n].xmin
vals = [histos[run].bins[n].val for run in runs]
errs = [histos[run].bins[n].err for run in runs]
ibins.append(mk_ipolbin(paramslist, vals, errs, xmin, xmax, order, errmode, errorder))
return Histo(ibins, histos.values()[0].path)
def mkStandardIpols(HISTOS, HNAMES, RUNS, PARAMSLIST, CFG):
+ # TODO: factorise into per-bin and per-histo functions
BNAMES = []
for hn in HNAMES:
histos = HISTOS[hn]
nbins = histos.values()[0].nbins
for n in xrange(nbins):
BNAMES.append([hn, n])
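# BNAMES is a flat list of [histo_name, bin_index] work items, one per bin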
- NBINS=len(BNAMES)
+ NBINS = len(BNAMES)
- MSGEVERY = int(NBINS/100.);
+ MSGEVERY = int(NBINS/100.) if NBINS > 100 else 1;
import sys
import professor2 as prof
def worker(q, rdict, counter):
"Function to make bin ipols and store ipol persistency strings for each histo"
while True:
if q.empty():
break
temp = q.get()
- hn=temp[0]
+ hn = temp[0]
histos = HISTOS[hn]
n = temp[1]
xmax = histos.values()[0].bins[n].xmax
xmin = histos.values()[0].bins[n].xmin
vals = [histos[run].bins[n].val for run in RUNS]
errs = [histos[run].bins[n].err for run in RUNS]
ib = prof.mk_ipolbin(PARAMSLIST, vals, errs, xmin, xmax, CFG["ORDER"], CFG["IERR"], CFG["ERR_ORDER"])
if ib is None:
print "Ignoring", hn, "Bin number", n
else:
s = ""
s += "%s#%d %.5e %.5e\n" % (hn, n, ib.xmin, ib.xmax)
s += " " + ib.ival.toString("val") + "\n"
if ib.ierrs:
s += " " + ib.ierrs.toString("err") + "\n"
rdict[(hn,n)] = s
del s
del ib #< pro-actively clear up memory
- counter.value+=1
- if counter.value==MSGEVERY:
- counter.value=0
+ counter.value += 1
+ if counter.value == MSGEVERY:
+ counter.value = 0
sys.stderr.write('\rProgress: {0:.1%}'.format(len(rdict.keys())/NBINS))
-
-
- print "\n\nParametrising %i objects...\n"%len(BNAMES)
+ # TODO: Printing and multiprocessing should happen under script control
+ print "\n\nParametrising %i objects...\n" % len(BNAMES)
import time, multiprocessing
# time1 = time.time()
## A shared memory object is required for coefficient retrieval
from multiprocessing import Manager, Value
manager = Manager()
tempDict = manager.dict()
- # This for the status --- modululs is too expensive
- ndone=Value('i', 0)
+ # This is for the status report --- modulus is too expensive
+ ndone = Value('i', 0)
## The job queue
q = multiprocessing.Queue()
map(lambda x:q.put(x), BNAMES)
-
-
## Fire away
workers = [multiprocessing.Process(target=worker, args=(q, tempDict, ndone)) for i in xrange(CFG["MULTI"])]
map(lambda x:x.start(), workers)
map(lambda x:x.join(), workers)
# ## Timing
# time2 = time.time()
# sys.stderr.write('\rParametrisation took %0.2fs.\nWriting output...' % ((time2-time1)))
return tempDict
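## Typical use, with CFG keys as assembled in bin/prof2-ipol:
##   CFG = {"MULTI": 4, "ORDER": 3, "IERR": "symm", "ERR_ORDER": 3}
##   ipolstrings = mkStandardIpols(HISTOS, HNAMES, RUNS, PARAMSLIST, CFG)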
