
diff --git a/bin/prof2-ipol b/bin/prof2-ipol
--- a/bin/prof2-ipol
+++ b/bin/prof2-ipol
@@ -1,309 +1,238 @@
#! /usr/bin/env python
"""\
%prog <runsdir> [<ipolfile>=ipol.dat] [opts]

Interpolate histo bin values as a function of the parameter space by loading
run data and parameter lists from run directories in $runsdir (often "mc").

TODO:
* Use weight file position matches to exclude some bins, as well as path matching
* Handle run combination file/string (write a hash of the run list into the ipol filename?)
* Support asymm error parameterisation
"""
from __future__ import division
import optparse, os, sys
op = optparse.OptionParser(usage=__doc__)
op.add_option("--pname", "--pfile", dest="PNAME", default="params.dat", help="Name of the params file to be found in each run directory (default: %default)")
op.add_option("--limits", dest="LIMITS", default=None, help="Simple text file with parameter limits and fixed parameters")
op.add_option("--wfile", dest="WFILE", default=None, help="Path to a weight file, used to restrict ipol building to a subset of bins (default: %default)")
op.add_option("--order", dest="ORDER", default=3, type=int, help="Global order of polynomials for interpolation (default: %default)")
op.add_option("--ierr", dest="IERR", default="symm", help="Whether to interpolate MC errors: none, mean, median, symm (default: %default)") #< add rel, asymm
op.add_option("--eorder", dest="ERR_ORDER", default=None, type=int, help="Global order of polynomials for uncertainty interpolation (default: same as from --order)")
op.add_option("--rc", dest="RUNCOMBS", default=None, help="Run-combination file")
# TODO: Add a no-scale option
# TODO: Change to instead (optionally) specify the max number of parallel threads/procs. Use by default if ncores > 1?
op.add_option("-j", dest="MULTI", type=int, default=1, help="Number of threads to use")
op.add_option("--summ", dest="SUMMARY", default=None, help="Summary description to be written to the ipol output file")
op.add_option("-v", "--debug", dest="DEBUG", action="store_true", default=False, help="Turn on some debug messages")
op.add_option("-q", "--quiet", dest="QUIET", action="store_true", default=False, help="Turn off messages")
op.add_option("--split", dest="SPLIT", default=None, help="Can incorporate a linear split in parameter space. Provide \"ParamName1 ParamName2 Value1 Value2 Gradient (these describe the line down which to split the plot. Value1 and Value2 form the coords of a point on the line.) ABOVE/BELOW (ABOVE => use runs above the line etc)\"")
opts, args = op.parse_args()
## Get mandatory arguments
if len(args) < 1:
print "Argument missing... exiting\n\n"
op.print_usage()
sys.exit(1)
RUNSDIR = args[0]
IFILE = "ipol.dat"
if len(args) >= 2:
IFILE = args[1]
## By default, use the same ipol order for errors as for values
if opts.ERR_ORDER is None:
opts.ERR_ORDER = opts.ORDER
## Load the Professor machinery
import professor2 as prof
if not opts.QUIET:
print prof.logo
# Read details of split in parameter space if necessary
if opts.SPLIT != None:
Split = map(str, opts.SPLIT.split(' '))
splitParam1 = Split[0]
splitParam2 = Split[1]
splitVal1 = float(Split[2])
splitVal2 = float(Split[3])
splitGrad = float(Split[4])
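## The split line passes through (splitVal1, splitVal2) in the
## (splitParam1, splitParam2) plane with gradient splitGrad; the run
## filtering below keeps runs for which
##   splitParam2 - splitGrad*(splitParam1 - splitVal1)
## is above (ABOVE) or below (BELOW) splitVal2.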
## Load MC run histos and params
try:
if RUNSDIR.endswith(".yaml"):
# TODO: document this
try:
import yaml
except ImportError, e:
print "Unable to import YAML:", e
import sys
sys.exit(1)
PARAMS, HISTOS = prof.read_all_rundata_yaml(RUNSDIR)
else:
PARAMS, HISTOS = prof.read_all_rundata(RUNSDIR, opts.PNAME)
RUNS, PARAMNAMES, PARAMSLIST = prof.mk_ipolinputs(PARAMS)
except Exception, e:
print e
sys.exit(1)
## Handle run combns spec
## e.g. --rc runcombs.dat:4 would use the line at (zero-based) index 4 of the runcombs.dat file
if opts.RUNCOMBS is not None:
f_rc, line = opts.RUNCOMBS.split(":")
with open(f_rc) as f:
temp = [l for l in f]
thisRC = temp[int(line)].split()
# Filtering and overwriting
thisRUNS, thisPARAMSLIST = [], []
for num, r in enumerate(RUNS):
if r in thisRC:
thisRUNS.append(r)
thisPARAMSLIST.append(PARAMSLIST[num])
RUNS = thisRUNS
PARAMSLIST = thisPARAMSLIST
## Some useful announcements about the data loaded and the interpolation planned
if not opts.QUIET:
minnruns = prof.numCoeffs(len(PARAMNAMES), opts.ORDER)
if (len(RUNS) < minnruns):
print "Not enough runs for order %i polynomials --- would require %i" % (opts.ORDER, minnruns)
for i in xrange(1, opts.ORDER):
if (prof.numCoeffs(len(PARAMNAMES), opts.ORDER -i) <= len(RUNS)):
print "Try order %i (min %i runs)" % (opts.ORDER -i, prof.numCoeffs(len(PARAMNAMES), opts.ORDER -i))
import sys
sys.exit(1)
else:
print "Building %dD interpolations in %d params: require at least %d runs" % \
(opts.ORDER, len(PARAMNAMES), prof.numCoeffs(len(PARAMNAMES), opts.ORDER))
print "Loaded %d distinct observables (%d bins) from %d runs" % \
(len(HISTOS), sum(hs[RUNS[0]].nbins for hs in HISTOS.values()), len(RUNS))
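## (numCoeffs(p, n) should equal binomial(p+n, n), the number of coefficients
## of a general order-n polynomial in p parameters; each coefficient needs at
## least one anchor run to be determined.)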
## Weight file parsing to select a histos subset
if opts.WFILE:
matchers = prof.read_pointmatchers(opts.WFILE)
for hn in HISTOS.keys():
if not any(m.match_path(hn) for m in matchers.keys()):
del HISTOS[hn]
elif opts.DEBUG:
print "Observable %s passed weight file path filter" % hn
print "Filtered observables by path, %d remaining" % len(HISTOS)
HNAMES = HISTOS.keys()
-
-
## If there's nothing left to interpolate, exit!
if not HNAMES:
print "No observables remaining... exiting"
sys.exit(1)
## Robustness tests and cleaning: only retain runs that contain every histo
# TODO: combine with weights histo vetoing -- should we explicitly unload unused run data, or keep it for the next combination to use? Or do we now leave runcombs to the user?
bad, badnum = [], []
for irun, run in enumerate(RUNS):
for hn in HNAMES:
if not HISTOS[hn].has_key(run):
bad.append(run)
badnum.append(irun)
break
if opts.LIMITS != None:
limits, fixed = prof.read_limitsandfixed(opts.LIMITS)
for irun, run in enumerate(RUNS):
for inam, nam in enumerate(PARAMNAMES):
if PARAMSLIST[irun][inam] <= limits[nam][0] or PARAMSLIST[irun][inam] >= limits[nam][1]:
bad.append(run)
badnum.append(irun)
break
if opts.SPLIT != None:
ip1 = PARAMNAMES.index(splitParam1)
ip2 = PARAMNAMES.index(splitParam2)
for irun, run in enumerate(RUNS):
ycheck = PARAMSLIST[irun][ip2]-splitGrad*(PARAMSLIST[irun][ip1]-splitVal1)
if Split[5] == "ABOVE": #if taking values above the split, remove those below
if ycheck <= splitVal2:
bad.append(run)
badnum.append(irun)
elif Split[5] == "BELOW": #if taking values below the split, remove those above
if ycheck >= splitVal2:
bad.append(run)
badnum.append(irun)
else:
print "Error: must specify either ABOVE or BELOW the split"
sys.exit(1)
if bad:
print "Found %d bad runs in %d total... removing" % (len(bad), len(RUNS))
goodr, goodp = [], []
for irun, run in enumerate(RUNS):
if not irun in badnum:
goodr.append(run)
goodp.append(PARAMSLIST[irun])
RUNS = goodr
PARAMSLIST = goodp
## If there's nothing left to interpolate, exit!
if not RUNS:
print "No valid runs remaining... exiting"
sys.exit(1)
+
+## Make interpolations
+# TODO: Split this up for parallelisation & CLI reporting
CONFIG = {"MULTI":opts.MULTI, "ORDER":opts.ORDER, "IERR":opts.IERR, "ERR_ORDER":opts.ERR_ORDER}
tempDict = prof.mkStandardIpols(HISTOS, HNAMES, RUNS, PARAMSLIST, CONFIG)
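## tempDict maps (histo_name, bin_index) -> persistency string for that
## bin's interpolation, to be written out below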
-# BNAMES = []
-# for hn in HNAMES:
- # histos = HISTOS[hn]
- # nbins = histos.values()[0].nbins
- # for n in xrange(nbins):
- # BNAMES.append([hn, n])
-
-# NBINS=len(BNAMES)
-
-# import zlib
-# def worker(q, rdict, counter):
- # "Function to make bin ipols and store ipol persistency strings for each histo"
- # while True:
- # if q.empty():
- # break
- # temp = q.get()
- # hn=temp[0]
- # histos = HISTOS[hn]
- # n = temp[1]
- # xmax = histos.values()[0].bins[n].xmax
- # xmin = histos.values()[0].bins[n].xmin
- # vals = [histos[run].bins[n].val for run in RUNS]
- # errs = [histos[run].bins[n].err for run in RUNS]
- # ib = prof.mk_ipolbin(PARAMSLIST, vals, errs, xmin, xmax, opts.ORDER, opts.IERR, opts.ERR_ORDER)
- # if ib is None:
- # print "Ignoring", hn, "Bin number", n
- # else:
- # s = ""
- # s += "%s#%d %.5e %.5e\n" % (hn, n, ib.xmin, ib.xmax)
- # s += " " + ib.ival.toString("val") + "\n"
- # if ib.ierrs:
- # s += " " + ib.ierrs.toString("err") + "\n"
- # # rdict[(hn,n)] = zlib.compress(s, 9) #< save some memory
- # rdict[(hn,n)] = s #< save some memory
- # del s
- # del ib #< pro-actively clear up memory
- # counter.value+=1
- # if counter.value==500:
- # counter.value=0
- # sys.stderr.write('\rProgress: {0:.1%}'.format(len(rdict.keys())/NBINS))
-
-
-
-# print "\n\nParametrising %i objects...\n"%len(BNAMES)
-# import time, multiprocessing
-# time1 = time.time()
-
-# ## A shared memory object is required for coefficient retrieval
-# from multiprocessing import Manager, Value
-# manager = Manager()
-# tempDict = manager.dict()
-
-# # This for the status --- modululs is too expensive
-# ndone=Value('i', 0)
-
-# ## The job queue
-# q = multiprocessing.Queue()
-
-# map(lambda x:q.put(x), BNAMES)
-
-
-
-# ## Fire away
-# workers = [multiprocessing.Process(target=worker, args=(q, tempDict, ndone)) for i in xrange(opts.MULTI)]
-# map(lambda x:x.start(), workers)
-# map(lambda x:x.join(), workers)
-# # map(lambda x:x.terminate(), workers)
-
-
-# ## Timing
-# time2 = time.time()
-# sys.stderr.write('\rParametrisation took %0.2fs.\nWriting output...' % ((time2-time1)))
## Active memory clean-up
del HISTOS
## Write out meta info
# TODO: Move the format writing into the prof2 Python library
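## The resulting header looks like this (values are illustrative):
##   Summary: <summary text>
##   DataDir: /abs/path/to/mc
##   ProfVersion: <version>
##   Date: <timestamp>
##   DataFormat: binned 2
##   ParamNames: PAR1 PAR2
##   Dimension: 2
##   MinParamVals: 0.100000 1.000000
##   MaxParamVals: 0.900000 9.000000
##   DoParamScaling: 1
##   NumInputs: 100
##   Runs: 0000 0001 ...
##   ---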
with open(IFILE, "w") as f:
if opts.SUMMARY is not None:
f.write("Summary: %s\n" % opts.SUMMARY)
f.write("DataDir: %s\n" % os.path.abspath(RUNSDIR))
f.write("ProfVersion: %s\n" % prof.version())
f.write("Date: %s\n" % prof.mk_timestamp())
f.write("DataFormat: binned 2\n") # This tells the reader how to treat the coefficients that follow
# Format and write out parameter names
pstring = "ParamNames:"
for p in PARAMNAMES:
pstring += " %s" % p
f.write(pstring + "\n")
# Dimension (consistency check)
f.write("Dimension: %i\n" % len(PARAMNAMES))
# Interpolation validity (hypercube edges)
minstring = "MinParamVals:"
for v in prof.mk_minvals(PARAMSLIST):
minstring += " %f" % v
f.write(minstring + "\n")
maxstring = "MaxParamVals:"
for v in prof.mk_maxvals(PARAMSLIST):
maxstring += " %f" % v
f.write(maxstring + "\n")
f.write("DoParamScaling: 1\n")
# Number of inputs per bin
f.write("NumInputs: %i\n" % len(PARAMSLIST))
s_runs = "Runs:"
for r in RUNS:
s_runs +=" %s"%r
f.write("%s\n"%s_runs)
f.write("---\n")
## Write out numerical data for all interpolations
s = ""
for hn in sorted(HNAMES):
- thisbins=sorted(filter(lambda x: x[0]==hn,tempDict.keys()))
+ thisbins = sorted(filter(lambda x: x[0]==hn, tempDict.keys()))
for ipolstring in [tempDict[x] for x in thisbins]:
- s+=ipolstring
+ s += ipolstring
# Open file for write/append
with open(IFILE, "a") as f:
f.write(s)
print "\nOutput written to %s" % IFILE
diff --git a/pyext/professor2/ipol.py b/pyext/professor2/ipol.py
--- a/pyext/professor2/ipol.py
+++ b/pyext/professor2/ipol.py
@@ -1,170 +1,169 @@
# -*- python -*-
+from __future__ import division
from professor2.core import *
from professor2.histos import *
def mk_ipolinputs(params):
"""
Make sorted run name and parameter name & value lists, suitable for passing to prof.Ipol
params is a dict (actually, prefer OrderedDict) of run_names -> param_vals,
as returned from read_rundata
"""
runs = sorted(params.keys())
if not runs:
return runs, [], [[]]
paramnames = params[runs[0]].keys()
paramslist = [[params[run][pn] for pn in paramnames] for run in runs]
return runs, paramnames, paramslist
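# E.g. (hypothetical run and parameter names):
#   params = OrderedDict([("0000", OrderedDict([("PAR1", 0.1), ("PAR2", 2.0)])),
#                         ("0001", OrderedDict([("PAR1", 0.3), ("PAR2", 1.5)]))])
#   -> runs = ["0000", "0001"], paramnames = ["PAR1", "PAR2"],
#      paramslist = [[0.1, 2.0], [0.3, 1.5]]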
def mk_ipolbin(P, V, E, xmin, xmax, order, errmode, errorder):
valipol = Ipol(P, V, order)
# nan check in coeffs
import math
if any([math.isnan(x) for x in valipol.coeffs]):
print "Warning: nan coefficient encountered in value ipol for %s"%histos.values()[0].path
return None
## Build the error interpolation(s)
if not errmode or errmode == "none":
erripols = None
## Build the error interpolation(s)
elif errmode == "mean":
meanerr = sum(E) / float(len(E))
erripols = Ipol(P, [meanerr], 0) #< const 0th order interpolation
elif errmode == "median":
medianerr = E[len(E)//2]
erripols = Ipol(P, [medianerr], 0) #< const 0th order interpolation
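# NB. E[len(E)//2] picks the middle element of E in run order: this is the
# true median only if E happens to be sorted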
elif errmode == "symm":
erripols = Ipol(P, E, order)
elif errmode == "asymm":
raise Exception("Error interpolation mode 'asymm' not yet supported")
# errs0 = [histos[run].bins[n].errs[0] for run in runs]
# erripol0 = Ipol(paramslist, errs0, order)
# errs1 = [histos[run].bins[n].errs[1] for run in runs]
# erripol1 = Ipol(paramslist, errs1, order)
# erripols = [erripol0, erripol1]
else:
raise Exception("Unknown error interpolation mode '%s'" % errmode)
if erripols is not None:
if any([math.isnan(x) for x in erripols.coeffs]):
print "Warning: nan coefficient encountered in error ipol for %s"%histos.values()[0].path
return None
return IpolBin(xmin, xmax, valipol, erripols)
# Keep this for backward compatibility
def mk_ipolhisto(histos, runs, paramslist, order, errmode=None, errorder=None):
"""\
Make a prof.IpolHisto from a dict of prof.DataHistos and the corresponding
runs and params lists, at the given polynomial order.
Whether and how the data histo errors are interpolated is controlled by
errmode: if it is None or 'none', uncertainties will not be parameterised and
will return 0 if queried; 'mean' and 'median' will use fixed values derived
from the anchor points; 'symm' will parameterise the average of the + and -
errors of each bin at the polynomial order given by errorder. If errorder is
None, the same order as for the value parameterisation will be used.
Parameter range scaling will be applied, so a DoParamScaling=true flag will
need to be written to the metadata when persisting the resulting IpolHisto.
"""
if errmode is None:
errmode = "none"
if errorder is None:
errorder = order
#
nbins = len(histos.itervalues().next().bins)
ibins = []
for n in xrange(nbins):
## Check that the bin edges are consistent and extract their values
# TODO: move bin edge consistency checking into the Histo base class
xmax = histos.values()[0].bins[n].xmax
xmin = histos.values()[0].bins[n].xmin
vals = [histos[run].bins[n].val for run in runs]
errs = [histos[run].bins[n].err for run in runs]
ibins.append(mk_ipolbin(paramslist, vals, errs, xmin, xmax, order, errmode, errorder))
return Histo(ibins, histos.values()[0].path)
def mkStandardIpols(HISTOS, HNAMES, RUNS, PARAMSLIST, CFG):
+ # TODO: factorise into per-bin and per-histo functions
BNAMES = []
for hn in HNAMES:
histos = HISTOS[hn]
nbins = histos.values()[0].nbins
for n in xrange(nbins):
BNAMES.append([hn, n])
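# BNAMES is a flat list of [histo_name, bin_index] work items, one per bin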
- NBINS=len(BNAMES)
+ NBINS = len(BNAMES)
- MSGEVERY = int(NBINS/100.);
+ MSGEVERY = int(NBINS/100.) if NBINS > 100 else 1;
import sys
import professor2 as prof
def worker(q, rdict, counter):
"Function to make bin ipols and store ipol persistency strings for each histo"
while True:
if q.empty():
break
temp = q.get()
- hn=temp[0]
+ hn = temp[0]
histos = HISTOS[hn]
n = temp[1]
xmax = histos.values()[0].bins[n].xmax
xmin = histos.values()[0].bins[n].xmin
vals = [histos[run].bins[n].val for run in RUNS]
errs = [histos[run].bins[n].err for run in RUNS]
ib = prof.mk_ipolbin(PARAMSLIST, vals, errs, xmin, xmax, CFG["ORDER"], CFG["IERR"], CFG["ERR_ORDER"])
if ib is None:
print "Ignoring", hn, "Bin number", n
else:
s = ""
s += "%s#%d %.5e %.5e\n" % (hn, n, ib.xmin, ib.xmax)
s += " " + ib.ival.toString("val") + "\n"
if ib.ierrs:
s += " " + ib.ierrs.toString("err") + "\n"
rdict[(hn,n)] = s
del s
del ib #< pro-actively clear up memory
- counter.value+=1
- if counter.value==MSGEVERY:
- counter.value=0
+ counter.value += 1
+ if counter.value == MSGEVERY:
+ counter.value = 0
sys.stderr.write('\rProgress: {0:.1%}'.format(len(rdict.keys())/NBINS))
-
-
- print "\n\nParametrising %i objects...\n"%len(BNAMES)
+ # TODO: Printing and multiprocessing should happen under script control
+ print "\n\nParametrising %i objects...\n" % len(BNAMES)
import time, multiprocessing
# time1 = time.time()
## A shared memory object is required for coefficient retrieval
from multiprocessing import Manager, Value
manager = Manager()
tempDict = manager.dict()
- # This for the status --- modululs is too expensive
- ndone=Value('i', 0)
+ # This is for the status report --- modulus is too expensive
+ ndone = Value('i', 0)
## The job queue
q = multiprocessing.Queue()
map(lambda x:q.put(x), BNAMES)
-
-
## Fire away
workers = [multiprocessing.Process(target=worker, args=(q, tempDict, ndone)) for i in xrange(CFG["MULTI"])]
map(lambda x:x.start(), workers)
map(lambda x:x.join(), workers)
# ## Timing
# time2 = time.time()
# sys.stderr.write('\rParametrisation took %0.2fs.\nWriting output...' % ((time2-time1)))
return tempDict
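## Typical use, with CFG keys as assembled in bin/prof2-ipol:
##   CFG = {"MULTI": 4, "ORDER": 3, "IERR": "symm", "ERR_ORDER": 3}
##   ipolstrings = mkStandardIpols(HISTOS, HNAMES, RUNS, PARAMSLIST, CFG)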
