No OneTemporary
Actions

Size

14 KB

Subscribers

None

View Options

	diff --git a/bin/prof2-envelopes b/bin/prof2-envelopes
	--- a/bin/prof2-envelopes
	+++ b/bin/prof2-envelopes
	@@ -1,184 +1,185 @@
	#! /usr/bin/env python

	"""\
	%prog <runsdir> [datafile directory] [opts]

	Envelope of input data
	"""

	import matplotlib, os
	matplotlib.use(os.environ.get("MPL_BACKEND", "Agg"))
	import optparse, os, sys
	op = optparse.OptionParser(usage=__doc__)
	op.add_option("--pname", "--pfile", dest="PNAME", default="params.dat", help="Name of the params file to be found in each run directory (default: %default)")
	op.add_option("--wfile", dest="WFILE", default=None, help="Path to a weight file, used to restrict plotting to a subset of histograms (default: %default)")
	op.add_option("-o", "--outdir", dest="OUTDIR", default="envelopes", help="Output folder for plots (default: %default)")
	op.add_option("-v", "--debug", dest="DEBUG", action="store_true", default=False, help="Turn on some debug messages")
	op.add_option("-q", "--quiet", dest="QUIET", action="store_true", default=False, help="Turn off messages")
	opts, args = op.parse_args()


	# TODO: add ipol envelope!

	## Get mandatory arguments
	if len(args) < 1:
	print "Argument missing... exiting\n\n"
	op.print_usage()
	sys.exit(1)
	RUNSDIR = args[0]

	if len(args)==2:
	REFDIR = args[1]
	else:
	print "No data directory given"
	REFDIR = None

	## Load the Professor machinery
	import professor2 as prof
	if not opts.QUIET:
	print prof.logo


	## Load MC run histos and params
	import glob
	INDIRS = glob.glob(os.path.join(RUNSDIR, "*"))
	try:
	PARAMS, HISTOS = prof.read_rundata(INDIRS, opts.PNAME)
	+ print
	except Exception, e:
	print e
	sys.exit(1)

	## Weight file parsing to select a histos subset
	if opts.WFILE:
	matchers = prof.read_pointmatchers(opts.WFILE)
	for hn in HISTOS.keys():
	if not any(m.match_path(hn) for m in matchers.keys()):
	del HISTOS[hn]
	elif opts.DEBUG:
	print "Observable %s passed weight file path filter" % hn
	print "Filtered observables by path, %d remaining" % len(HISTOS)
	HNAMES = HISTOS.keys()

	## If there's nothing left to interpolate, exit!
	if not HNAMES:
	print "No observables remaining... exiting"
	sys.exit(1)

	def mk_envelope(histos):
	""" Take DataHistos and return coordinates for plotting """
	# Iterate over bins, get envelope representation
	# For each bin, return [xmin, xmax, mean(y), min(y), max(y)]
	E=[]
	for num, b in enumerate(histos[histos.keys()[0]].bins):
	t_b = [x.bins[num].val for x in histos.values()]
	E.append([b.xmin, b.xmax, sum(t_b)/len(t_b), min(t_b), max(t_b)])
	return E

	def mk_data(histo):
	""" Take a DataHisto and return coordinates for plotting """
	d= []
	for num, b in enumerate(histo.bins):
	d.append([b.xmid, min(b.xmax-b.xmid, b.xmid- b.xmin), b.val, b.err])
	return d

	def plot_envelope(env, data=None, name="envelope"):
	import matplotlib.pyplot as plt
	import matplotlib as mpl
	# This is the Main figure object
	- fig=plt.figure(figsize=(8,6), dpi=100)
	+ fig = plt.figure(figsize=(8,6), dpi=100)

	# This sets up the grid for main and ratio plot
	gs = mpl.gridspec.GridSpec(1, 1)#, height_ratios=[3,1], hspace=0)

	# Create a main plot
	axmain = fig.add_subplot(gs[0])
	axmain.set_ylabel("$f(x)$")
	axmain.set_xlabel("$x=$ %s"%name)

	# Convenience stuff for plotting clarity
	X, UPPER, LOWER, MEAN = [], [], [], []
	for num, b in enumerate(env):
	ymean = b[-3]
	ymin = b[-2]
	ymax = b[-1]
	xmin = b[0]
	xmax = b[1]
	X.append(xmin)
	X.append(xmax)
	UPPER.append(ymax)
	UPPER.append(ymax)
	LOWER.append(ymin)
	LOWER.append(ymin)
	MEAN.append( ymean )
	MEAN.append( ymean )

	# Envelope
	axmain.fill_between(X, LOWER, UPPER, edgecolor="none", facecolor='yellow', interpolate=False)
	# Mean of envelope
	axmain.plot(X, MEAN, "b-", label="Mean")

	# Data plot
	if data is not None:
	Xdata = [x[0] for x in data]
	dX = [x[1] for x in data]
	Ydata = [x[2] for x in data]
	dY = [x[3] for x in data]
	axmain.errorbar(Xdata, Ydata, dY, dX, fmt="k.", linewidth=1.3, label="Data")


	# Switch off the frame in the legend
	leg = axmain.legend(loc=0, numpoints=1)
	- fr=leg.get_frame()
	+ fr = leg.get_frame()
	fr.set_visible(False)

	# Remove all unnecessary white space
	plt.tight_layout()

	# Output dir, path
	import os
	if not os.path.exists(opts.OUTDIR):
	os.makedirs(opts.OUTDIR)
	outname = os.path.join(opts.OUTDIR, "%s.pdf"%name)

	# Save image as PDF
	plt.savefig(outname)
	- plt.clf()
	+ plt.close(fig)


	## Read reference data histos
	import os, sys, glob
	DHISTOS = {}
	if REFDIR is not None:
	REFFILES = glob.glob(os.path.join(REFDIR, "*"))
	for rf in REFFILES:
	DHISTOS.update(prof.read_histos(rf))
	DNAMES={}
	# filtering
	for h in HNAMES:
	for d in DHISTOS.keys():
	if h in d:
	DNAMES[h]=d
	# Free memory
	for d in DHISTOS.keys():
	if not d in DNAMES.values():
	del DHISTOS[d]

	# Plotting
	for hname in HISTOS.keys():
	h=HISTOS[hname]
	e=mk_envelope(h)
	# Sanitisation for label and output file name
	name=hname.lstrip("/").replace("/","_")

	# Plot data too
	if REFDIR is not None and hname in DNAMES.keys():
	data = mk_data(DHISTOS[DNAMES[hname]])
	plot_envelope(e, data, name)
	else:
	plot_envelope(e, name=name)


	if not opts.QUIET:
	print "Plots written to %s"%opts.OUTDIR

	sys.exit(0)
	diff --git a/pyext/professor2/dataio.py b/pyext/professor2/dataio.py
	--- a/pyext/professor2/dataio.py
	+++ b/pyext/professor2/dataio.py
	@@ -1,219 +1,219 @@
	# -- python --

	from professor2.histos import *
	from professor2.paramsio import *
	import os.path, glob


	def read_histos_root(path):
	"Load histograms from a ROOT file, into a dict of path -> yoda.Histo[DataBin]"
	histos = {}

	# TODO: Could just use YODA for everything, including ROOT reading?
	try:
	import ROOT
	ROOT.gROOT.SetBatch(True)
	except ImportError:
	print "PyROOT not available... skipping"
	return histos

	# TODO: use yoda.root.getall
	def _getallrootobjs(d, basepath="/"):
	"Recurse through a ROOT file/dir and generate (path, obj) pairs"
	for key in d.GetListOfKeys():
	kname = key.GetName()
	if key.IsFolder():
	# TODO: -> "yield from" in Py3
	for i in _getallrootobjs(d.Get(kname), basepath+kname+"/"):
	yield i
	else:
	yield basepath+kname, d.Get(kname)
	try:
	f = ROOT.TFile(path)
	for rname, robj in _getallrootobjs(f):
	bins = []
	if robj.InheritsFrom("TH1"):
	# TODO: allow 2D histos
	if robj.InheritsFrom("TH2"):
	continue
	for ib in xrange(robj.GetNbinsX()):
	xmin = robj.GetXaxis().GetBinLowEdge(ib+1)
	xmax = robj.GetXaxis().GetBinUpEdge(ib+1)
	y = robj.GetBinContent(ib+1)
	ey = robj.GetBinError(ib+1)
	bins.append(DataBin(xmin, xmax, y, ey))
	histos[rname] = Histo(bins, rname)
	elif robj.InheritsFrom("TGraph"):
	for ip in xrange(robj.GetN()):
	x, y = ROOT.Double(), ROOT.Double()
	robj.GetPoint(ip, x, y)
	xmin = x - robj.GetErrorXlow(ip)
	xmax = x + robj.GetErrorXhigh(ip)
	ey = (robj.GetErrorXlow(ip) + robj.GetErrorXhigh(ip)) / 2.0
	bins.append(DataBin(xmin, xmax, y, ey))
	histos[rname] = Histo(bins, rname)
	f.Close()
	except Exception, e:
	print "Can't load histos from ROOT file '%s': %s" % (path, e)

	return histos


	def read_histos_yoda(path):
	"Load histograms from a YODA-supported file type, into a dict of path -> yoda.Histo[DataBin]"
	histos = {}
	try:
	import yoda
	s2s = []
	aos = yoda.read(path, asdict=False)
	for ao in aos:
	import os
	## Skip the Rivet cross-section and event counter objects
	# TODO: Avoid Rivet-specific behaviour by try block handling & scatter.dim requirements
	if os.path.basename(ao.path).startswith("_"):
	continue
	##
	s2s.append(ao.mkScatter())
	del aos #< pro-active YODA memory clean-up
	#
	#s2s = [ao.mkScatter() for ao in yoda.read(path, asdict=False)]
	for s2 in s2s:
	bins = [DataBin(p.xMin, p.xMax, p.y, p.yErrAvg) for p in s2.points]
	#bins = [DataBin(p.xMin, p.xMax, p.y*(p.xMax-p.xMin), p.yErrAvg) for p in s2.points]
	# bins = [DataBin(p.xMin, p.xMax, p.y, p.yErrs) for p in s2.points]
	histos[s2.path] = Histo(bins, s2.path)
	del s2s #< pro-active YODA memory clean-up
	except Exception, e:
	print "Can't load histos from file '%s': %s" % (path, e)
	return histos


	def read_histos(filepath, stripref=True):
	"""
	Load histograms from file, into a dict of path -> yoda.Histo[DataBin]

	If stripref = True, remove any leading /REF prefix from the histo path
	before putting it in the dictionary.
	"""
	histos = {}
	if filepath.endswith(".root"):
	histos.update(read_histos_root(filepath))
	elif any(filepath.endswith(ext) for ext in [".yoda", ".aida", ".flat"]):
	histos.update(read_histos_yoda(filepath))
	if stripref:
	newhistos = {}
	for p, h in histos.iteritems():
	if p.startswith("/REF"):
	p = p.replace("/REF", "", 1)
	h.path = p
	newhistos[p] = h
	histos = newhistos
	return histos


	def read_all_histos(dirpath, stripref=True):
	"""
	Load histograms from all files in the given dir, into a dict of path -> yoda.Histo[DataBin]

	If stripref = True, remove any leading /REF prefix from the histo path
	before putting it in the dictionary.
	"""
	histos = {}
	filepaths = glob.glob(os.path.join(dirpath, "*"))
	for fp in filepaths:
	histos.update(read_histos(fp, stripref))
	return histos


	def read_rundata(dirs, pfname="params.dat", verbosity=1): #, formats="yoda,root,aida,flat"):
	"""
	Read interpolation anchor point data from a provided set of run directory paths.

	Returns a pair of dicts, the first mapping run names (i.e. rundir basenames) to
	the parameter value list for each run, and the second mapping observable names
	(i.e. histogram paths) to a run -> histo dict.
	"""
	params, histos = {}, {}
	import os, glob, re
	re_pfname = re.compile(pfname) if pfname else None
	numruns = len(dirs)
	for num, d in enumerate(sorted(dirs)):
	run = os.path.basename(d)
	- if verbosity >= 2 or (verbosity >= 1 and (num % 100 == 0 or num == numruns)):
	- pct = 100*num/float(numruns)
	- print "Reading run '%s' data: %d/%d = %2.0f%%" % (run, num, numruns, pct)
	+ if verbosity >= 2 or (verbosity >= 1 and (num % 100 == 99)): # or num == numruns-1)):
	+ pct = 100*(num+1)/float(numruns)
	+ print "Reading run '%s' data: %d/%d = %2.0f%%" % (run, num+1, numruns, pct)
	files = glob.glob(os.path.join(d, "*"))
	for f in files:
	## Params file
	#if os.path.basename(f) == pfname:
	if re_pfname and re_pfname.search(os.path.basename(f)):
	params[run] = read_paramsfile(f)
	## Histo file
	else:
	try:
	## Read as a path -> Histo dict
	hs = read_histos(f)
	## Restructure into the path -> run -> Histo return dict
	for path, hist in hs.iteritems():
	histos.setdefault(path, {})[run] = hist
	except:
	pass #< skip files that can't be read as histos

	## Check that a params file was found and read in this dir... or that no attempt was made to find one
	if pfname:
	if run not in params.keys():
	raise Exception("No params file '%s' found in run dir '%s'" % (pfname, d))
	else:
	params = None
	return params, histos


	def read_all_rundata(runsdir, pfname="params.dat", verbosity=1):
	rundirs = glob.glob(os.path.join(runsdir, "*"))
	return read_rundata(rundirs, pfname, verbosity)

	def read_all_rundata_yaml(yamlfile):
	from professor2.utils import mkdict
	PARAMS = {}
	HISTOS = {}
	import yaml
	print "Loading YAML from %s"%yamlfile
	Y=yaml.load(open(yamlfile))
	for num, y in enumerate(Y):
	P=mkdict()
	for p in sorted(y['Params'].keys()):
	P[p] = float(y['Params'][p])
	PARAMS[num]=P
	for f in y['YodaFiles']:
	hs = read_histos(f)
	for path, hist in hs.iteritems():
	HISTOS.setdefault(path, {})[num] = hist
	return PARAMS, HISTOS


	def find_maxerrs(histos):
	"""
	Helper function to find the maximum error values found for each bin in the histos double-dict.

	histos is a nested dict of DataHisto objects, indexed first by histo path and then by
	run name, i.e. it is the second of the two objects returned by read_histos().

	Returns a dict of lists of floats, indexed by histo path. Each list of floats contains the
	max error size seen for each bin of the named observable, across the collection of runs
	histos.keys().

	This functions is useful for regularising chi2 etc. computation by constraining interpolated
	uncertainties to within the range seen in the run data used to create the ipols, to avoid
	blow-up of uncertainties with corresponding distortion of fit optimisation.

	TODO: asymm version?
	"""
	rtn = {}
	for hn, hs in histos.iteritems():
	numbins_h = hs.next().nbins
	maxerrs_h = []
	for ib in xrange(numbins_h):
	emax = max(h.bins[ib].err for h in hs.values())
	maxerrs_h.append(emax)
	rtn[hn] = maxerrs_h
	return rtn

File Metadata

Mime Type: text/x-diff
Expires: Thu, Apr 24, 6:35 AM (1 d, 19 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 4885289
Default Alt Text: (14 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions