Page MenuHomeHEPForge

No OneTemporary

diff --git a/bin/prof2-envelopes b/bin/prof2-envelopes
--- a/bin/prof2-envelopes
+++ b/bin/prof2-envelopes
@@ -1,184 +1,185 @@
#! /usr/bin/env python
"""\
%prog <runsdir> [datafile directory] [opts]
Envelope of input data
"""
import matplotlib, os
matplotlib.use(os.environ.get("MPL_BACKEND", "Agg"))
import optparse, os, sys
op = optparse.OptionParser(usage=__doc__)
op.add_option("--pname", "--pfile", dest="PNAME", default="params.dat", help="Name of the params file to be found in each run directory (default: %default)")
op.add_option("--wfile", dest="WFILE", default=None, help="Path to a weight file, used to restrict plotting to a subset of histograms (default: %default)")
op.add_option("-o", "--outdir", dest="OUTDIR", default="envelopes", help="Output folder for plots (default: %default)")
op.add_option("-v", "--debug", dest="DEBUG", action="store_true", default=False, help="Turn on some debug messages")
op.add_option("-q", "--quiet", dest="QUIET", action="store_true", default=False, help="Turn off messages")
opts, args = op.parse_args()
# TODO: add ipol envelope!
## Get mandatory arguments
if len(args) < 1:
print "Argument missing... exiting\n\n"
op.print_usage()
sys.exit(1)
RUNSDIR = args[0]
if len(args)==2:
REFDIR = args[1]
else:
print "No data directory given"
REFDIR = None
## Load the Professor machinery
import professor2 as prof
if not opts.QUIET:
print prof.logo
## Load MC run histos and params
import glob
INDIRS = glob.glob(os.path.join(RUNSDIR, "*"))
try:
PARAMS, HISTOS = prof.read_rundata(INDIRS, opts.PNAME)
+ print
except Exception, e:
print e
sys.exit(1)
## Weight file parsing to select a histos subset
if opts.WFILE:
matchers = prof.read_pointmatchers(opts.WFILE)
for hn in HISTOS.keys():
if not any(m.match_path(hn) for m in matchers.keys()):
del HISTOS[hn]
elif opts.DEBUG:
print "Observable %s passed weight file path filter" % hn
print "Filtered observables by path, %d remaining" % len(HISTOS)
HNAMES = HISTOS.keys()
## If there's nothing left to interpolate, exit!
if not HNAMES:
print "No observables remaining... exiting"
sys.exit(1)
def mk_envelope(histos):
""" Take DataHistos and return coordinates for plotting """
# Iterate over bins, get envelope representation
# For each bin, return [xmin, xmax, mean(y), min(y), max(y)]
E=[]
for num, b in enumerate(histos[histos.keys()[0]].bins):
t_b = [x.bins[num].val for x in histos.values()]
E.append([b.xmin, b.xmax, sum(t_b)/len(t_b), min(t_b), max(t_b)])
return E
def mk_data(histo):
""" Take a DataHisto and return coordinates for plotting """
d= []
for num, b in enumerate(histo.bins):
d.append([b.xmid, min(b.xmax-b.xmid, b.xmid- b.xmin), b.val, b.err])
return d
def plot_envelope(env, data=None, name="envelope"):
import matplotlib.pyplot as plt
import matplotlib as mpl
# This is the Main figure object
- fig=plt.figure(figsize=(8,6), dpi=100)
+ fig = plt.figure(figsize=(8,6), dpi=100)
# This sets up the grid for main and ratio plot
gs = mpl.gridspec.GridSpec(1, 1)#, height_ratios=[3,1], hspace=0)
# Create a main plot
axmain = fig.add_subplot(gs[0])
axmain.set_ylabel("$f(x)$")
axmain.set_xlabel("$x=$ %s"%name)
# Convenience stuff for plotting clarity
X, UPPER, LOWER, MEAN = [], [], [], []
for num, b in enumerate(env):
ymean = b[-3]
ymin = b[-2]
ymax = b[-1]
xmin = b[0]
xmax = b[1]
X.append(xmin)
X.append(xmax)
UPPER.append(ymax)
UPPER.append(ymax)
LOWER.append(ymin)
LOWER.append(ymin)
MEAN.append( ymean )
MEAN.append( ymean )
# Envelope
axmain.fill_between(X, LOWER, UPPER, edgecolor="none", facecolor='yellow', interpolate=False)
# Mean of envelope
axmain.plot(X, MEAN, "b-", label="Mean")
# Data plot
if data is not None:
Xdata = [x[0] for x in data]
dX = [x[1] for x in data]
Ydata = [x[2] for x in data]
dY = [x[3] for x in data]
axmain.errorbar(Xdata, Ydata, dY, dX, fmt="k.", linewidth=1.3, label="Data")
# Switch off the frame in the legend
leg = axmain.legend(loc=0, numpoints=1)
- fr=leg.get_frame()
+ fr = leg.get_frame()
fr.set_visible(False)
# Remove all unnecessary white space
plt.tight_layout()
# Output dir, path
import os
if not os.path.exists(opts.OUTDIR):
os.makedirs(opts.OUTDIR)
outname = os.path.join(opts.OUTDIR, "%s.pdf"%name)
# Save image as PDF
plt.savefig(outname)
- plt.clf()
+ plt.close(fig)
## Read reference data histos
import os, sys, glob
DHISTOS = {}
if REFDIR is not None:
REFFILES = glob.glob(os.path.join(REFDIR, "*"))
for rf in REFFILES:
DHISTOS.update(prof.read_histos(rf))
DNAMES={}
# filtering
for h in HNAMES:
for d in DHISTOS.keys():
if h in d:
DNAMES[h]=d
# Free memory
for d in DHISTOS.keys():
if not d in DNAMES.values():
del DHISTOS[d]
# Plotting
for hname in HISTOS.keys():
h=HISTOS[hname]
e=mk_envelope(h)
# Sanitisation for label and output file name
name=hname.lstrip("/").replace("/","_")
# Plot data too
if REFDIR is not None and hname in DNAMES.keys():
data = mk_data(DHISTOS[DNAMES[hname]])
plot_envelope(e, data, name)
else:
plot_envelope(e, name=name)
if not opts.QUIET:
print "Plots written to %s"%opts.OUTDIR
sys.exit(0)
diff --git a/pyext/professor2/dataio.py b/pyext/professor2/dataio.py
--- a/pyext/professor2/dataio.py
+++ b/pyext/professor2/dataio.py
@@ -1,219 +1,219 @@
# -*- python -*-
from professor2.histos import *
from professor2.paramsio import *
import os.path, glob
def read_histos_root(path):
"Load histograms from a ROOT file, into a dict of path -> yoda.Histo[DataBin]"
histos = {}
# TODO: Could just use YODA for everything, including ROOT reading?
try:
import ROOT
ROOT.gROOT.SetBatch(True)
except ImportError:
print "PyROOT not available... skipping"
return histos
# TODO: use yoda.root.getall
def _getallrootobjs(d, basepath="/"):
"Recurse through a ROOT file/dir and generate (path, obj) pairs"
for key in d.GetListOfKeys():
kname = key.GetName()
if key.IsFolder():
# TODO: -> "yield from" in Py3
for i in _getallrootobjs(d.Get(kname), basepath+kname+"/"):
yield i
else:
yield basepath+kname, d.Get(kname)
try:
f = ROOT.TFile(path)
for rname, robj in _getallrootobjs(f):
bins = []
if robj.InheritsFrom("TH1"):
# TODO: allow 2D histos
if robj.InheritsFrom("TH2"):
continue
for ib in xrange(robj.GetNbinsX()):
xmin = robj.GetXaxis().GetBinLowEdge(ib+1)
xmax = robj.GetXaxis().GetBinUpEdge(ib+1)
y = robj.GetBinContent(ib+1)
ey = robj.GetBinError(ib+1)
bins.append(DataBin(xmin, xmax, y, ey))
histos[rname] = Histo(bins, rname)
elif robj.InheritsFrom("TGraph"):
for ip in xrange(robj.GetN()):
x, y = ROOT.Double(), ROOT.Double()
robj.GetPoint(ip, x, y)
xmin = x - robj.GetErrorXlow(ip)
xmax = x + robj.GetErrorXhigh(ip)
ey = (robj.GetErrorXlow(ip) + robj.GetErrorXhigh(ip)) / 2.0
bins.append(DataBin(xmin, xmax, y, ey))
histos[rname] = Histo(bins, rname)
f.Close()
except Exception, e:
print "Can't load histos from ROOT file '%s': %s" % (path, e)
return histos
def read_histos_yoda(path):
"Load histograms from a YODA-supported file type, into a dict of path -> yoda.Histo[DataBin]"
histos = {}
try:
import yoda
s2s = []
aos = yoda.read(path, asdict=False)
for ao in aos:
import os
## Skip the Rivet cross-section and event counter objects
# TODO: Avoid Rivet-specific behaviour by try block handling & scatter.dim requirements
if os.path.basename(ao.path).startswith("_"):
continue
##
s2s.append(ao.mkScatter())
del aos #< pro-active YODA memory clean-up
#
#s2s = [ao.mkScatter() for ao in yoda.read(path, asdict=False)]
for s2 in s2s:
bins = [DataBin(p.xMin, p.xMax, p.y, p.yErrAvg) for p in s2.points]
#bins = [DataBin(p.xMin, p.xMax, p.y*(p.xMax-p.xMin), p.yErrAvg) for p in s2.points]
# bins = [DataBin(p.xMin, p.xMax, p.y, p.yErrs) for p in s2.points]
histos[s2.path] = Histo(bins, s2.path)
del s2s #< pro-active YODA memory clean-up
except Exception, e:
print "Can't load histos from file '%s': %s" % (path, e)
return histos
def read_histos(filepath, stripref=True):
"""
Load histograms from file, into a dict of path -> yoda.Histo[DataBin]
If stripref = True, remove any leading /REF prefix from the histo path
before putting it in the dictionary.
"""
histos = {}
if filepath.endswith(".root"):
histos.update(read_histos_root(filepath))
elif any(filepath.endswith(ext) for ext in [".yoda", ".aida", ".flat"]):
histos.update(read_histos_yoda(filepath))
if stripref:
newhistos = {}
for p, h in histos.iteritems():
if p.startswith("/REF"):
p = p.replace("/REF", "", 1)
h.path = p
newhistos[p] = h
histos = newhistos
return histos
def read_all_histos(dirpath, stripref=True):
"""
Load histograms from all files in the given dir, into a dict of path -> yoda.Histo[DataBin]
If stripref = True, remove any leading /REF prefix from the histo path
before putting it in the dictionary.
"""
histos = {}
filepaths = glob.glob(os.path.join(dirpath, "*"))
for fp in filepaths:
histos.update(read_histos(fp, stripref))
return histos
def read_rundata(dirs, pfname="params.dat", verbosity=1): #, formats="yoda,root,aida,flat"):
"""
Read interpolation anchor point data from a provided set of run directory paths.
Returns a pair of dicts, the first mapping run names (i.e. rundir basenames) to
the parameter value list for each run, and the second mapping observable names
(i.e. histogram paths) to a run -> histo dict.
"""
params, histos = {}, {}
import os, glob, re
re_pfname = re.compile(pfname) if pfname else None
numruns = len(dirs)
for num, d in enumerate(sorted(dirs)):
run = os.path.basename(d)
- if verbosity >= 2 or (verbosity >= 1 and (num % 100 == 0 or num == numruns)):
- pct = 100*num/float(numruns)
- print "Reading run '%s' data: %d/%d = %2.0f%%" % (run, num, numruns, pct)
+ if verbosity >= 2 or (verbosity >= 1 and (num % 100 == 99)): # or num == numruns-1)):
+ pct = 100*(num+1)/float(numruns)
+ print "Reading run '%s' data: %d/%d = %2.0f%%" % (run, num+1, numruns, pct)
files = glob.glob(os.path.join(d, "*"))
for f in files:
## Params file
#if os.path.basename(f) == pfname:
if re_pfname and re_pfname.search(os.path.basename(f)):
params[run] = read_paramsfile(f)
## Histo file
else:
try:
## Read as a path -> Histo dict
hs = read_histos(f)
## Restructure into the path -> run -> Histo return dict
for path, hist in hs.iteritems():
histos.setdefault(path, {})[run] = hist
except:
pass #< skip files that can't be read as histos
## Check that a params file was found and read in this dir... or that no attempt was made to find one
if pfname:
if run not in params.keys():
raise Exception("No params file '%s' found in run dir '%s'" % (pfname, d))
else:
params = None
return params, histos
def read_all_rundata(runsdir, pfname="params.dat", verbosity=1):
rundirs = glob.glob(os.path.join(runsdir, "*"))
return read_rundata(rundirs, pfname, verbosity)
def read_all_rundata_yaml(yamlfile):
from professor2.utils import mkdict
PARAMS = {}
HISTOS = {}
import yaml
print "Loading YAML from %s"%yamlfile
Y=yaml.load(open(yamlfile))
for num, y in enumerate(Y):
P=mkdict()
for p in sorted(y['Params'].keys()):
P[p] = float(y['Params'][p])
PARAMS[num]=P
for f in y['YodaFiles']:
hs = read_histos(f)
for path, hist in hs.iteritems():
HISTOS.setdefault(path, {})[num] = hist
return PARAMS, HISTOS
def find_maxerrs(histos):
"""
Helper function to find the maximum error values found for each bin in the histos double-dict.
histos is a nested dict of DataHisto objects, indexed first by histo path and then by
run name, i.e. it is the second of the two objects returned by read_histos().
Returns a dict of lists of floats, indexed by histo path. Each list of floats contains the
max error size seen for each bin of the named observable, across the collection of runs
histos.keys().
This functions is useful for regularising chi2 etc. computation by constraining interpolated
uncertainties to within the range seen in the run data used to create the ipols, to avoid
blow-up of uncertainties with corresponding distortion of fit optimisation.
TODO: asymm version?
"""
rtn = {}
for hn, hs in histos.iteritems():
numbins_h = hs.next().nbins
maxerrs_h = []
for ib in xrange(numbins_h):
emax = max(h.bins[ib].err for h in hs.values())
maxerrs_h.append(emax)
rtn[hn] = maxerrs_h
return rtn

File Metadata

Mime Type
text/x-diff
Expires
Thu, Apr 24, 6:35 AM (1 d, 19 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
4885289
Default Alt Text
(14 KB)

Event Timeline