00001 #include "../interface/MultiDimFit.h"
00002 #include <stdexcept>
00003 #include <cmath>
00004
00005 #include "TMath.h"
00006 #include "RooArgSet.h"
00007 #include "RooArgList.h"
00008 #include "RooRandom.h"
00009 #include "RooAbsData.h"
00010 #include "RooFitResult.h"
00011 #include "../interface/RooMinimizerOpt.h"
00012 #include <RooStats/ModelConfig.h>
00013 #include "../interface/Combine.h"
00014 #include "../interface/CascadeMinimizer.h"
00015 #include "../interface/CloseCoutSentry.h"
00016 #include "../interface/utils.h"
00017
00018 #include <Math/Minimizer.h>
00019 #include <Math/MinimizerOptions.h>
00020 #include <Math/QuantFuncMathCore.h>
00021 #include <Math/ProbFunc.h>
00022
00023 using namespace RooStats;
00024
00025 MultiDimFit::Algo MultiDimFit::algo_ = None;
00026 std::vector<std::string> MultiDimFit::poi_;
00027 std::vector<RooRealVar *> MultiDimFit::poiVars_;
00028 std::vector<float> MultiDimFit::poiVals_;
00029 RooArgList MultiDimFit::poiList_;
00030 float MultiDimFit::deltaNLL_ = 0;
00031 unsigned int MultiDimFit::points_ = 50;
00032 unsigned int MultiDimFit::firstPoint_ = 0;
00033 unsigned int MultiDimFit::lastPoint_ = std::numeric_limits<unsigned int>::max();
00034 bool MultiDimFit::floatOtherPOIs_ = false;
00035 unsigned int MultiDimFit::nOtherFloatingPoi_ = 0;
00036 bool MultiDimFit::fastScan_ = false;
00037
00038
00039 MultiDimFit::MultiDimFit() :
00040 FitterAlgoBase("MultiDimFit specific options")
00041 {
00042 options_.add_options()
00043 ("algo", boost::program_options::value<std::string>()->default_value("none"), "Algorithm to compute uncertainties")
00044 ("poi,P", boost::program_options::value<std::vector<std::string> >(&poi_), "Parameters of interest to fit (default = all)")
00045 ("floatOtherPOIs", boost::program_options::value<bool>(&floatOtherPOIs_)->default_value(floatOtherPOIs_), "POIs other than the selected ones will be kept freely floating (1) or fixed (0, default)")
00046 ("points", boost::program_options::value<unsigned int>(&points_)->default_value(points_), "Points to use for grid or contour scans")
00047 ("firstPoint", boost::program_options::value<unsigned int>(&firstPoint_)->default_value(firstPoint_), "First point to use")
00048 ("lastPoint", boost::program_options::value<unsigned int>(&lastPoint_)->default_value(lastPoint_), "Last point to use")
00049 ("fastScan", "Do a fast scan, evaluating the likelihood without profiling it.")
00050 ;
00051 }
00052
00053 void MultiDimFit::applyOptions(const boost::program_options::variables_map &vm)
00054 {
00055 applyOptionsBase(vm);
00056 std::string algo = vm["algo"].as<std::string>();
00057 if (algo == "none") {
00058 algo_ = None;
00059 } else if (algo == "singles") {
00060 algo_ = Singles;
00061 } else if (algo == "cross") {
00062 algo_ = Cross;
00063 } else if (algo == "grid") {
00064 algo_ = Grid;
00065 } else if (algo == "random") {
00066 algo_ = RandomPoints;
00067 } else if (algo == "contour2d") {
00068 algo_ = Contour2D;
00069 } else throw std::invalid_argument(std::string("Unknown algorithm: "+algo));
00070 fastScan_ = (vm.count("fastScan") > 0);
00071 }
00072
00073 bool MultiDimFit::runSpecific(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00074
00075 static int isInit = false;
00076 if (!isInit) { initOnce(w, mc_s); isInit = true; }
00077
00078
00079 RooAbsPdf &pdf = *mc_s->GetPdf();
00080
00081
00082 nOtherFloatingPoi_ = 0;
00083 RooLinkedListIter iterP = mc_s->GetParametersOfInterest()->iterator();
00084 for (RooAbsArg *a = (RooAbsArg*) iterP.Next(); a != 0; a = (RooAbsArg*) iterP.Next()) {
00085 if (poiList_.contains(*a)) continue;
00086 RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00087 if (rrv == 0) { std::cerr << "MultiDimFit: Parameter of interest " << a->GetName() << " which is not a RooRealVar will be ignored" << std::endl; continue; }
00088 rrv->setConstant(!floatOtherPOIs_);
00089 if (floatOtherPOIs_) nOtherFloatingPoi_++;
00090 }
00091
00092
00093 const RooCmdArg &constrainCmdArg = withSystematics ? RooFit::Constrain(*mc_s->GetNuisanceParameters()) : RooFit::NumCPU(1);
00094 std::auto_ptr<RooFitResult> res(doFit(pdf, data, (algo_ == Singles ? poiList_ : RooArgList()), constrainCmdArg));
00095 if (res.get() || keepFailures_) {
00096 for (int i = 0, n = poi_.size(); i < n; ++i) {
00097 poiVals_[i] = poiVars_[i]->getVal();
00098 }
00099 Combine::commitPoint(false, 1.);
00100 }
00101 std::auto_ptr<RooAbsReal> nll;
00102 if (algo_ != None && algo_ != Singles) {
00103 nll.reset(pdf.createNLL(data, constrainCmdArg, RooFit::Extended(pdf.canBeExtended())));
00104 }
00105 switch(algo_) {
00106 case None:
00107 if (verbose > 0) {
00108 std::cout << "\n --- MultiDimFit ---" << std::endl;
00109 std::cout << "best fit parameter values: " << std::endl;
00110 int len = poi_[0].length();
00111 for (int i = 0, n = poi_.size(); i < n; ++i) {
00112 len = std::max<int>(len, poi_[i].length());
00113 }
00114 for (int i = 0, n = poi_.size(); i < n; ++i) {
00115 printf(" %*s : %+8.3f\n", len, poi_[i].c_str(), poiVals_[i]);
00116 }
00117 }
00118 break;
00119 case Singles: if (res.get()) doSingles(*res); break;
00120 case Cross: doBox(*nll, cl, "box", true); break;
00121 case Grid: doGrid(*nll); break;
00122 case RandomPoints: doRandomPoints(*nll); break;
00123 case Contour2D: doContour2D(*nll); break;
00124 }
00125
00126 return true;
00127 }
00128
00129 void MultiDimFit::initOnce(RooWorkspace *w, RooStats::ModelConfig *mc_s) {
00130 RooArgSet mcPoi(*mc_s->GetParametersOfInterest());
00131 if (poi_.empty()) {
00132 RooLinkedListIter iterP = mc_s->GetParametersOfInterest()->iterator();
00133 for (RooAbsArg *a = (RooAbsArg*) iterP.Next(); a != 0; a = (RooAbsArg*) iterP.Next()) {
00134 poi_.push_back(a->GetName());
00135 }
00136 }
00137 for (std::vector<std::string>::const_iterator it = poi_.begin(), ed = poi_.end(); it != ed; ++it) {
00138 RooAbsArg *a = mcPoi.find(it->c_str());
00139 if (a == 0) throw std::invalid_argument(std::string("Parameter of interest ")+*it+" not in model.");
00140 RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00141 if (rrv == 0) throw std::invalid_argument(std::string("Parameter of interest ")+*it+" not a RooRealVar.");
00142 poiVars_.push_back(rrv);
00143 poiVals_.push_back(rrv->getVal());
00144 poiList_.add(*rrv);
00145 }
00146
00147 for (int i = 0, n = poi_.size(); i < n; ++i) {
00148 Combine::addBranch(poi_[i].c_str(), &poiVals_[i], (poi_[i]+"/F").c_str());
00149 }
00150 Combine::addBranch("deltaNLL", &deltaNLL_, "deltaNLL/F");
00151 }
00152
00153 void MultiDimFit::doSingles(RooFitResult &res)
00154 {
00155 std::cout << "\n --- MultiDimFit ---" << std::endl;
00156 std::cout << "best fit parameter values and profile-likelihood uncertainties: " << std::endl;
00157 int len = poi_[0].length();
00158 for (int i = 0, n = poi_.size(); i < n; ++i) {
00159 len = std::max<int>(len, poi_[i].length());
00160 }
00161 for (int i = 0, n = poi_.size(); i < n; ++i) {
00162 RooRealVar *rf = dynamic_cast<RooRealVar*>(res.floatParsFinal().find(poi_[i].c_str()));
00163 double bestFitVal = rf->getVal();
00164
00165 double hiErr = +(rf->hasRange("err68") ? rf->getMax("err68") - bestFitVal : rf->getAsymErrorHi());
00166 double loErr = -(rf->hasRange("err68") ? rf->getMin("err68") - bestFitVal : rf->getAsymErrorLo());
00167 double maxError = std::max<double>(std::max<double>(hiErr, loErr), rf->getError());
00168
00169 if (fabs(hiErr) < 0.001*maxError) hiErr = -bestFitVal + rf->getMax();
00170 if (fabs(loErr) < 0.001*maxError) loErr = +bestFitVal - rf->getMin();
00171
00172 double hiErr95 = +(do95_ && rf->hasRange("err95") ? rf->getMax("err95") - bestFitVal : 0);
00173 double loErr95 = -(do95_ && rf->hasRange("err95") ? rf->getMin("err95") - bestFitVal : 0);
00174
00175 poiVals_[i] = bestFitVal - loErr; Combine::commitPoint(true, 0.32);
00176 poiVals_[i] = bestFitVal + hiErr; Combine::commitPoint(true, 0.32);
00177 if (do95_ && rf->hasRange("err95")) {
00178 poiVals_[i] = rf->getMax("err95"); Combine::commitPoint(true, 0.05);
00179 poiVals_[i] = rf->getMin("err95"); Combine::commitPoint(true, 0.05);
00180 poiVals_[i] = bestFitVal;
00181 printf(" %*s : %+8.3f %+6.3f/%+6.3f (68%%) %+6.3f/%+6.3f (95%%) \n", len, poi_[i].c_str(),
00182 poiVals_[i], -loErr, hiErr, loErr95, -hiErr95);
00183 } else {
00184 poiVals_[i] = bestFitVal;
00185 printf(" %*s : %+8.3f %+6.3f/%+6.3f (68%%)\n", len, poi_[i].c_str(),
00186 poiVals_[i], -loErr, hiErr);
00187 }
00188 }
00189 }
00190
00191 void MultiDimFit::doGrid(RooAbsReal &nll)
00192 {
00193 unsigned int n = poi_.size();
00194 if (poi_.size() > 2) throw std::logic_error("Don't know how to do a grid with more than 2 POIs.");
00195 double nll0 = nll.getVal();
00196
00197 std::vector<double> p0(n), pmin(n), pmax(n);
00198 for (unsigned int i = 0; i < n; ++i) {
00199 p0[i] = poiVars_[i]->getVal();
00200 pmin[i] = poiVars_[i]->getMin();
00201 pmax[i] = poiVars_[i]->getMax();
00202 poiVars_[i]->setConstant(true);
00203 }
00204
00205 CascadeMinimizer minim(nll, CascadeMinimizer::Constrained);
00206 minim.setStrategy(minimizerStrategy_);
00207 std::auto_ptr<RooArgSet> params(nll.getParameters((const RooArgSet *)0));
00208 RooArgSet snap; params->snapshot(snap);
00209
00210 if (n == 1) {
00211 for (unsigned int i = 0; i < points_; ++i) {
00212 double x = pmin[0] + (i+0.5)*(pmax[0]-pmin[0])/points_;
00213 if (verbose > 1) std::cout << "Point " << i << "/" << points_ << " " << poiVars_[0]->GetName() << " = " << x << std::endl;
00214 *params = snap;
00215 poiVals_[0] = x;
00216 poiVars_[0]->setVal(x);
00217
00218 {
00219
00220 bool ok = fastScan_ ? true : minim.minimize(verbose-1);
00221 if (ok) {
00222 deltaNLL_ = nll.getVal() - nll0;
00223 double qN = 2*(deltaNLL_);
00224 double prob = ROOT::Math::chisquared_cdf_c(qN, n+nOtherFloatingPoi_);
00225 Combine::commitPoint(true, prob);
00226 }
00227 }
00228 }
00229 } else if (n == 2) {
00230 unsigned int sqrn = ceil(sqrt(double(points_)));
00231 unsigned int ipoint = 0, nprint = ceil(0.005*sqrn*sqrn);
00232 RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CountErrors);
00233 CloseCoutSentry sentry(verbose < 2);
00234 for (unsigned int i = 0; i < sqrn; ++i) {
00235 for (unsigned int j = 0; j < sqrn; ++j, ++ipoint) {
00236 if (ipoint < firstPoint_) continue;
00237 if (ipoint > lastPoint_) break;
00238 *params = snap;
00239 double x = pmin[0] + (i+0.5)*(pmax[0]-pmin[0])/sqrn;
00240 double y = pmin[1] + (j+0.5)*(pmax[1]-pmin[1])/sqrn;
00241 if (verbose && (ipoint % nprint == 0)) {
00242 fprintf(sentry.trueStdOut(), "Point %d/%d, (i,j) = (%d,%d), %s = %f, %s = %f\n",
00243 ipoint,sqrn*sqrn, i,j, poiVars_[0]->GetName(), x, poiVars_[1]->GetName(), y);
00244 }
00245 poiVals_[0] = x;
00246 poiVals_[1] = y;
00247 poiVars_[0]->setVal(x);
00248 poiVars_[1]->setVal(y);
00249 nll.clearEvalErrorLog(); nll.getVal();
00250 if (nll.numEvalErrors() > 0) {
00251 deltaNLL_ = 9999; Combine::commitPoint(true, 0);
00252 continue;
00253 }
00254
00255 {
00256 bool ok = fastScan_ ? true : minim.minimize(verbose-1);
00257 if (ok) {
00258 deltaNLL_ = nll.getVal() - nll0;
00259 double qN = 2*(deltaNLL_);
00260 double prob = ROOT::Math::chisquared_cdf_c(qN, n+nOtherFloatingPoi_);
00261 Combine::commitPoint(true, prob);
00262 }
00263 }
00264 }
00265 }
00266 }
00267 }
00268
00269 void MultiDimFit::doRandomPoints(RooAbsReal &nll)
00270 {
00271 double nll0 = nll.getVal();
00272 for (unsigned int i = 0, n = poi_.size(); i < n; ++i) {
00273 poiVars_[i]->setConstant(true);
00274 }
00275
00276 CascadeMinimizer minim(nll, CascadeMinimizer::Constrained);
00277 minim.setStrategy(minimizerStrategy_);
00278 unsigned int n = poi_.size();
00279 for (unsigned int j = 0; j < points_; ++j) {
00280 for (unsigned int i = 0; i < n; ++i) {
00281 poiVars_[i]->randomize();
00282 poiVals_[i] = poiVars_[i]->getVal();
00283 }
00284
00285 {
00286 CloseCoutSentry sentry(verbose < 3);
00287 bool ok = minim.minimize(verbose-1);
00288 if (ok) {
00289 double qN = 2*(nll.getVal() - nll0);
00290 double prob = ROOT::Math::chisquared_cdf_c(qN, n+nOtherFloatingPoi_);
00291 Combine::commitPoint(true, prob);
00292 }
00293 }
00294 }
00295 }
00296
00297 void MultiDimFit::doContour2D(RooAbsReal &nll)
00298 {
00299 if (poi_.size() != 2) throw std::logic_error("Contour2D works only in 2 dimensions");
00300 RooRealVar *xv = poiVars_[0]; double x0 = poiVals_[0]; float &x = poiVals_[0];
00301 RooRealVar *yv = poiVars_[1]; double y0 = poiVals_[1]; float &y = poiVals_[1];
00302
00303 double threshold = nll.getVal() + 0.5*ROOT::Math::chisquared_quantile_c(1-cl,2+nOtherFloatingPoi_);
00304 if (verbose>0) std::cout << "Best fit point is for " << xv->GetName() << ", " << yv->GetName() << " = " << x0 << ", " << y0 << std::endl;
00305
00306
00307 doBox(nll, cl, "box");
00308 double xMin = xv->getMin("box"), xMax = xv->getMax("box");
00309 double yMin = yv->getMin("box"), yMax = yv->getMax("box");
00310
00311 verbose--;
00312
00313 yv->setConstant(true);
00314 for (unsigned int j = 0; j <= points_; ++j) {
00315
00316 double yc = 0.5*(yMax + yMin), yr = 0.5*(yMax - yMin);
00317 yv->setVal( yc + yr * std::cos(j*M_PI/double(points_)) );
00318
00319 xv->setConstant(false); xv->setVal(x0);
00320 CascadeMinimizer minimXI(nll, CascadeMinimizer::Unconstrained, xv);
00321 minimXI.setStrategy(minimizerStrategy_);
00322 {
00323 CloseCoutSentry sentry(verbose < 3);
00324 minimXI.minimize(verbose-1);
00325 }
00326 double xc = xv->getVal(); xv->setConstant(true);
00327 if (verbose>-1) std::cout << "Best fit " << xv->GetName() << " for " << yv->GetName() << " = " << yv->getVal() << " is at " << xc << std::endl;
00328
00329 CascadeMinimizer minim(nll, CascadeMinimizer::Constrained);
00330 double xup = findCrossing(minim, nll, *xv, threshold, xc, xMax);
00331 if (!std::isnan(xup)) {
00332 x = xup; y = yv->getVal(); Combine::commitPoint(true, 1-cl);
00333 if (verbose>-1) std::cout << "Minimum of " << xv->GetName() << " at " << cl << " CL for " << yv->GetName() << " = " << y << " is " << x << std::endl;
00334 }
00335
00336 double xdn = findCrossing(minim, nll, *xv, threshold, xc, xMin);
00337 if (!std::isnan(xdn)) {
00338 x = xdn; y = yv->getVal(); Combine::commitPoint(true, 1-cl);
00339 if (verbose>-1) std::cout << "Maximum of " << xv->GetName() << " at " << cl << " CL for " << yv->GetName() << " = " << y << " is " << x << std::endl;
00340 }
00341 }
00342
00343 verbose++;
00344 }
00345 void MultiDimFit::doBox(RooAbsReal &nll, double cl, const char *name, bool commitPoints) {
00346 unsigned int n = poi_.size();
00347 double nll0 = nll.getVal(), threshold = nll0 + 0.5*ROOT::Math::chisquared_quantile_c(1-cl,n+nOtherFloatingPoi_);
00348
00349 std::vector<double> p0(n);
00350 for (unsigned int i = 0; i < n; ++i) {
00351 p0[i] = poiVars_[i]->getVal();
00352 poiVars_[i]->setConstant(false);
00353 }
00354
00355 verbose--;
00356 for (unsigned int i = 0; i < n; ++i) {
00357 RooRealVar *xv = poiVars_[i];
00358 xv->setConstant(true);
00359 CascadeMinimizer minimX(nll, CascadeMinimizer::Constrained);
00360 minimX.setStrategy(minimizerStrategy_);
00361
00362 for (unsigned int j = 0; j < n; ++j) poiVars_[j]->setVal(p0[j]);
00363 double xMin = findCrossing(minimX, nll, *xv, threshold, p0[i], xv->getMin());
00364 if (!std::isnan(xMin)) {
00365 if (verbose > -1) std::cout << "Minimum of " << xv->GetName() << " at " << cl << " CL for all others floating is " << xMin << std::endl;
00366 for (unsigned int j = 0; j < n; ++j) poiVals_[j] = poiVars_[j]->getVal();
00367 if (commitPoints) Combine::commitPoint(true, 1-cl);
00368 } else {
00369 xMin = xv->getMin();
00370 for (unsigned int j = 0; j < n; ++j) poiVals_[j] = poiVars_[j]->getVal();
00371 double prob = ROOT::Math::chisquared_cdf_c(2*(nll.getVal() - nll0), n+nOtherFloatingPoi_);
00372 if (commitPoints) Combine::commitPoint(true, prob);
00373 if (verbose > -1) std::cout << "Minimum of " << xv->GetName() << " at " << cl << " CL for all others floating is " << xMin << " (on the boundary, p-val " << prob << ")" << std::endl;
00374 }
00375
00376 for (unsigned int j = 0; j < n; ++j) poiVars_[j]->setVal(p0[j]);
00377 double xMax = findCrossing(minimX, nll, *xv, threshold, p0[i], xv->getMax());
00378 if (!std::isnan(xMax)) {
00379 if (verbose > -1) std::cout << "Maximum of " << xv->GetName() << " at " << cl << " CL for all others floating is " << xMax << std::endl;
00380 for (unsigned int j = 0; j < n; ++j) poiVals_[j] = poiVars_[j]->getVal();
00381 if (commitPoints) Combine::commitPoint(true, 1-cl);
00382 } else {
00383 xMax = xv->getMax();
00384 double prob = ROOT::Math::chisquared_cdf_c(2*(nll.getVal() - nll0), n+nOtherFloatingPoi_);
00385 for (unsigned int j = 0; j < n; ++j) poiVals_[j] = poiVars_[j]->getVal();
00386 if (commitPoints) Combine::commitPoint(true, prob);
00387 if (verbose > -1) std::cout << "Maximum of " << xv->GetName() << " at " << cl << " CL for all others floating is " << xMax << " (on the boundary, p-val " << prob << ")" << std::endl;
00388 }
00389
00390 xv->setRange(name, xMin, xMax);
00391 xv->setConstant(false);
00392 }
00393 verbose++;
00394 }