CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
JacknifeQuantile.cc
Go to the documentation of this file.
1 #include "../interface/JacknifeQuantile.h"
2 #include <cmath>
3 #include <stdexcept>
4 #include <iostream>
5 #include <algorithm>
6 #include <Math/QuantFuncMathMore.h>
7 #include <RooAbsData.h>
8 #include <RooRealVar.h>
9 
11 {
12 }
13 
15 {
16 }
17 
18 QuantileCalculator::QuantileCalculator(const std::vector<double> &values, const std::vector<double> &weights)
19 {
20  import<double>(values, weights);
21 }
22 
23 QuantileCalculator::QuantileCalculator(const std::vector<float> &values, const std::vector<float> &weights)
24 {
25  import<float>(values, weights);
26 }
27 
28 QuantileCalculator::QuantileCalculator(const RooAbsData &data, const char *varName, int firstEntry, int lastEntry)
29 {
30  if (lastEntry == -1) lastEntry = data.numEntries();
31  const RooArgSet *set = data.get();
32  const RooRealVar *x = dynamic_cast<const RooRealVar *>(set->find(varName));
33  if (x == 0) {
34  set->Print("V");
35  throw std::logic_error("Parameter of interest not in the idataset");
36  }
37  sumw_.resize(1, 0.0);
38  if (firstEntry < lastEntry) {
39  points_.resize(lastEntry - firstEntry);
40  for (int i = firstEntry, j = 0; i < lastEntry; ++i, ++j) {
41  data.get(i);
42  points_[j].x = x->getVal();
43  points_[j].w = data.weight();
44  sumw_[0] += points_[j].w;
45  }
46  }
47 }
48 
50 {
51  std::random_shuffle(points_.begin(), points_.end());
52 }
53 
54 std::pair<double,double> QuantileCalculator::quantileAndError(double quantile, Method method)
55 {
56  if (method == Simple) {
57  std::sort(points_.begin(), points_.end());
58  quantiles(quantile, false);
59  return std::pair<double,double>(quantiles_[0], 0);
60  } else if (method == Sectioning || method == Jacknife) {
61  int m = guessPartitions(points_.size(), quantile);
62  partition(m, (method == Jacknife));
63  std::sort(points_.begin(), points_.end());
64  quantiles(quantile, (method == Jacknife));
65  double avg = 0;
66  for (int i = 0; i < m; ++i) {
67  avg += quantiles_[i];
68  }
69  avg /= m;
70  double rms = 0;
71  for (int i = 0; i < m; ++i) {
72  rms += (quantiles_[i] - avg)*(quantiles_[i] - avg);
73  }
74  rms = sqrt(rms/(m*(m-1)));
75  double onesigma = ROOT::Math::tdistribution_quantile_c(0.16, m-1);
76  return std::pair<double,double>(avg, rms * onesigma);
77  }
78  return std::pair<double,double>(0,-1);
79 }
80 
81 int QuantileCalculator::guessPartitions(int size, double quantile)
82 {
83  // number of events on the small side of the quantile for m=1
84  double smallnum = size * std::min(quantile, 1-quantile);
85  // now the naive idea is that err(q) ~ 1/sqrt(smallnum/m), while the error from averaging does as ~1/sqrt(m)
86  // so you want m ~ sqrt(smallnum)
87  int n = 5; //std::min(std::max(3., sqrt(smallnum)), 5.);
88  std::cout << " ..... will split the " << size << " events in " << n << " subsets, smallnum is " << smallnum << std::endl;
89  return n;
90 }
91 
92 template<typename T>
93 void QuantileCalculator::import(const std::vector<T> &values, const std::vector<T> &weights)
94 {
95  int n = values.size();
96  points_.resize(values.size());
97  sumw_.resize(1); sumw_[0] = 0.0;
98  for (int i = 0; i < n; ++i) {
99  points_[i].x = values[i];
100  points_[i].w = weights.empty() ? 1 : weights[i];
101  points_[i].set = 0;
102  sumw_[0] += points_[i].w;
103  }
104 }
105 
106 
107 void QuantileCalculator::partition(int m, bool doJacknife)
108 {
109  int n = points_.size();
110  sumw_.resize(m+1);
111  std::fill(sumw_.begin(), sumw_.end(), 0.0);
112  double alpha = double(m)/n;
113  for (int i = 0, j = 0; i < n; ++i) {
114  j = floor(i*alpha);
115  points_[i].set = j;
116  sumw_[j] += points_[i].w;
117  }
118  if (doJacknife) {
119  // at this point sumw[j] has the weights of j...
120  // now I have to get the weights of everyhing else
121  // start with all weights
122  for (int j = 0; j < m; ++j) sumw_[m] += sumw_[j];
123  // and then subtract
124  for (int j = 0; j < m; ++j) sumw_[j] = (sumw_[m] - sumw_[j]);
125  }
126 }
127 
128 void QuantileCalculator::quantiles(double quantile, bool doJacknife)
129 {
130  int m = sumw_.size()-1;
131  int n = points_.size();
132  quantiles_.resize(m+1);
133  for (int j = 0; j <= m; ++j) {
134  double runningSum = 0;
135  double threshold = quantile * sumw_[j];
136  int ilow = 0, ihigh = n-1;
137  for (int i = 0; i < n; ++i) {
138  if ((points_[i].set == j) == doJacknife) continue; // if jacknife, cut away just one piece, otherwise cut away everthing else
139  //std::cout << "\t\t\t" << points_[i].x << std::endl;;
140  if (runningSum + points_[i].w <= threshold) {
141  runningSum += points_[i].w;
142  ilow = i;
143  } else {
144  ihigh = i;
145  break;
146  }
147  }
148  if (runningSum == threshold) { // possible if all unit weights
149  quantiles_[j] = points_[ilow].x;
150  } else {
151  quantiles_[j] = 0.5*(points_[ilow].x + points_[ihigh].x);
152  }
153  }
154  for (int j = 0; j <= m; ++j) {
155  //printf(" ... quantile of section %d: %6.3f\n", j, quantiles_[j]);
156  }
157  if (doJacknife) {
158  // now compute the pseudo-values alpha_[j] = m * quantile[m] - (m-1) * quantile[j]
159  for (int j = 0; j < m; ++j) {
160  quantiles_[j] = m * quantiles_[m] - (m-1) * quantiles_[j];
161  printf(" ... jacknife quantile of section %d: %6.3f\n", j, quantiles_[j]);
162  }
163  }
164 }
void import(const std::vector< T > &values, const std::vector< T > &weights)
int i
Definition: DBlmapReader.cc:9
float alpha
Definition: AMPTWrapper.h:95
string fill
Definition: lumiContext.py:319
std::vector< float > quantiles_
#define min(a, b)
Definition: mlp_lapack.h:161
void partition(int m, bool doJacknife)
T sqrt(T t)
Definition: SSEVec.h:46
int j
Definition: DBlmapReader.cc:9
std::pair< double, double > quantileAndError(double quantile, Method method)
std::vector< double > sumw_
void quantiles(double quantile, bool doJacknife)
std::vector< point > points_
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
tuple cout
Definition: gather_cfg.py:121
void randomizePoints()
Randomize points before sectioning.
x
Definition: VDTMath.h:216
tuple size
Write out results.
T w() const
void set(const std::string &name, int value)
set the flag, with a run-time name
int guessPartitions(int size, double quantile)