00001
00002
00003 #include "PhysicsTools/StatPatternRecognition/interface/SprExperiment.hh"
00004 #include "PhysicsTools/StatPatternRecognition/interface/SprPCATransformer.hh"
00005 #include "PhysicsTools/StatPatternRecognition/interface/SprAbsFilter.hh"
00006 #include "PhysicsTools/StatPatternRecognition/interface/SprDataMoments.hh"
00007 #include "PhysicsTools/StatPatternRecognition/interface/SprDefs.hh"
00008 #include "PhysicsTools/StatPatternRecognition/src/SprVector.hh"
00009 #include "PhysicsTools/StatPatternRecognition/src/SprSymMatrix.hh"
00010
00011 #include <cassert>
00012 #include <cmath>
00013 #include <algorithm>
00014 #include <functional>
00015 #include <sstream>
00016
00017 using namespace std;
00018
00019
00020 struct PCACmpPairDIFirst
00021 : public binary_function<pair<double,int>,pair<double,int>,bool> {
00022 bool operator()(const pair<double,int>& l, const pair<double,int>& r)
00023 const {
00024 return ( fabs(l.first) > fabs(r.first) );
00025 }
00026 };
00027
00028
00029 SprPCATransformer::SprPCATransformer()
00030 :
00031 SprAbsVarTransformer(),
00032 U_(),
00033 eigenValues_()
00034 {}
00035
00036
00037 SprPCATransformer::SprPCATransformer(const SprMatrix& U,
00038 const std::vector<
00039 std::pair<double,int> >& eigenValues)
00040 :
00041 SprAbsVarTransformer(),
00042 U_(U),
00043 eigenValues_(eigenValues)
00044 {}
00045
00046
00047 bool SprPCATransformer::train(const SprAbsFilter* data, int verbose)
00048 {
00049
00050 assert( data != 0 );
00051
00052
00053 eigenValues_.clear();
00054 U_ = SprMatrix();
00055
00056
00057 unsigned dim = data->dim();
00058 data->vars(oldVars_);
00059 assert( dim == oldVars_.size() );
00060 newVars_.clear();
00061 newVars_.resize(dim);
00062 for( int d=0;d<dim;d++ ) {
00063 ostringstream os;
00064 os << "pc" << d;
00065 newVars_[d] = os.str();
00066 }
00067
00068
00069 SprDataMoments moms(data);
00070 SprVector mean;
00071 SprSymMatrix cov;
00072 if( !moms.covariance(cov,mean) ) {
00073 cerr << "Unable to compute covariance matrix for PCA." << endl;
00074 return false;
00075 }
00076
00077
00078 SprMatrix U = cov.diagonalize().T();
00079 if( U.num_row()!=dim || U.num_col()!=dim ) {
00080 cerr << "Dimensionality of the PCA transformation matrix does not match."
00081 << endl;
00082 return false;
00083 }
00084
00085
00086 eigenValues_.resize(dim);
00087 for( int d=0;d<dim;d++ )
00088 eigenValues_[d] = pair<double,int>(cov[d][d],d);
00089 stable_sort(eigenValues_.begin(),eigenValues_.end(),PCACmpPairDIFirst());
00090 if( verbose > 0 ) {
00091 cout << "PCA eigenvalues: ";
00092 for( int d=0;d<dim;d++ ) cout << eigenValues_[d].first << " ";
00093 cout << endl;
00094 }
00095
00096
00097 U_ = U;
00098 for( int iold=0;iold<dim;iold++ ) {
00099 int inew = eigenValues_[iold].second;
00100 for( int j=0;j<dim;j++ )
00101 U_[inew][j] = U[iold][j];
00102 }
00103
00104
00105 return true;
00106 }
00107
00108
00109 void SprPCATransformer::transform(const std::vector<double>& in,
00110 std::vector<double>& out) const
00111 {
00112 int dim = U_.num_row();
00113 assert( in.size() == dim );
00114 SprVector v(in);
00115 out = (U_*v).std();
00116 }
00117
00118
00119 void SprPCATransformer::inverse(const std::vector<double>& in,
00120 std::vector<double>& out) const
00121 {
00122 int dim = U_.num_row();
00123 assert( in.size() == dim );
00124 SprVector v(in);
00125 out = (U_.T()*v).std();
00126 }
00127
00128
00129 void SprPCATransformer::print(std::ostream& os) const
00130 {
00131
00132 os << "VarTransformer: " << this->name().c_str()
00133 << " " << SprVersion.c_str() << endl;
00134
00135
00136 int dim = oldVars_.size();
00137 vector<string> vars(oldVars_);
00138
00139
00140 for( int i=0;i<dim;i++ ) {
00141 if( vars[i].find(' ') != string::npos )
00142 vars[i].erase(vars[i].find_first_of(' '));
00143 }
00144
00145
00146 os << "Dim: " << dim << endl;
00147
00148
00149 os << "Eigenvalues:";
00150 for( int d=0;d<dim;d++ )
00151 os << " " << eigenValues_[d].first;
00152 os << endl;
00153
00154
00155 os << "Indices:";
00156 for( int d=0;d<dim;d++ )
00157 os << " " << eigenValues_[d].second;
00158 os << endl;
00159
00160
00161 for( int i=0;i<dim;i++ ) {
00162 os << i << " " << newVars_[i].c_str() << "=";
00163 for( int j=0;j<dim;j++ )
00164 os << " + " << U_[i][j] << " *" << vars[j].c_str();
00165 os << endl;
00166 }
00167 }
00168
00169