diff --git a/SpMat.h b/SpMat.h index 07377c7b19f38ff9fb82750a8dc3d8db653f405a..ae81cc6fb155164c2ff91d1d8e3f1f7d8bed6210 100644 --- a/SpMat.h +++ b/SpMat.h @@ -25,6 +25,8 @@ #include <vector> #include <fstream> #include <iomanip> +#include <thread> +#include <chrono> #include "armawrap/newmat.h" #include "cg.h" #include "bicg.h" @@ -89,16 +91,18 @@ template<class T> class SpMat { public: - SpMat() : _m(0), _n(0), _nz(0), _ri(0), _val(0), _pw(false) {} - SpMat(unsigned int m, unsigned int n) : _m(m), _n(n), _nz(0), _ri(n), _val(n), _pw(false) {} - SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp); - SpMat(const NEWMAT::GeneralMatrix& M); - SpMat(const std::string& fname); + SpMat() : _m(0), _n(0), _nz(0), _ri(0), _val(0), _pw(false), _nt(1) {} + SpMat(unsigned int m, unsigned int n, unsigned int nt=1) : _m(m), _n(n), _nz(0), _ri(n), _val(n), _pw(false), _nt(nt) {} + SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp, unsigned int nt=1); + SpMat(const NEWMAT::GeneralMatrix& M, unsigned int nt=1); + SpMat(const std::string& fname, unsigned int nt=1); ~SpMat() {} unsigned int Nrows() const {return(_m);} unsigned int Ncols() const {return(_n);} unsigned int NZ() const {return(_nz);} + unsigned int Nthreads() const {return(_nt);} + void SetNthreads(unsigned int nt) {_nt=nt;} NEWMAT::ReturnMatrix AsNEWMAT() const; void Save(const std::string& fname, @@ -118,7 +122,7 @@ public: void Set(unsigned int r, unsigned int c, const T& v) {here(r,c) = v;} // Set a single value void SetColumn(unsigned int c, const NEWMAT::ColumnVector& col, double eps=0.0); // Set a whole column (obliterating what was there before) - SpMat<T>& MultiplyColumns(const NEWMAT::Matrix& vals); // Multiply each column of a matrix by a value + SpMat<T>& MultiplyColumns(const NEWMAT::Matrix& vals); // Multiply each column of a matrix by a value void AddTo(unsigned int r, unsigned int c, const T& v) {here(r,c) += v;} // Add value to a single (possibly existing) value SpMat<T>& operator+=(const SpMat& M) @@ -131,18 +135,18 @@ public: if (same_sparsity(M)) return(add_same_sparsity_mat_to_me(M,-1)); else return(add_diff_sparsity_mat_to_me(M,-1)); } - const NEWMAT::ReturnMatrix operator*(const NEWMAT::ColumnVector& x) const; // Multiplication with column vector - const NEWMAT::ReturnMatrix trans_mult(const NEWMAT::ColumnVector& x) const; // Multiplication of transpose with column vector + const NEWMAT::ReturnMatrix operator*(const NEWMAT::ColumnVector& x) const; // Multiplication with column vector. Has multi-thread support. + const NEWMAT::ReturnMatrix trans_mult(const NEWMAT::ColumnVector& x) const; // Multiplication of transpose with column vector. Has multi-thread support. const NEWMAT::ReturnMatrix TransMult(const NEWMAT::ColumnVector& x) const { return(trans_mult(x)); // Duplication for compatibility with IML++ } - const SpMat<T> TransMult(const SpMat<T>& B) const { return(this->t()*B); } // Multiplication of transpose(*this) with sparse matrix B + const SpMat<T> TransMult(const SpMat<T>& B) const { return(this->t()*B); } // Multiplication of transpose(*this) with sparse matrix B. Has multi-thread support. SpMat<T>& operator*=(double s); // Multiplication of self with scalar SpMat<T> operator-(const SpMat<T>& M) const {return(SpMat<T>(M) *= -1.0);} // Unary minus SpMat<T>& operator|=(const SpMat<T>& rh); // Hor concat to right SpMat<T>& operator&=(const SpMat<T>& bh); // Vert concat below - const SpMat<T> TransMultSelf() const {return(this->t() * *this);} // Returns transpose(*this)*(*this) + const SpMat<T> TransMultSelf() const {return(this->t() * *this);} // Returns transpose(*this)*(*this). Has multi-thread support. const SpMat<T> t() const; // Returns transpose(*this). @@ -182,9 +186,12 @@ public: ColumnIterator end(unsigned int col) const { if (col>_n) throw SpMatException("ColumnIterator: col out of range"); return(ColumnIterator(*this,col,true)); } template<class TT> - friend const SpMat<TT> operator*(const SpMat<TT>& lh, const SpMat<TT>& rh); // Multiplication of two sparse matrices + friend const SpMat<TT> operator*(const SpMat<TT>& lh, const SpMat<TT>& rh); // Multiplication of two sparse matrices. Has multi-thread support. - NEWMAT::ReturnMatrix SolveForx(const NEWMAT::ColumnVector& b, // Solve for x in b=(*this)*x + template<class TT> + friend bool operator==(const SpMat<TT>& lh, const SpMat<TT>& rh); + + NEWMAT::ReturnMatrix SolveForx(const NEWMAT::ColumnVector& b, // Solve for x in b=(*this)*x . Has multi-thread support. MatrixType type = UNKNOWN, double tol = 1e-4, unsigned int miter = 200, @@ -209,12 +216,13 @@ protected: T& here(unsigned int r, unsigned int c); private: - unsigned int _m; - unsigned int _n; - unsigned long _nz; - std::vector<std::vector<unsigned int> > _ri; - std::vector<std::vector<T> > _val; - bool _pw; // Print Warnings + unsigned int _m; /// No. of rows + unsigned int _n; /// No. of columns + unsigned long _nz; /// No. of non-zero elements + std::vector<std::vector<unsigned int> > _ri; /// Vector of vectors (one per column) of row-indicies + std::vector<std::vector<T> > _val; /// Vector of vectors (one per column) of values + bool _pw; /// Print Warnings + unsigned int _nt; /// Number of threads bool found(const std::vector<unsigned int>& ri, unsigned int key, int& pos) const; void insert(std::vector<unsigned int>& vec, int indx, unsigned int val); @@ -223,6 +231,37 @@ private: SpMat<T>& add_same_sparsity_mat_to_me(const SpMat<T>& M, double s); SpMat<T>& add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s); bool is_sorted() const; + bool is_odd(int numerator) const { return(numerator%2==1); } + std::vector<unsigned int> columns_per_thread(bool balance=true) const; + void mul_by_vec_helper(// Input + unsigned int first, + unsigned int last, + unsigned int tid, // Thread number + std::vector<std::thread>& threads, // All threads + const double *xp, + // Output + std::vector<double *> vec_bp) const; + void trans_mult_helper(// Input + unsigned int first, + unsigned int last, + const double *xp, + // Output + double *bp) const; + void matrix_mul_helper(// Input + unsigned int first, + unsigned int last, + const SpMat<T>& lh, + // Output + SpMat<T>& out) const; + void add_same_sparsity_helper(unsigned int first, + unsigned int last, + const SpMat<T>& M, + double s); + void add_diff_sparsity_helper(unsigned int first, + unsigned int last, + const SpMat<T>& M, + double s, + unsigned int& nz); }; //@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -346,8 +385,8 @@ private: ///////////////////////////////////////////////////////////////////// template<class T> -SpMat<T>::SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp) - : _m(m), _n(n), _nz(0), _ri(n), _val(n), _pw(false) +SpMat<T>::SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp, unsigned int nt) +: _m(m), _n(n), _nz(0), _ri(n), _val(n), _pw(false), _nt(nt) { _nz = jcp[n]; unsigned long nz = 0; @@ -376,8 +415,8 @@ SpMat<T>::SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const u ///////////////////////////////////////////////////////////////////// template<class T> -SpMat<T>::SpMat(const NEWMAT::GeneralMatrix& M) - : _m(M.Nrows()), _n(M.Ncols()), _nz(0), _ri(M.Ncols()), _val(M.Ncols()), _pw(false) +SpMat<T>::SpMat(const NEWMAT::GeneralMatrix& M, unsigned int nt) +: _m(M.Nrows()), _n(M.Ncols()), _nz(0), _ri(M.Ncols()), _val(M.Ncols()), _pw(false), _nt(nt) { double *m = static_cast<double *>(M.Store()); @@ -412,8 +451,8 @@ SpMat<T>::SpMat(const NEWMAT::GeneralMatrix& M) ///////////////////////////////////////////////////////////////////// template<class T> -SpMat<T>::SpMat(const std::string& fname) -: _m(0), _n(0), _nz(0), _ri(0), _val(0), _pw(false) +SpMat<T>::SpMat(const std::string& fname, unsigned int nt) +: _m(0), _n(0), _nz(0), _ri(0), _val(0), _pw(false), _nt(nt) { // First read data into (nz+1)x3 NEWMAT matrix NEWMAT::Matrix rcv; @@ -612,7 +651,7 @@ NEWMAT::ReturnMatrix SpMat<T>::SolveForx(const NEWMAT::ColumnVector& status = BiCG(*this,x,b,*M,liter,tol); break; default: - throw SpMatException("SolveForx: No idea how you got here. But you shouldn't be here, punk."); + throw SpMatException("SolveForx: No idea how you got here. But you shouldn't be here."); } if (status && _pw) { @@ -635,11 +674,12 @@ template<class T> const SpMat<T> SpMat<T>::t() const { SpMat<T> t_mat(_n,_m); - // First make a list of number of elements in each row of *this (columns of t_mat) + // First make a list of number of elements in each row of *this (columns of t_mat). std::vector<unsigned int> no_per_row(_m,static_cast<unsigned int>(0)); for (unsigned int i=0; i<_n; i++) { for (unsigned int j=0; j<_ri[i].size(); j++) no_per_row[_ri[i][j]]++; } + // A second pass to allocate storage for (unsigned int i=0; i<_m; i++) { t_mat._ri[i].resize(no_per_row[i]); @@ -764,18 +804,76 @@ const NEWMAT::ReturnMatrix SpMat<T>::operator*(const NEWMAT::ColumnVector& x) co const double *xp = static_cast<double *>(x.Store()); double *bp = static_cast<double *>(b.Store()); - for (unsigned int c=0; c<_n; c++) { + if (_nt==1) { // If we are to run single-threaded + for (unsigned int c=0; c<_n; c++) { + if (_ri[c].size()) { + double wgt = xp[c]; + const std::vector<unsigned int>& ri = _ri[c]; + const std::vector<T>& val = _val[c]; + for (unsigned int i=0; i<ri.size(); i++) { + bp[ri[i]] += static_cast<double>(wgt*val[i]); + } + } + } + } + else { // We are to run multi-threaded + std::vector<unsigned int> col_pt = this->columns_per_thread(); + std::vector<std::thread> threads(_nt-1); // + main thread makes _nt + std::unique_ptr<double[]> bp_at(new double[_m*(_nt-1)]); + std::vector<double *> vec_bp(_nt); + for (unsigned int i=0; i<_nt-1; i++) vec_bp[i] = &bp_at[i*_m]; + vec_bp[_nt-1] = bp; // This is where the results will be summed in the end + for (unsigned int i=0; i<_nt-1; i++) { + threads[i] = std::thread(&SpMat<T>::mul_by_vec_helper,this,col_pt[i], + col_pt[i+1],i,std::ref(threads),xp,std::ref(vec_bp)); + } + this->mul_by_vec_helper(col_pt[_nt-1],col_pt[_nt],_nt-1,threads,xp,vec_bp); + } + b.Release(); + return(b); +} + +template<class T> +void SpMat<T>::mul_by_vec_helper(// Input + unsigned int first, + unsigned int last, + unsigned int tid, // Thread number + std::vector<std::thread>& threads, + const double *xp, + // Output + std::vector<double *> vec_bp) const +{ + // static std::mutex cout_mutex; + + // Do the columns that you are resonsible for + double *bp = vec_bp[tid]; + std::memset(bp,0,_m*sizeof(double)); + for (unsigned int c=first; c<last; c++) { // If we are to run single-threaded if (_ri[c].size()) { double wgt = xp[c]; const std::vector<unsigned int>& ri = _ri[c]; const std::vector<T>& val = _val[c]; for (unsigned int i=0; i<ri.size(); i++) { - bp[ri[i]] += static_cast<double>(wgt*val[i]); + bp[ri[i]] += static_cast<double>(wgt*val[i]); } } } - b.Release(); - return(b); + // Then add up results with other threads + int step = 1; // How far to the left your neighbour is + while (step < _nt) { + if (is_odd(((int(_nt)-1)-int(tid))/step)) return; // Is not a candidate + int neighbour = int(tid)-step; // Check who your neighbour to the left is + if (neighbour < 0) return; // Return if neighbour is out of bounds + threads[neighbour].join(); // Make sure that neighbour has finished + // cout_mutex.lock(); + // std::cout << "I am thread # " << tid << ", and I will add thread # " << neighbour << " to myself" << std::endl; + // cout_mutex.unlock(); + double *my_p = vec_bp[tid]; + double *ne_p = vec_bp[neighbour]; + for (unsigned int i=0; i<_m; i++) my_p[i] += ne_p[i]; + step *= 2; + } + return; } ///////////////////////////////////////////////////////////////////// @@ -838,19 +936,52 @@ const NEWMAT::ReturnMatrix SpMat<T>::trans_mult(const NEWMAT::ColumnVector& x) c const double *xp = static_cast<double *>(x.Store()); double *bp = static_cast<double *>(b.Store()); - for (unsigned int c=0; c<_n; c++) { + if (_nt == 1) { // If we are to run single-threaded + for (unsigned int c=0; c<_n; c++) { + double res = 0.0; + if (_ri[c].size()) { + const std::vector<unsigned int>& ri = _ri[c]; + const std::vector<T>& val = _val[c]; + for (unsigned int i=0; i<ri.size(); i++) { + res += val[i]*xp[ri[i]]; + } + } + bp[c] = res; + } + } + else { // We are to run multi-threaded + std::vector<unsigned int> col_pt = this->columns_per_thread(); + std::vector<std::thread> threads(_nt-1); // + main thread makes _nt + for (unsigned int i=0; i<_nt-1; i++) { + threads[i] = std::thread(&SpMat<T>::trans_mult_helper,this,col_pt[i],col_pt[i+1],xp,bp); + } + this->trans_mult_helper(col_pt[_nt-1],col_pt[_nt],xp,bp); + std::for_each(threads.begin(),threads.end(),std::mem_fn(&std::thread::join)); + } + b.Release(); + return(b); +} + +template<class T> +void SpMat<T>::trans_mult_helper(// Input + unsigned int first, + unsigned int last, + const double *xp, + // Output + double *bp) const +{ + for (unsigned int c=first; c<last; c++) { double res = 0.0; if (_ri[c].size()) { const std::vector<unsigned int>& ri = _ri[c]; const std::vector<T>& val = _val[c]; for (unsigned int i=0; i<ri.size(); i++) { - res += val[i]*xp[ri[i]]; + res += val[i]*xp[ri[i]]; } } bp[c] = res; } - b.Release(); - return(b); + return; } ///////////////////////////////////////////////////////////////////// @@ -945,20 +1076,75 @@ const SpMat<T> operator*(const SpMat<T>& lh, const SpMat<T>& rh) if (lh._n != rh._m) throw SpMatException("operator*: Left hand matrix must have same # of columns as right hand has rows"); SpMat<T> out(lh._m,rh._n); - Accumulator<T> acc(lh._m); + if (rh._nt==1) { // If we are to run single threaded + Accumulator<T> acc(lh._m); + + for (unsigned int cr=0; cr<rh._n; cr++) { + acc.Reset(); + if (rh._ri[cr].size()) { + const std::vector<unsigned int>& rri = rh._ri[cr]; + const std::vector<T>& rval = rh._val[cr]; + for (unsigned int i=0; i<rri.size(); i++) { + if (lh._ri[rri[i]].size()) { + double wgt = rval[i]; + const std::vector<unsigned int>& lri = lh._ri[rri[i]]; + const std::vector<T>& lval = lh._val[rri[i]]; + for (unsigned int j=0; j<lri.size(); j++) { + acc(lri[j]) += wgt*lval[j]; + } + } + } + } + out._ri[cr].resize(acc.NO()); + out._val[cr].resize(acc.NO()); + std::vector<unsigned int>& ori = out._ri[cr]; + std::vector<T>& oval = out._val[cr]; + for (unsigned int i=0; i<acc.NO(); i++) { + ori[i] = acc.ri(i); + oval[i] = acc.val(i); + } + out._nz += acc.NO(); + } + } + else { // We are to run multi-threaded + std::vector<unsigned int> cpt = rh.columns_per_thread(); + std::vector<std::thread> threads(rh._nt-1); // + main thread makes rh._nt + for (unsigned int i=0; i<rh._nt-1; i++) { + threads[i] = std::thread(&SpMat<T>::matrix_mul_helper,&rh,cpt[i],cpt[i+1],std::ref(lh),std::ref(out)); + } + rh.matrix_mul_helper(cpt[rh._nt-1],cpt[rh._nt],lh,out); + std::for_each(threads.begin(),threads.end(),std::mem_fn(&std::thread::join)); + } - for (unsigned int cr=0; cr<rh._n; cr++) { + return(out); +} + +template<class T> +void SpMat<T>::matrix_mul_helper(// Input + unsigned int first, + unsigned int last, + const SpMat<T>& lh, + // Output + SpMat<T>& out) const +{ + static std::mutex nz_mutex; + + // N.B. this is called from the rh matrix, so this refers to that + Accumulator<T> acc(lh._m); + unsigned int lnz=0; // Local non-zero + + for (unsigned int cr=first; cr<last; cr++) { acc.Reset(); - if (rh._ri[cr].size()) { - const std::vector<unsigned int>& rri = rh._ri[cr]; - const std::vector<T>& rval = rh._val[cr]; + if (this->_ri[cr].size()) { + const std::vector<unsigned int>& rri = this->_ri[cr]; + const std::vector<T>& rval = this->_val[cr]; for (unsigned int i=0; i<rri.size(); i++) { - if (lh._ri[rri[i]].size()) { - double wgt = rval[i]; - const std::vector<unsigned int>& lri = lh._ri[rri[i]]; - const std::vector<T>& lval = lh._val[rri[i]]; - for (unsigned int j=0; j<lri.size(); j++) { - acc(lri[j]) += wgt*lval[j]; + if (lh._ri[rri[i]].size()) { + double wgt = rval[i]; + const std::vector<unsigned int>& lri = lh._ri[rri[i]]; + const std::vector<T>& lval = lh._val[rri[i]]; + for (unsigned int j=0; j<lri.size(); j++) { + acc(lri[j]) += wgt*lval[j]; } } } @@ -971,10 +1157,13 @@ const SpMat<T> operator*(const SpMat<T>& lh, const SpMat<T>& rh) ori[i] = acc.ri(i); oval[i] = acc.val(i); } - out._nz += acc.NO(); + lnz += acc.NO(); } + nz_mutex.lock(); + out._nz += lnz; + nz_mutex.unlock(); - return(out); + return; } ///////////////////////////////////////////////////////////////////// @@ -1069,6 +1258,37 @@ const SpMat<T> operator&(const SpMat<T>& th, const NEWMAT::GeneralMatrix& bh) return(SpMat<T>(th) &= SpMat<T>(bh)); } +///////////////////////////////////////////////////////////////////// +// +// Global functions for comparison +// +///////////////////////////////////////////////////////////////////// + +template<class T> +bool operator==(const SpMat<T>& lh, const SpMat<T>& rh) +{ + // Start by comparing dimensions and sparsity + if (lh._m != rh._m || lh._n != rh._n || lh._nz != rh._nz) return(false); + // Next compare non-zero elements per column + for (unsigned int i=0; i<lh._n; i++) { + if (lh._ri[i].size() != rh._ri[i].size()) return(false); + } + // Finally do a detailed comparison if neccessary + for (unsigned int i=0; i<lh._n; i++) { + for (unsigned int j=0; j<lh._ri[i].size(); j++) { + if (lh._ri[i][j] != rh._ri[i][j] || lh._val[i][j] != rh._val[i][j]) { + // cout << "lh._ri[" << i << "][" << j << "] = " << lh._ri[i][j] << ", rh._ri[" << i << "][" << j << "] = " << rh._ri[i][j] << endl; + // cout << "lh._val[" << i << "][" << j << "] = " << lh._val[i][j] << ", rh._val[" << i << "][" << j << "] = " << rh._val[i][j] << endl; + return(false); + } + } + } + return(true); +} + +template<class T> +bool operator!=(const SpMat<T>& lh, const SpMat<T>& rh) { return(!(lh==rh)); } + /*################################################################### ## ## Here starts protected functions @@ -1211,16 +1431,46 @@ bool SpMat<T>::same_sparsity(const SpMat<T>& M) const template<class T> SpMat<T>& SpMat<T>::add_same_sparsity_mat_to_me(const SpMat<T>& M, double s) { - for (unsigned int c=0; c<_n; c++) { + if (_nt==1) { // If we are to run single threaded + for (unsigned int c=0; c<_n; c++) { + if (_val[c].size()) { + std::vector<T>& val = _val[c]; + const std::vector<T>& Mval = M._val[c]; + for (unsigned int i=0; i<val.size(); i++) { + val[i] += s*Mval[i]; + } + } + } + } + else { // We are to run multi threaded + std::vector<unsigned int> col_pt = this->columns_per_thread(); + std::vector<std::thread> threads(_nt-1); // + main thread makes _nt + for (unsigned int i=0; i<_nt-1; i++) { + threads[i] = std::thread(&SpMat<T>::add_same_sparsity_helper,this, + col_pt[i],col_pt[i+1],std::ref(M),s); + } + this->add_same_sparsity_helper(col_pt[_nt-1],col_pt[_nt],M,s); + std::for_each(threads.begin(),threads.end(),std::mem_fn(&std::thread::join)); + } + return(*this); +} + +template<class T> +void SpMat<T>::add_same_sparsity_helper(unsigned int first, + unsigned int last, + const SpMat<T>& M, + double s) +{ + for (unsigned int c=first; c<last; c++) { if (_val[c].size()) { std::vector<T>& val = _val[c]; const std::vector<T>& Mval = M._val[c]; for (unsigned int i=0; i<val.size(); i++) { - val[i] += s*Mval[i]; + val[i] += s*Mval[i]; } } } - return(*this); + return; } ///////////////////////////////////////////////////////////////////// @@ -1234,10 +1484,58 @@ SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s) { if (_m != M._m || _n != M._n) throw SpMatException("add_diff_sparsity_mat_to_me: Size mismatch between matrices"); + if (_nt==1) { // If we are to run single threaded + Accumulator<T> acc(_m); + _nz = 0; + for (unsigned int c=0; c<_n; c++) { + acc.Reset(); + if (M._ri[c].size()) { + const std::vector<unsigned int>& Mri = M._ri[c]; + const std::vector<T>& Mval = M._val[c]; + for (unsigned int i=0; i<Mri.size(); i++) { + acc(Mri[i]) += s*Mval[i]; + } + std::vector<unsigned int>& ri = _ri[c]; + std::vector<T>& val = _val[c]; + for (unsigned int i=0; i<ri.size(); i++) { + acc(ri[i]) += val[i]; + } + ri.resize(acc.NO()); + val.resize(acc.NO()); + for (unsigned int i=0; i<acc.NO(); i++) { + ri[i] = acc.ri(i); + val[i] = acc.val(i); + } + _nz += acc.NO(); + } + } + } + else { // We are to run multi-threaded + std::vector<unsigned int> col_pt = this->columns_per_thread(false); + std::vector<std::thread> threads(_nt-1); // + main thread makes _nt + std::vector<unsigned int> nz(_nt,0); // Non-zero elements per thread + for (unsigned int i=0; i<_nt-1; i++) { + threads[i] = std::thread(&SpMat<T>::add_diff_sparsity_helper,this,col_pt[i], + col_pt[i+1],std::ref(M),s,std::ref(nz[i])); + } + this->add_diff_sparsity_helper(col_pt[_nt-1],col_pt[_nt],M,s,nz[_nt-1]); + std::for_each(threads.begin(),threads.end(),std::mem_fn(&std::thread::join)); + _nz = std::accumulate(nz.begin(),nz.end(),0u); + } + return(*this); +} + +template<class T> +void SpMat<T>::add_diff_sparsity_helper(unsigned int first, + unsigned int last, + const SpMat<T>& M, + double s, + unsigned int& nz) +{ Accumulator<T> acc(_m); - _nz = 0; - for (unsigned int c=0; c<_n; c++) { + nz = 0; + for (unsigned int c=first; c<last; c++) { acc.Reset(); if (M._ri[c].size()) { const std::vector<unsigned int>& Mri = M._ri[c]; @@ -1256,10 +1554,10 @@ SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s) ri[i] = acc.ri(i); val[i] = acc.val(i); } - _nz += acc.NO(); + nz += acc.NO(); } } - return(*this); + return; } ///////////////////////////////////////////////////////////////////// @@ -1283,6 +1581,47 @@ bool SpMat<T>::is_sorted() const return(true); } +///////////////////////////////////////////////////////////////////// +// +// Returns a vector vec where vec[i] and vec[i+1] contains the first +// column and one past the last column respectively for thread i. +// It is based on trying to balance the number of non-zero elements +// between the different threads. +// +///////////////////////////////////////////////////////////////////// +template<class T> +std::vector<unsigned int> SpMat<T>::columns_per_thread(bool balance) const +{ + std::vector<unsigned int> cpt(_nt+1,0); + if (balance) { // Try to balance number of non-zero elements between threads + unsigned int nnz_per_thread = std::lround(static_cast<double>(_nz)/static_cast<double>(_nt)); + unsigned int cum_nnz = 0; + unsigned int ti = 1; + for (unsigned int c=0; c<_n; c++) { + cum_nnz += _ri[c].size(); + if (cum_nnz >= nnz_per_thread) { + if (std::abs(int(cum_nnz - nnz_per_thread)) < std::abs(int(cum_nnz - _ri[c].size() - nnz_per_thread))) { // If this c is best + cpt[ti] = c+1; + cum_nnz = 0; + } + else { // If previous c is best + cpt[ti] = c; + cum_nnz = _ri[c].size(); + } + if (ti < _nt-1) ti++; + else break; + } + } + cpt[_nt] = _n; + } + else { // If we just want the same number of columns + double dcpt = static_cast<double>(_n) / static_cast<double>(_nt); + for (unsigned int i=1; i<_nt; i++) cpt[i] = static_cast<unsigned int>(std::floor(i*dcpt)); + cpt[_nt] = _n; + } + return(cpt); +} + /* template<class T> SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s) diff --git a/bfmatrix.h b/bfmatrix.h index 2ba2d7f6ec544a1fa948f43cf2a0b566097fe531..92ef6e39cdd64155632cbf8ed985cd0d0088c39a 100644 --- a/bfmatrix.h +++ b/bfmatrix.h @@ -76,6 +76,9 @@ public: // Basic properties virtual unsigned int Nrows() const = 0; virtual unsigned int Ncols() const = 0; + virtual unsigned int Nthreads() const = 0; + virtual void SetNthreads(unsigned int nt) = 0; + virtual unsigned int NZ() const = 0; // Print matrix (for debugging) virtual void Print(const std::string fname=std::string("")) const = 0; @@ -137,14 +140,14 @@ public: // Constructors, destructor and assignment SparseBFMatrix() : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>())) {} - SparseBFMatrix(unsigned int m, unsigned int n) - : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(m,n))) {} - SparseBFMatrix(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp) - : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(m,n,irp,jcp,sp))) {} + SparseBFMatrix(unsigned int m, unsigned int n, unsigned int nt=1) + : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(m,n,nt))) {} + SparseBFMatrix(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp, unsigned int nt=1) + : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(m,n,irp,jcp,sp,nt))) {} SparseBFMatrix(const MISCMATHS::SpMat<T>& M) : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(M))) {} - SparseBFMatrix(const NEWMAT::Matrix& M) - : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(M))) {} + SparseBFMatrix(const NEWMAT::Matrix& M, unsigned int nt=1) + : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(M,nt))) {} SparseBFMatrix(const SparseBFMatrix<T>& M) : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(*(M.mp)))) {} virtual ~SparseBFMatrix() {} @@ -157,9 +160,15 @@ public: // Access as NEWMAT::Matrix virtual NEWMAT::ReturnMatrix AsMatrix() const {NEWMAT::Matrix ret; ret = mp->AsNEWMAT(); ret.Release(); return(ret);} + // Access as MISCMATHS::SpMat<T> matrix. Cannot be used from pointer to base-class. + MISCMATHS::SpMat<T> AsSpMat() const {return(*mp);} + // Basic properties virtual unsigned int Nrows() const {return(mp->Nrows());} virtual unsigned int Ncols() const {return(mp->Ncols());} + virtual unsigned int Nthreads() const {return(mp->Nthreads());} + virtual void SetNthreads(unsigned int nt) {mp->SetNthreads(nt);} + virtual unsigned int NZ() const {return(mp->NZ());} // Print matrix (for debugging) virtual void Print(const std::string fname=std::string("")) const {mp->Print(fname);} @@ -236,6 +245,9 @@ public: // Basic properties virtual unsigned int Nrows() const {return(mp->Nrows());} virtual unsigned int Ncols() const {return(mp->Ncols());} + virtual unsigned int Nthreads() const {return(1);} + virtual void SetNthreads(unsigned int nt) {} // Null function + virtual unsigned int NZ() const {return(mp->Nrows()*mp->Ncols());} // Print matrix (for debugging) virtual void Print(const std::string fname=std::string("")) const;