Skip to content
Snippets Groups Projects
Commit 200332df authored by Jesper Andersson's avatar Jesper Andersson Committed by Paul McCarthy
Browse files

CPU parallelisation of many of the more time consuming operations

parent d4b96e96
No related branches found
No related tags found
1 merge request!10CPU parallelisation of many of the more time consuming operations
Pipeline #12277 passed
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include <vector> #include <vector>
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#include <thread>
#include <chrono>
#include "armawrap/newmat.h" #include "armawrap/newmat.h"
#include "cg.h" #include "cg.h"
#include "bicg.h" #include "bicg.h"
...@@ -89,16 +91,18 @@ template<class T> ...@@ -89,16 +91,18 @@ template<class T>
class SpMat class SpMat
{ {
public: public:
SpMat() : _m(0), _n(0), _nz(0), _ri(0), _val(0), _pw(false) {} SpMat() : _m(0), _n(0), _nz(0), _ri(0), _val(0), _pw(false), _nt(1) {}
SpMat(unsigned int m, unsigned int n) : _m(m), _n(n), _nz(0), _ri(n), _val(n), _pw(false) {} SpMat(unsigned int m, unsigned int n, unsigned int nt=1) : _m(m), _n(n), _nz(0), _ri(n), _val(n), _pw(false), _nt(nt) {}
SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp); SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp, unsigned int nt=1);
SpMat(const NEWMAT::GeneralMatrix& M); SpMat(const NEWMAT::GeneralMatrix& M, unsigned int nt=1);
SpMat(const std::string& fname); SpMat(const std::string& fname, unsigned int nt=1);
~SpMat() {} ~SpMat() {}
unsigned int Nrows() const {return(_m);} unsigned int Nrows() const {return(_m);}
unsigned int Ncols() const {return(_n);} unsigned int Ncols() const {return(_n);}
unsigned int NZ() const {return(_nz);} unsigned int NZ() const {return(_nz);}
unsigned int Nthreads() const {return(_nt);}
void SetNthreads(unsigned int nt) {_nt=nt;}
NEWMAT::ReturnMatrix AsNEWMAT() const; NEWMAT::ReturnMatrix AsNEWMAT() const;
void Save(const std::string& fname, void Save(const std::string& fname,
...@@ -118,7 +122,7 @@ public: ...@@ -118,7 +122,7 @@ public:
void Set(unsigned int r, unsigned int c, const T& v) {here(r,c) = v;} // Set a single value void Set(unsigned int r, unsigned int c, const T& v) {here(r,c) = v;} // Set a single value
void SetColumn(unsigned int c, const NEWMAT::ColumnVector& col, double eps=0.0); // Set a whole column (obliterating what was there before) void SetColumn(unsigned int c, const NEWMAT::ColumnVector& col, double eps=0.0); // Set a whole column (obliterating what was there before)
SpMat<T>& MultiplyColumns(const NEWMAT::Matrix& vals); // Multiply each column of a matrix by a value SpMat<T>& MultiplyColumns(const NEWMAT::Matrix& vals); // Multiply each column of a matrix by a value
void AddTo(unsigned int r, unsigned int c, const T& v) {here(r,c) += v;} // Add value to a single (possibly existing) value void AddTo(unsigned int r, unsigned int c, const T& v) {here(r,c) += v;} // Add value to a single (possibly existing) value
SpMat<T>& operator+=(const SpMat& M) SpMat<T>& operator+=(const SpMat& M)
...@@ -131,18 +135,18 @@ public: ...@@ -131,18 +135,18 @@ public:
if (same_sparsity(M)) return(add_same_sparsity_mat_to_me(M,-1)); if (same_sparsity(M)) return(add_same_sparsity_mat_to_me(M,-1));
else return(add_diff_sparsity_mat_to_me(M,-1)); else return(add_diff_sparsity_mat_to_me(M,-1));
} }
const NEWMAT::ReturnMatrix operator*(const NEWMAT::ColumnVector& x) const; // Multiplication with column vector const NEWMAT::ReturnMatrix operator*(const NEWMAT::ColumnVector& x) const; // Multiplication with column vector. Has multi-thread support.
const NEWMAT::ReturnMatrix trans_mult(const NEWMAT::ColumnVector& x) const; // Multiplication of transpose with column vector const NEWMAT::ReturnMatrix trans_mult(const NEWMAT::ColumnVector& x) const; // Multiplication of transpose with column vector. Has multi-thread support.
const NEWMAT::ReturnMatrix TransMult(const NEWMAT::ColumnVector& x) const { const NEWMAT::ReturnMatrix TransMult(const NEWMAT::ColumnVector& x) const {
return(trans_mult(x)); // Duplication for compatibility with IML++ return(trans_mult(x)); // Duplication for compatibility with IML++
} }
const SpMat<T> TransMult(const SpMat<T>& B) const { return(this->t()*B); } // Multiplication of transpose(*this) with sparse matrix B const SpMat<T> TransMult(const SpMat<T>& B) const { return(this->t()*B); } // Multiplication of transpose(*this) with sparse matrix B. Has multi-thread support.
SpMat<T>& operator*=(double s); // Multiplication of self with scalar SpMat<T>& operator*=(double s); // Multiplication of self with scalar
SpMat<T> operator-(const SpMat<T>& M) const {return(SpMat<T>(M) *= -1.0);} // Unary minus SpMat<T> operator-(const SpMat<T>& M) const {return(SpMat<T>(M) *= -1.0);} // Unary minus
SpMat<T>& operator|=(const SpMat<T>& rh); // Hor concat to right SpMat<T>& operator|=(const SpMat<T>& rh); // Hor concat to right
SpMat<T>& operator&=(const SpMat<T>& bh); // Vert concat below SpMat<T>& operator&=(const SpMat<T>& bh); // Vert concat below
const SpMat<T> TransMultSelf() const {return(this->t() * *this);} // Returns transpose(*this)*(*this) const SpMat<T> TransMultSelf() const {return(this->t() * *this);} // Returns transpose(*this)*(*this). Has multi-thread support.
const SpMat<T> t() const; // Returns transpose(*this). const SpMat<T> t() const; // Returns transpose(*this).
...@@ -182,9 +186,12 @@ public: ...@@ -182,9 +186,12 @@ public:
ColumnIterator end(unsigned int col) const { if (col>_n) throw SpMatException("ColumnIterator: col out of range"); return(ColumnIterator(*this,col,true)); } ColumnIterator end(unsigned int col) const { if (col>_n) throw SpMatException("ColumnIterator: col out of range"); return(ColumnIterator(*this,col,true)); }
template<class TT> template<class TT>
friend const SpMat<TT> operator*(const SpMat<TT>& lh, const SpMat<TT>& rh); // Multiplication of two sparse matrices friend const SpMat<TT> operator*(const SpMat<TT>& lh, const SpMat<TT>& rh); // Multiplication of two sparse matrices. Has multi-thread support.
NEWMAT::ReturnMatrix SolveForx(const NEWMAT::ColumnVector& b, // Solve for x in b=(*this)*x template<class TT>
friend bool operator==(const SpMat<TT>& lh, const SpMat<TT>& rh);
NEWMAT::ReturnMatrix SolveForx(const NEWMAT::ColumnVector& b, // Solve for x in b=(*this)*x . Has multi-thread support.
MatrixType type = UNKNOWN, MatrixType type = UNKNOWN,
double tol = 1e-4, double tol = 1e-4,
unsigned int miter = 200, unsigned int miter = 200,
...@@ -209,12 +216,13 @@ protected: ...@@ -209,12 +216,13 @@ protected:
T& here(unsigned int r, unsigned int c); T& here(unsigned int r, unsigned int c);
private: private:
unsigned int _m; unsigned int _m; /// No. of rows
unsigned int _n; unsigned int _n; /// No. of columns
unsigned long _nz; unsigned long _nz; /// No. of non-zero elements
std::vector<std::vector<unsigned int> > _ri; std::vector<std::vector<unsigned int> > _ri; /// Vector of vectors (one per column) of row-indicies
std::vector<std::vector<T> > _val; std::vector<std::vector<T> > _val; /// Vector of vectors (one per column) of values
bool _pw; // Print Warnings bool _pw; /// Print Warnings
unsigned int _nt; /// Number of threads
bool found(const std::vector<unsigned int>& ri, unsigned int key, int& pos) const; bool found(const std::vector<unsigned int>& ri, unsigned int key, int& pos) const;
void insert(std::vector<unsigned int>& vec, int indx, unsigned int val); void insert(std::vector<unsigned int>& vec, int indx, unsigned int val);
...@@ -223,6 +231,37 @@ private: ...@@ -223,6 +231,37 @@ private:
SpMat<T>& add_same_sparsity_mat_to_me(const SpMat<T>& M, double s); SpMat<T>& add_same_sparsity_mat_to_me(const SpMat<T>& M, double s);
SpMat<T>& add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s); SpMat<T>& add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s);
bool is_sorted() const; bool is_sorted() const;
bool is_odd(int numerator) const { return(numerator%2==1); }
std::vector<unsigned int> columns_per_thread(bool balance=true) const;
void mul_by_vec_helper(// Input
unsigned int first,
unsigned int last,
unsigned int tid, // Thread number
std::vector<std::thread>& threads, // All threads
const double *xp,
// Output
std::vector<double *> vec_bp) const;
void trans_mult_helper(// Input
unsigned int first,
unsigned int last,
const double *xp,
// Output
double *bp) const;
void matrix_mul_helper(// Input
unsigned int first,
unsigned int last,
const SpMat<T>& lh,
// Output
SpMat<T>& out) const;
void add_same_sparsity_helper(unsigned int first,
unsigned int last,
const SpMat<T>& M,
double s);
void add_diff_sparsity_helper(unsigned int first,
unsigned int last,
const SpMat<T>& M,
double s,
unsigned int& nz);
}; };
//@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ //@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
...@@ -346,8 +385,8 @@ private: ...@@ -346,8 +385,8 @@ private:
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
template<class T> template<class T>
SpMat<T>::SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp) SpMat<T>::SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp, unsigned int nt)
: _m(m), _n(n), _nz(0), _ri(n), _val(n), _pw(false) : _m(m), _n(n), _nz(0), _ri(n), _val(n), _pw(false), _nt(nt)
{ {
_nz = jcp[n]; _nz = jcp[n];
unsigned long nz = 0; unsigned long nz = 0;
...@@ -376,8 +415,8 @@ SpMat<T>::SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const u ...@@ -376,8 +415,8 @@ SpMat<T>::SpMat(unsigned int m, unsigned int n, const unsigned int *irp, const u
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
template<class T> template<class T>
SpMat<T>::SpMat(const NEWMAT::GeneralMatrix& M) SpMat<T>::SpMat(const NEWMAT::GeneralMatrix& M, unsigned int nt)
: _m(M.Nrows()), _n(M.Ncols()), _nz(0), _ri(M.Ncols()), _val(M.Ncols()), _pw(false) : _m(M.Nrows()), _n(M.Ncols()), _nz(0), _ri(M.Ncols()), _val(M.Ncols()), _pw(false), _nt(nt)
{ {
double *m = static_cast<double *>(M.Store()); double *m = static_cast<double *>(M.Store());
...@@ -412,8 +451,8 @@ SpMat<T>::SpMat(const NEWMAT::GeneralMatrix& M) ...@@ -412,8 +451,8 @@ SpMat<T>::SpMat(const NEWMAT::GeneralMatrix& M)
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
template<class T> template<class T>
SpMat<T>::SpMat(const std::string& fname) SpMat<T>::SpMat(const std::string& fname, unsigned int nt)
: _m(0), _n(0), _nz(0), _ri(0), _val(0), _pw(false) : _m(0), _n(0), _nz(0), _ri(0), _val(0), _pw(false), _nt(nt)
{ {
// First read data into (nz+1)x3 NEWMAT matrix // First read data into (nz+1)x3 NEWMAT matrix
NEWMAT::Matrix rcv; NEWMAT::Matrix rcv;
...@@ -612,7 +651,7 @@ NEWMAT::ReturnMatrix SpMat<T>::SolveForx(const NEWMAT::ColumnVector& ...@@ -612,7 +651,7 @@ NEWMAT::ReturnMatrix SpMat<T>::SolveForx(const NEWMAT::ColumnVector&
status = BiCG(*this,x,b,*M,liter,tol); status = BiCG(*this,x,b,*M,liter,tol);
break; break;
default: default:
throw SpMatException("SolveForx: No idea how you got here. But you shouldn't be here, punk."); throw SpMatException("SolveForx: No idea how you got here. But you shouldn't be here.");
} }
if (status && _pw) { if (status && _pw) {
...@@ -635,11 +674,12 @@ template<class T> ...@@ -635,11 +674,12 @@ template<class T>
const SpMat<T> SpMat<T>::t() const const SpMat<T> SpMat<T>::t() const
{ {
SpMat<T> t_mat(_n,_m); SpMat<T> t_mat(_n,_m);
// First make a list of number of elements in each row of *this (columns of t_mat) // First make a list of number of elements in each row of *this (columns of t_mat).
std::vector<unsigned int> no_per_row(_m,static_cast<unsigned int>(0)); std::vector<unsigned int> no_per_row(_m,static_cast<unsigned int>(0));
for (unsigned int i=0; i<_n; i++) { for (unsigned int i=0; i<_n; i++) {
for (unsigned int j=0; j<_ri[i].size(); j++) no_per_row[_ri[i][j]]++; for (unsigned int j=0; j<_ri[i].size(); j++) no_per_row[_ri[i][j]]++;
} }
// A second pass to allocate storage // A second pass to allocate storage
for (unsigned int i=0; i<_m; i++) { for (unsigned int i=0; i<_m; i++) {
t_mat._ri[i].resize(no_per_row[i]); t_mat._ri[i].resize(no_per_row[i]);
...@@ -764,18 +804,76 @@ const NEWMAT::ReturnMatrix SpMat<T>::operator*(const NEWMAT::ColumnVector& x) co ...@@ -764,18 +804,76 @@ const NEWMAT::ReturnMatrix SpMat<T>::operator*(const NEWMAT::ColumnVector& x) co
const double *xp = static_cast<double *>(x.Store()); const double *xp = static_cast<double *>(x.Store());
double *bp = static_cast<double *>(b.Store()); double *bp = static_cast<double *>(b.Store());
for (unsigned int c=0; c<_n; c++) { if (_nt==1) { // If we are to run single-threaded
for (unsigned int c=0; c<_n; c++) {
if (_ri[c].size()) {
double wgt = xp[c];
const std::vector<unsigned int>& ri = _ri[c];
const std::vector<T>& val = _val[c];
for (unsigned int i=0; i<ri.size(); i++) {
bp[ri[i]] += static_cast<double>(wgt*val[i]);
}
}
}
}
else { // We are to run multi-threaded
std::vector<unsigned int> col_pt = this->columns_per_thread();
std::vector<std::thread> threads(_nt-1); // + main thread makes _nt
std::unique_ptr<double[]> bp_at(new double[_m*(_nt-1)]);
std::vector<double *> vec_bp(_nt);
for (unsigned int i=0; i<_nt-1; i++) vec_bp[i] = &bp_at[i*_m];
vec_bp[_nt-1] = bp; // This is where the results will be summed in the end
for (unsigned int i=0; i<_nt-1; i++) {
threads[i] = std::thread(&SpMat<T>::mul_by_vec_helper,this,col_pt[i],
col_pt[i+1],i,std::ref(threads),xp,std::ref(vec_bp));
}
this->mul_by_vec_helper(col_pt[_nt-1],col_pt[_nt],_nt-1,threads,xp,vec_bp);
}
b.Release();
return(b);
}
template<class T>
void SpMat<T>::mul_by_vec_helper(// Input
unsigned int first,
unsigned int last,
unsigned int tid, // Thread number
std::vector<std::thread>& threads,
const double *xp,
// Output
std::vector<double *> vec_bp) const
{
// static std::mutex cout_mutex;
// Do the columns that you are resonsible for
double *bp = vec_bp[tid];
std::memset(bp,0,_m*sizeof(double));
for (unsigned int c=first; c<last; c++) { // If we are to run single-threaded
if (_ri[c].size()) { if (_ri[c].size()) {
double wgt = xp[c]; double wgt = xp[c];
const std::vector<unsigned int>& ri = _ri[c]; const std::vector<unsigned int>& ri = _ri[c];
const std::vector<T>& val = _val[c]; const std::vector<T>& val = _val[c];
for (unsigned int i=0; i<ri.size(); i++) { for (unsigned int i=0; i<ri.size(); i++) {
bp[ri[i]] += static_cast<double>(wgt*val[i]); bp[ri[i]] += static_cast<double>(wgt*val[i]);
} }
} }
} }
b.Release(); // Then add up results with other threads
return(b); int step = 1; // How far to the left your neighbour is
while (step < _nt) {
if (is_odd(((int(_nt)-1)-int(tid))/step)) return; // Is not a candidate
int neighbour = int(tid)-step; // Check who your neighbour to the left is
if (neighbour < 0) return; // Return if neighbour is out of bounds
threads[neighbour].join(); // Make sure that neighbour has finished
// cout_mutex.lock();
// std::cout << "I am thread # " << tid << ", and I will add thread # " << neighbour << " to myself" << std::endl;
// cout_mutex.unlock();
double *my_p = vec_bp[tid];
double *ne_p = vec_bp[neighbour];
for (unsigned int i=0; i<_m; i++) my_p[i] += ne_p[i];
step *= 2;
}
return;
} }
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
...@@ -838,19 +936,52 @@ const NEWMAT::ReturnMatrix SpMat<T>::trans_mult(const NEWMAT::ColumnVector& x) c ...@@ -838,19 +936,52 @@ const NEWMAT::ReturnMatrix SpMat<T>::trans_mult(const NEWMAT::ColumnVector& x) c
const double *xp = static_cast<double *>(x.Store()); const double *xp = static_cast<double *>(x.Store());
double *bp = static_cast<double *>(b.Store()); double *bp = static_cast<double *>(b.Store());
for (unsigned int c=0; c<_n; c++) { if (_nt == 1) { // If we are to run single-threaded
for (unsigned int c=0; c<_n; c++) {
double res = 0.0;
if (_ri[c].size()) {
const std::vector<unsigned int>& ri = _ri[c];
const std::vector<T>& val = _val[c];
for (unsigned int i=0; i<ri.size(); i++) {
res += val[i]*xp[ri[i]];
}
}
bp[c] = res;
}
}
else { // We are to run multi-threaded
std::vector<unsigned int> col_pt = this->columns_per_thread();
std::vector<std::thread> threads(_nt-1); // + main thread makes _nt
for (unsigned int i=0; i<_nt-1; i++) {
threads[i] = std::thread(&SpMat<T>::trans_mult_helper,this,col_pt[i],col_pt[i+1],xp,bp);
}
this->trans_mult_helper(col_pt[_nt-1],col_pt[_nt],xp,bp);
std::for_each(threads.begin(),threads.end(),std::mem_fn(&std::thread::join));
}
b.Release();
return(b);
}
template<class T>
void SpMat<T>::trans_mult_helper(// Input
unsigned int first,
unsigned int last,
const double *xp,
// Output
double *bp) const
{
for (unsigned int c=first; c<last; c++) {
double res = 0.0; double res = 0.0;
if (_ri[c].size()) { if (_ri[c].size()) {
const std::vector<unsigned int>& ri = _ri[c]; const std::vector<unsigned int>& ri = _ri[c];
const std::vector<T>& val = _val[c]; const std::vector<T>& val = _val[c];
for (unsigned int i=0; i<ri.size(); i++) { for (unsigned int i=0; i<ri.size(); i++) {
res += val[i]*xp[ri[i]]; res += val[i]*xp[ri[i]];
} }
} }
bp[c] = res; bp[c] = res;
} }
b.Release(); return;
return(b);
} }
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
...@@ -945,20 +1076,75 @@ const SpMat<T> operator*(const SpMat<T>& lh, const SpMat<T>& rh) ...@@ -945,20 +1076,75 @@ const SpMat<T> operator*(const SpMat<T>& lh, const SpMat<T>& rh)
if (lh._n != rh._m) throw SpMatException("operator*: Left hand matrix must have same # of columns as right hand has rows"); if (lh._n != rh._m) throw SpMatException("operator*: Left hand matrix must have same # of columns as right hand has rows");
SpMat<T> out(lh._m,rh._n); SpMat<T> out(lh._m,rh._n);
Accumulator<T> acc(lh._m); if (rh._nt==1) { // If we are to run single threaded
Accumulator<T> acc(lh._m);
for (unsigned int cr=0; cr<rh._n; cr++) {
acc.Reset();
if (rh._ri[cr].size()) {
const std::vector<unsigned int>& rri = rh._ri[cr];
const std::vector<T>& rval = rh._val[cr];
for (unsigned int i=0; i<rri.size(); i++) {
if (lh._ri[rri[i]].size()) {
double wgt = rval[i];
const std::vector<unsigned int>& lri = lh._ri[rri[i]];
const std::vector<T>& lval = lh._val[rri[i]];
for (unsigned int j=0; j<lri.size(); j++) {
acc(lri[j]) += wgt*lval[j];
}
}
}
}
out._ri[cr].resize(acc.NO());
out._val[cr].resize(acc.NO());
std::vector<unsigned int>& ori = out._ri[cr];
std::vector<T>& oval = out._val[cr];
for (unsigned int i=0; i<acc.NO(); i++) {
ori[i] = acc.ri(i);
oval[i] = acc.val(i);
}
out._nz += acc.NO();
}
}
else { // We are to run multi-threaded
std::vector<unsigned int> cpt = rh.columns_per_thread();
std::vector<std::thread> threads(rh._nt-1); // + main thread makes rh._nt
for (unsigned int i=0; i<rh._nt-1; i++) {
threads[i] = std::thread(&SpMat<T>::matrix_mul_helper,&rh,cpt[i],cpt[i+1],std::ref(lh),std::ref(out));
}
rh.matrix_mul_helper(cpt[rh._nt-1],cpt[rh._nt],lh,out);
std::for_each(threads.begin(),threads.end(),std::mem_fn(&std::thread::join));
}
for (unsigned int cr=0; cr<rh._n; cr++) { return(out);
}
template<class T>
void SpMat<T>::matrix_mul_helper(// Input
unsigned int first,
unsigned int last,
const SpMat<T>& lh,
// Output
SpMat<T>& out) const
{
static std::mutex nz_mutex;
// N.B. this is called from the rh matrix, so this refers to that
Accumulator<T> acc(lh._m);
unsigned int lnz=0; // Local non-zero
for (unsigned int cr=first; cr<last; cr++) {
acc.Reset(); acc.Reset();
if (rh._ri[cr].size()) { if (this->_ri[cr].size()) {
const std::vector<unsigned int>& rri = rh._ri[cr]; const std::vector<unsigned int>& rri = this->_ri[cr];
const std::vector<T>& rval = rh._val[cr]; const std::vector<T>& rval = this->_val[cr];
for (unsigned int i=0; i<rri.size(); i++) { for (unsigned int i=0; i<rri.size(); i++) {
if (lh._ri[rri[i]].size()) { if (lh._ri[rri[i]].size()) {
double wgt = rval[i]; double wgt = rval[i];
const std::vector<unsigned int>& lri = lh._ri[rri[i]]; const std::vector<unsigned int>& lri = lh._ri[rri[i]];
const std::vector<T>& lval = lh._val[rri[i]]; const std::vector<T>& lval = lh._val[rri[i]];
for (unsigned int j=0; j<lri.size(); j++) { for (unsigned int j=0; j<lri.size(); j++) {
acc(lri[j]) += wgt*lval[j]; acc(lri[j]) += wgt*lval[j];
} }
} }
} }
...@@ -971,10 +1157,13 @@ const SpMat<T> operator*(const SpMat<T>& lh, const SpMat<T>& rh) ...@@ -971,10 +1157,13 @@ const SpMat<T> operator*(const SpMat<T>& lh, const SpMat<T>& rh)
ori[i] = acc.ri(i); ori[i] = acc.ri(i);
oval[i] = acc.val(i); oval[i] = acc.val(i);
} }
out._nz += acc.NO(); lnz += acc.NO();
} }
nz_mutex.lock();
out._nz += lnz;
nz_mutex.unlock();
return(out); return;
} }
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
...@@ -1069,6 +1258,37 @@ const SpMat<T> operator&(const SpMat<T>& th, const NEWMAT::GeneralMatrix& bh) ...@@ -1069,6 +1258,37 @@ const SpMat<T> operator&(const SpMat<T>& th, const NEWMAT::GeneralMatrix& bh)
return(SpMat<T>(th) &= SpMat<T>(bh)); return(SpMat<T>(th) &= SpMat<T>(bh));
} }
/////////////////////////////////////////////////////////////////////
//
// Global functions for comparison
//
/////////////////////////////////////////////////////////////////////
template<class T>
bool operator==(const SpMat<T>& lh, const SpMat<T>& rh)
{
// Start by comparing dimensions and sparsity
if (lh._m != rh._m || lh._n != rh._n || lh._nz != rh._nz) return(false);
// Next compare non-zero elements per column
for (unsigned int i=0; i<lh._n; i++) {
if (lh._ri[i].size() != rh._ri[i].size()) return(false);
}
// Finally do a detailed comparison if neccessary
for (unsigned int i=0; i<lh._n; i++) {
for (unsigned int j=0; j<lh._ri[i].size(); j++) {
if (lh._ri[i][j] != rh._ri[i][j] || lh._val[i][j] != rh._val[i][j]) {
// cout << "lh._ri[" << i << "][" << j << "] = " << lh._ri[i][j] << ", rh._ri[" << i << "][" << j << "] = " << rh._ri[i][j] << endl;
// cout << "lh._val[" << i << "][" << j << "] = " << lh._val[i][j] << ", rh._val[" << i << "][" << j << "] = " << rh._val[i][j] << endl;
return(false);
}
}
}
return(true);
}
template<class T>
bool operator!=(const SpMat<T>& lh, const SpMat<T>& rh) { return(!(lh==rh)); }
/*################################################################### /*###################################################################
## ##
## Here starts protected functions ## Here starts protected functions
...@@ -1211,16 +1431,46 @@ bool SpMat<T>::same_sparsity(const SpMat<T>& M) const ...@@ -1211,16 +1431,46 @@ bool SpMat<T>::same_sparsity(const SpMat<T>& M) const
template<class T> template<class T>
SpMat<T>& SpMat<T>::add_same_sparsity_mat_to_me(const SpMat<T>& M, double s) SpMat<T>& SpMat<T>::add_same_sparsity_mat_to_me(const SpMat<T>& M, double s)
{ {
for (unsigned int c=0; c<_n; c++) { if (_nt==1) { // If we are to run single threaded
for (unsigned int c=0; c<_n; c++) {
if (_val[c].size()) {
std::vector<T>& val = _val[c];
const std::vector<T>& Mval = M._val[c];
for (unsigned int i=0; i<val.size(); i++) {
val[i] += s*Mval[i];
}
}
}
}
else { // We are to run multi threaded
std::vector<unsigned int> col_pt = this->columns_per_thread();
std::vector<std::thread> threads(_nt-1); // + main thread makes _nt
for (unsigned int i=0; i<_nt-1; i++) {
threads[i] = std::thread(&SpMat<T>::add_same_sparsity_helper,this,
col_pt[i],col_pt[i+1],std::ref(M),s);
}
this->add_same_sparsity_helper(col_pt[_nt-1],col_pt[_nt],M,s);
std::for_each(threads.begin(),threads.end(),std::mem_fn(&std::thread::join));
}
return(*this);
}
template<class T>
void SpMat<T>::add_same_sparsity_helper(unsigned int first,
unsigned int last,
const SpMat<T>& M,
double s)
{
for (unsigned int c=first; c<last; c++) {
if (_val[c].size()) { if (_val[c].size()) {
std::vector<T>& val = _val[c]; std::vector<T>& val = _val[c];
const std::vector<T>& Mval = M._val[c]; const std::vector<T>& Mval = M._val[c];
for (unsigned int i=0; i<val.size(); i++) { for (unsigned int i=0; i<val.size(); i++) {
val[i] += s*Mval[i]; val[i] += s*Mval[i];
} }
} }
} }
return(*this); return;
} }
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
...@@ -1234,10 +1484,58 @@ SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s) ...@@ -1234,10 +1484,58 @@ SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s)
{ {
if (_m != M._m || _n != M._n) throw SpMatException("add_diff_sparsity_mat_to_me: Size mismatch between matrices"); if (_m != M._m || _n != M._n) throw SpMatException("add_diff_sparsity_mat_to_me: Size mismatch between matrices");
if (_nt==1) { // If we are to run single threaded
Accumulator<T> acc(_m);
_nz = 0;
for (unsigned int c=0; c<_n; c++) {
acc.Reset();
if (M._ri[c].size()) {
const std::vector<unsigned int>& Mri = M._ri[c];
const std::vector<T>& Mval = M._val[c];
for (unsigned int i=0; i<Mri.size(); i++) {
acc(Mri[i]) += s*Mval[i];
}
std::vector<unsigned int>& ri = _ri[c];
std::vector<T>& val = _val[c];
for (unsigned int i=0; i<ri.size(); i++) {
acc(ri[i]) += val[i];
}
ri.resize(acc.NO());
val.resize(acc.NO());
for (unsigned int i=0; i<acc.NO(); i++) {
ri[i] = acc.ri(i);
val[i] = acc.val(i);
}
_nz += acc.NO();
}
}
}
else { // We are to run multi-threaded
std::vector<unsigned int> col_pt = this->columns_per_thread(false);
std::vector<std::thread> threads(_nt-1); // + main thread makes _nt
std::vector<unsigned int> nz(_nt,0); // Non-zero elements per thread
for (unsigned int i=0; i<_nt-1; i++) {
threads[i] = std::thread(&SpMat<T>::add_diff_sparsity_helper,this,col_pt[i],
col_pt[i+1],std::ref(M),s,std::ref(nz[i]));
}
this->add_diff_sparsity_helper(col_pt[_nt-1],col_pt[_nt],M,s,nz[_nt-1]);
std::for_each(threads.begin(),threads.end(),std::mem_fn(&std::thread::join));
_nz = std::accumulate(nz.begin(),nz.end(),0u);
}
return(*this);
}
template<class T>
void SpMat<T>::add_diff_sparsity_helper(unsigned int first,
unsigned int last,
const SpMat<T>& M,
double s,
unsigned int& nz)
{
Accumulator<T> acc(_m); Accumulator<T> acc(_m);
_nz = 0; nz = 0;
for (unsigned int c=0; c<_n; c++) { for (unsigned int c=first; c<last; c++) {
acc.Reset(); acc.Reset();
if (M._ri[c].size()) { if (M._ri[c].size()) {
const std::vector<unsigned int>& Mri = M._ri[c]; const std::vector<unsigned int>& Mri = M._ri[c];
...@@ -1256,10 +1554,10 @@ SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s) ...@@ -1256,10 +1554,10 @@ SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s)
ri[i] = acc.ri(i); ri[i] = acc.ri(i);
val[i] = acc.val(i); val[i] = acc.val(i);
} }
_nz += acc.NO(); nz += acc.NO();
} }
} }
return(*this); return;
} }
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
...@@ -1283,6 +1581,47 @@ bool SpMat<T>::is_sorted() const ...@@ -1283,6 +1581,47 @@ bool SpMat<T>::is_sorted() const
return(true); return(true);
} }
/////////////////////////////////////////////////////////////////////
//
// Returns a vector vec where vec[i] and vec[i+1] contains the first
// column and one past the last column respectively for thread i.
// It is based on trying to balance the number of non-zero elements
// between the different threads.
//
/////////////////////////////////////////////////////////////////////
template<class T>
std::vector<unsigned int> SpMat<T>::columns_per_thread(bool balance) const
{
std::vector<unsigned int> cpt(_nt+1,0);
if (balance) { // Try to balance number of non-zero elements between threads
unsigned int nnz_per_thread = std::lround(static_cast<double>(_nz)/static_cast<double>(_nt));
unsigned int cum_nnz = 0;
unsigned int ti = 1;
for (unsigned int c=0; c<_n; c++) {
cum_nnz += _ri[c].size();
if (cum_nnz >= nnz_per_thread) {
if (std::abs(int(cum_nnz - nnz_per_thread)) < std::abs(int(cum_nnz - _ri[c].size() - nnz_per_thread))) { // If this c is best
cpt[ti] = c+1;
cum_nnz = 0;
}
else { // If previous c is best
cpt[ti] = c;
cum_nnz = _ri[c].size();
}
if (ti < _nt-1) ti++;
else break;
}
}
cpt[_nt] = _n;
}
else { // If we just want the same number of columns
double dcpt = static_cast<double>(_n) / static_cast<double>(_nt);
for (unsigned int i=1; i<_nt; i++) cpt[i] = static_cast<unsigned int>(std::floor(i*dcpt));
cpt[_nt] = _n;
}
return(cpt);
}
/* /*
template<class T> template<class T>
SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s) SpMat<T>& SpMat<T>::add_diff_sparsity_mat_to_me(const SpMat<T>& M, double s)
......
...@@ -76,6 +76,9 @@ public: ...@@ -76,6 +76,9 @@ public:
// Basic properties // Basic properties
virtual unsigned int Nrows() const = 0; virtual unsigned int Nrows() const = 0;
virtual unsigned int Ncols() const = 0; virtual unsigned int Ncols() const = 0;
virtual unsigned int Nthreads() const = 0;
virtual void SetNthreads(unsigned int nt) = 0;
virtual unsigned int NZ() const = 0;
// Print matrix (for debugging) // Print matrix (for debugging)
virtual void Print(const std::string fname=std::string("")) const = 0; virtual void Print(const std::string fname=std::string("")) const = 0;
...@@ -137,14 +140,14 @@ public: ...@@ -137,14 +140,14 @@ public:
// Constructors, destructor and assignment // Constructors, destructor and assignment
SparseBFMatrix() SparseBFMatrix()
: mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>())) {} : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>())) {}
SparseBFMatrix(unsigned int m, unsigned int n) SparseBFMatrix(unsigned int m, unsigned int n, unsigned int nt=1)
: mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(m,n))) {} : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(m,n,nt))) {}
SparseBFMatrix(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp) SparseBFMatrix(unsigned int m, unsigned int n, const unsigned int *irp, const unsigned int *jcp, const double *sp, unsigned int nt=1)
: mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(m,n,irp,jcp,sp))) {} : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(m,n,irp,jcp,sp,nt))) {}
SparseBFMatrix(const MISCMATHS::SpMat<T>& M) SparseBFMatrix(const MISCMATHS::SpMat<T>& M)
: mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(M))) {} : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(M))) {}
SparseBFMatrix(const NEWMAT::Matrix& M) SparseBFMatrix(const NEWMAT::Matrix& M, unsigned int nt=1)
: mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(M))) {} : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(M,nt))) {}
SparseBFMatrix(const SparseBFMatrix<T>& M) SparseBFMatrix(const SparseBFMatrix<T>& M)
: mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(*(M.mp)))) {} : mp(std::shared_ptr<MISCMATHS::SpMat<T> >(new MISCMATHS::SpMat<T>(*(M.mp)))) {}
virtual ~SparseBFMatrix() {} virtual ~SparseBFMatrix() {}
...@@ -157,9 +160,15 @@ public: ...@@ -157,9 +160,15 @@ public:
// Access as NEWMAT::Matrix // Access as NEWMAT::Matrix
virtual NEWMAT::ReturnMatrix AsMatrix() const {NEWMAT::Matrix ret; ret = mp->AsNEWMAT(); ret.Release(); return(ret);} virtual NEWMAT::ReturnMatrix AsMatrix() const {NEWMAT::Matrix ret; ret = mp->AsNEWMAT(); ret.Release(); return(ret);}
// Access as MISCMATHS::SpMat<T> matrix. Cannot be used from pointer to base-class.
MISCMATHS::SpMat<T> AsSpMat() const {return(*mp);}
// Basic properties // Basic properties
virtual unsigned int Nrows() const {return(mp->Nrows());} virtual unsigned int Nrows() const {return(mp->Nrows());}
virtual unsigned int Ncols() const {return(mp->Ncols());} virtual unsigned int Ncols() const {return(mp->Ncols());}
virtual unsigned int Nthreads() const {return(mp->Nthreads());}
virtual void SetNthreads(unsigned int nt) {mp->SetNthreads(nt);}
virtual unsigned int NZ() const {return(mp->NZ());}
// Print matrix (for debugging) // Print matrix (for debugging)
virtual void Print(const std::string fname=std::string("")) const {mp->Print(fname);} virtual void Print(const std::string fname=std::string("")) const {mp->Print(fname);}
...@@ -236,6 +245,9 @@ public: ...@@ -236,6 +245,9 @@ public:
// Basic properties // Basic properties
virtual unsigned int Nrows() const {return(mp->Nrows());} virtual unsigned int Nrows() const {return(mp->Nrows());}
virtual unsigned int Ncols() const {return(mp->Ncols());} virtual unsigned int Ncols() const {return(mp->Ncols());}
virtual unsigned int Nthreads() const {return(1);}
virtual void SetNthreads(unsigned int nt) {} // Null function
virtual unsigned int NZ() const {return(mp->Nrows()*mp->Ncols());}
// Print matrix (for debugging) // Print matrix (for debugging)
virtual void Print(const std::string fname=std::string("")) const; virtual void Print(const std::string fname=std::string("")) const;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment