SparseMatrix.inl
/*
Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.

Neither the name of the Johns Hopkins University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/

#ifndef DOXY_IGNORE_THIS

#include <float.h>
#include <stdio.h>
#include <string.h>

//////////////////
// SparseMatrix //
//////////////////

//////////////////////////////////////
// SparseMatrix Methods and Members //
//////////////////////////////////////

template< class T >
SparseMatrix< T >::SparseMatrix( void )
{
    _contiguous = false;
    _maxEntriesPerRow = 0;
    rows = 0;
    rowSizes = NullPointer< int >( );
    m_ppElements = NullPointer< Pointer( MatrixEntry< T > ) >( );
}

template< class T > SparseMatrix< T >::SparseMatrix( int rows ) {
    _contiguous = false;
    _maxEntriesPerRow = 0;
    this->rows = 0;    // the parameter shadows the member; qualify so the member (not the argument) is zeroed before Resize
    rowSizes = NullPointer< int >( );
    m_ppElements = NullPointer< Pointer( MatrixEntry< T > ) >( );
    Resize( rows );
}

template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) {
    _contiguous = false;
    _maxEntriesPerRow = 0;
    this->rows = 0;    // same shadowing fix as above
    rowSizes = NullPointer< int >( );
    m_ppElements = NullPointer< Pointer( MatrixEntry< T > ) >( );
    Resize( rows , maxEntriesPerRow );
}

// Had to rewrite the delegating constructors as we need to support VS2008 for now
//template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
//template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }

template< class T >
SparseMatrix< T >::SparseMatrix( const SparseMatrix< T >& M )
{
    _contiguous = false;
    _maxEntriesPerRow = 0;
    rows = 0;
    rowSizes = NullPointer< int >( );
    m_ppElements = NullPointer< Pointer( MatrixEntry< T > ) >( );

    if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
    else Resize( M.rows );
    for( int i=0 ; i<rows ; i++ )
    {
        SetRowSize( i , M.rowSizes[i] );
        memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
    }
}

template< class T >
int SparseMatrix< T >::Entries( void ) const
{
    int e = 0;
    for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
    return e;
}
template< class T >
SparseMatrix< T >& SparseMatrix< T >::operator = ( const SparseMatrix< T >& M )
{
    if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
    else Resize( M.rows );
    for( int i=0 ; i<rows ; i++ )
    {
        SetRowSize( i , M.rowSizes[i] );
        memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
    }
    return *this;
}

template< class T >
SparseMatrix< T >::~SparseMatrix( void ){ Resize( 0 ); }

template< class T >
bool SparseMatrix< T >::write( const char* fileName ) const
{
    FILE* fp = fopen( fileName , "wb" );
    if( !fp ) return false;
    bool ret = write( fp );
    fclose( fp );
    return ret;
}
template< class T >
bool SparseMatrix< T >::read( const char* fileName )
{
    FILE* fp = fopen( fileName , "rb" );
    if( !fp ) return false;
    bool ret = read( fp );
    fclose( fp );
    return ret;
}
template< class T >
bool SparseMatrix< T >::write( FILE* fp ) const
{
    if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
    if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
    for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
    return true;
}
template< class T >
bool SparseMatrix< T >::read( FILE* fp )
{
    int r;
    if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
    Resize( r );
    if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
    for( int i=0 ; i<rows ; i++ )
    {
        r = rowSizes[i];
        rowSizes[i] = 0;    // zero the size so SetRowSize() doesn't try to free the (not yet allocated) row
        SetRowSize( i , r );
        if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
    }
    return true;
}


template< class T >
void SparseMatrix< T >::Resize( int r )
{
    if( rows>0 )
    {
        if( _contiguous ){ if( _maxEntriesPerRow ) FreePointer( m_ppElements[0] ); }
        else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) FreePointer( m_ppElements[i] ); }
        FreePointer( m_ppElements );
        FreePointer( rowSizes );
    }
    rows = r;
    if( r )
    {
        rowSizes = AllocPointer< int >( r );
        m_ppElements = AllocPointer< Pointer( MatrixEntry< T > ) >( r );
        memset( rowSizes , 0 , sizeof( int ) * r );
    }
    _contiguous = false;
    _maxEntriesPerRow = 0;
}
template< class T >
void SparseMatrix< T >::Resize( int r , int e )
{
    if( rows>0 )
    {
        if( _contiguous ){ if( _maxEntriesPerRow ) FreePointer( m_ppElements[0] ); }
        else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) FreePointer( m_ppElements[i] ); }
        FreePointer( m_ppElements );
        FreePointer( rowSizes );
    }
    rows = r;
    if( r )
    {
        rowSizes = AllocPointer< int >( r );
        m_ppElements = AllocPointer< Pointer( MatrixEntry< T > ) >( r );
        m_ppElements[0] = AllocPointer< MatrixEntry< T > >( r * e );
        memset( rowSizes , 0 , sizeof( int ) * r );
        for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
    }
    _contiguous = true;
    _maxEntriesPerRow = e;
}

template< class T >
void SparseMatrix< T >::SetRowSize( int row , int count )
{
    if( _contiguous )
    {
        if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d>%d\n" , count , _maxEntriesPerRow ) , exit( 1 );
        rowSizes[row] = count;
    }
    else if( row>=0 && row<rows )
    {
        if( rowSizes[row] ) FreePointer( m_ppElements[row] );
        if( count>0 ) m_ppElements[row] = AllocPointer< MatrixEntry< T > >( count );
        rowSizes[row] = count;    // record the new size (read() and the copy constructor rely on this)
    }
}
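
// Usage sketch: how Resize and SetRowSize pair up when assembling a matrix row by row.
// MatrixEntry's N / Value members and operator[] are taken from their use elsewhere in
// this file; SPARSE_MATRIX_INL_EXAMPLES is a hypothetical guard that keeps the sketch
// out of actual builds.
#ifdef SPARSE_MATRIX_INL_EXAMPLES
inline SparseMatrix< double > ExampleAssembleTridiagonal( int n )
{
    SparseMatrix< double > M;
    M.Resize( n );                               // non-contiguous storage; every row starts empty
    for( int i=0 ; i<n ; i++ )
    {
        int count = 1 + ( i>0 ) + ( i<n-1 );
        M.SetRowSize( i , count );               // allocate (and record) this row's entries
        int e = 0;
        if( i>0 )   M[i][e].N = i-1 , M[i][e].Value = -1. , e++;
        M[i][e].N = i , M[i][e].Value = 2. , e++;
        if( i<n-1 ) M[i][e].N = i+1 , M[i][e].Value = -1.;
    }
    return M;
}
#endif // SPARSE_MATRIX_INL_EXAMPLES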


template< class T >
void SparseMatrix< T >::SetZero( void )
{
    Resize( this->m_N , this->m_M );
}

template< class T >
void SparseMatrix< T >::SetIdentity( void )
{
    SetZero();
    for( int ij=0 ; ij<Min( this->Rows() , this->Columns() ) ; ij++ ) (*this)(ij,ij) = T(1);
}

template< class T >
SparseMatrix< T > SparseMatrix< T >::operator * ( const T& V ) const
{
    SparseMatrix< T > M( *this );
    M *= V;
    return M;
}

template< class T >
SparseMatrix< T >& SparseMatrix< T >::operator *= ( const T& V )
{
    // row lengths are tracked in rowSizes (m_ppElements[i] is a raw entry pointer)
    for( int i=0 ; i<this->Rows() ; i++ )
        for( int ii=0 ; ii<rowSizes[i] ; ii++ ) m_ppElements[i][ii].Value *= V;
    return *this;
}

template< class T >
SparseMatrix< T > SparseMatrix< T >::Multiply( const SparseMatrix< T >& M ) const
{
    SparseMatrix< T > R( this->Rows() , M.Columns() );
    for( int i=0 ; i<R.Rows() ; i++ )
    {
        for( int ii=0 ; ii<rowSizes[i] ; ii++ )
        {
            int N = m_ppElements[i][ii].N;
            T Value = m_ppElements[i][ii].Value;
            for( int jj=0 ; jj<M.rowSizes[N] ; jj++ )
                R( i , M.m_ppElements[N][jj].N ) += Value * M.m_ppElements[N][jj].Value;
        }
    }
    return R;
}

template< class T >
template< class T2 >
PoissonVector< T2 > SparseMatrix< T >::Multiply( const PoissonVector< T2 >& V ) const
{
    PoissonVector< T2 > R( rows );

    for( int i=0 ; i<rows ; i++ )
    {
        T2 temp = T2();
        for( int ii=0 ; ii<rowSizes[i] ; ii++ ) temp += m_ppElements[i][ii].Value * V.m_pV[ m_ppElements[i][ii].N ];
        R(i) = temp;
    }
    return R;
}
283 
284 template<class T>
285 template<class T2>
286 void SparseMatrix<T>::Multiply( const PoissonVector<T2>& In , PoissonVector<T2>& Out , int threads ) const
287 {
288 #ifdef USE_OPENMP
289 #pragma omp parallel for num_threads( threads ) schedule( static )
290 #endif
291  for( int i=0 ; i<rows ; i++ )
292  {
293  T2 temp = T2();
294  temp *= 0;
295  for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
296  Out.m_pV[i] = temp;
297  }
298 }

template< class T >
SparseMatrix< T > SparseMatrix< T >::operator * ( const SparseMatrix< T >& M ) const
{
    return Multiply( M );
}
template< class T >
template< class T2 >
PoissonVector< T2 > SparseMatrix< T >::operator * ( const PoissonVector< T2 >& V ) const
{
    return Multiply( V );
}

template< class T >
SparseMatrix< T > SparseMatrix< T >::Transpose( void ) const
{
    SparseMatrix< T > M( this->Columns() , this->Rows() );

    for( int i=0 ; i<this->Rows() ; i++ )
        for( int ii=0 ; ii<rowSizes[i] ; ii++ )
            M( m_ppElements[i][ii].N , i ) = m_ppElements[i][ii].Value;
    return M;
}

template< class T >
template< class T2 >
int SparseMatrix< T >::SolveSymmetric( const SparseMatrix< T >& M , const PoissonVector< T2 >& b , int iters , PoissonVector< T2 >& solution , const T2 eps , int reset , int threads )
{
    if( reset )
    {
        solution.Resize( b.Dimensions() );
        solution.SetZero();
    }
    PoissonVector< T2 > r;
    r.Resize( solution.Dimensions() );
    M.Multiply( solution , r );
    r = b - r;
    PoissonVector< T2 > d = r;
    double delta_new = 0 , delta_0 = 0;
    for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
    delta_0 = delta_new;
    if( delta_new<eps ) return 0;
    int ii;
    PoissonVector< T2 > q;
    q.Resize( d.Dimensions() );
    for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
    {
        M.Multiply( d , q , threads );
        double dDotQ = 0 , alpha = 0;
        for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
        alpha = delta_new / dDotQ;
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
        for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
        if( !(ii%50) )
        {
            // periodically recompute the residual from scratch to limit accumulated round-off
            r.Resize( solution.Dimensions() );
            M.Multiply( solution , r , threads );
            r = b - r;
        }
        else
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
            for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2( alpha );

        double delta_old = delta_new , beta;
        delta_new = 0;
        for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
        beta = delta_new / delta_old;
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
        for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
    }
    return ii;
}
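
// Usage sketch: conjugate-gradient solve of a symmetric system with SolveSymmetric.
// The argument defaults live in the (unseen) header, so all parameters are passed
// explicitly here, and the call assumes the method is declared static in the header;
// the guard macro is hypothetical.
#ifdef SPARSE_MATRIX_INL_EXAMPLES
inline int ExampleSolveSymmetric( const SparseMatrix< double >& M , const PoissonVector< double >& b )
{
    PoissonVector< double > x;
    // iters=1000 , eps=1e-8 , reset=1 (resize and zero x first) , threads=1
    return SparseMatrix< double >::SolveSymmetric( M , b , 1000 , x , 1e-8 , 1 , 1 );
}
#endif // SPARSE_MATRIX_INL_EXAMPLES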

// Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
template< class T >
int SparseMatrix< T >::Solve( const SparseMatrix< T >& M , const PoissonVector< T >& b , int iters , PoissonVector< T >& solution , const T eps )
{
    SparseMatrix mTranspose = M.Transpose();
    PoissonVector< T > bb = mTranspose * b;
    PoissonVector< T > d , r , Md;
    T alpha , beta , rDotR;
    int i;

    solution.Resize( M.Columns() );
    solution.SetZero();

    d = r = bb;
    rDotR = r.Dot( r );
    for( i=0 ; i<iters && rDotR>eps ; i++ )
    {
        T temp;
        Md = mTranspose * ( M * d );
        alpha = rDotR / d.Dot( Md );
        solution += d * alpha;
        r -= Md * alpha;
        temp = r.Dot( r );
        beta = temp / rDotR;
        rDotR = temp;
        d = r + d * beta;
    }
    return i;
}
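
// Why the normal equations work: M need not be symmetric (or even square), but M^tM
// always is symmetric, and it is positive semi-definite since x^t(M^tM)x = ||M(x)||^2 >= 0;
// conjugate gradients therefore applies to M^tM(x)=M^t(b), and any solution of that
// system minimizes ||M(x)-b||^2.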


///////////////////////////
// SparseSymmetricMatrix //
///////////////////////////
template< class T >
template< class T2 >
PoissonVector< T2 > SparseSymmetricMatrix< T >::operator * ( const PoissonVector< T2 >& V ) const { return Multiply( V ); }
template< class T >
template< class T2 >
PoissonVector< T2 > SparseSymmetricMatrix< T >::Multiply( const PoissonVector< T2 >& V ) const
{
    PoissonVector< T2 > R( SparseMatrix< T >::rows );

    for( int i=0 ; i<SparseMatrix< T >::rows ; i++ )
    {
        for( int ii=0 ; ii<SparseMatrix< T >::rowSizes[i] ; ii++ )
        {
            int j = SparseMatrix< T >::m_ppElements[i][ii].N;
            R(i) += SparseMatrix< T >::m_ppElements[i][ii].Value * V.m_pV[j];
            R(j) += SparseMatrix< T >::m_ppElements[i][ii].Value * V.m_pV[i];
        }
    }
    return R;
}

template< class T >
template< class T2 >
void SparseSymmetricMatrix< T >::Multiply( const PoissonVector< T2 >& In , PoissonVector< T2 >& Out , bool addDCTerm ) const
{
    Out.SetZero();
    const T2* in = &In[0];
    T2* out = &Out[0];
    T2 dcTerm = T2( 0 );
    if( addDCTerm )
    {
        for( int i=0 ; i<SparseMatrix< T >::rows ; i++ ) dcTerm += in[i];
        dcTerm /= SparseMatrix< T >::rows;
    }
    for( int i=0 ; i<SparseMatrix< T >::rows ; i++ )
    {
        const MatrixEntry< T >* temp = SparseMatrix< T >::m_ppElements[i];
        const MatrixEntry< T >* end = temp + SparseMatrix< T >::rowSizes[i];
        const T2& in_i_ = in[i];
        T2 out_i = T2(0);
        for( ; temp!=end ; temp++ )
        {
            int j = temp->N;
            T2 v = temp->Value;
            out_i += v * in[j];
            out[j] += v * in_i_;    // only half the symmetric matrix is stored, so scatter the transposed entry too
        }
        out[i] += out_i;
    }
    if( addDCTerm ) for( int i=0 ; i<SparseMatrix< T >::rows ; i++ ) out[i] += dcTerm;
}
template< class T >
template< class T2 >
void SparseSymmetricMatrix< T >::Multiply( const PoissonVector< T2 >& In , PoissonVector< T2 >& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
{
    int dim = int( In.Dimensions() );
    const T2* in = &In[0];
    int threads = OutScratch.threads();
    if( addDCTerm )
    {
        T2 dcTerm = 0;
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
#endif
        for( int t=0 ; t<threads ; t++ )
        {
            T2* out = OutScratch[t];
            memset( out , 0 , sizeof( T2 ) * dim );
            for( int i=(SparseMatrix< T >::rows*t)/threads ; i<(SparseMatrix< T >::rows*(t+1))/threads ; i++ )
            {
                const T2& in_i_ = in[i];
                T2& out_i_ = out[i];
                ConstPointer( MatrixEntry< T > ) temp;
                ConstPointer( MatrixEntry< T > ) end;
                for( temp = SparseMatrix< T >::m_ppElements[i] , end = temp+SparseMatrix< T >::rowSizes[i] ; temp!=end ; temp++ )
                {
                    int j = temp->N;
                    T2 v = temp->Value;
                    out_i_ += v * in[j];
                    out[j] += v * in_i_;
                }
                dcTerm += in_i_;
            }
        }
        dcTerm /= dim;
        dim = int( Out.Dimensions() );
        T2* out = &Out[0];
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
        for( int i=0 ; i<dim ; i++ )
        {
            T2 _out = dcTerm;
            for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
            out[i] = _out;
        }
    }
    else
    {
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
        for( int t=0 ; t<threads ; t++ )
        {
            T2* out = OutScratch[t];
            memset( out , 0 , sizeof( T2 ) * dim );
            for( int i=(SparseMatrix< T >::rows*t)/threads ; i<(SparseMatrix< T >::rows*(t+1))/threads ; i++ )
            {
                T2 in_i_ = in[i];
                T2 out_i_ = T2();
                ConstPointer( MatrixEntry< T > ) temp;
                ConstPointer( MatrixEntry< T > ) end;
                for( temp = SparseMatrix< T >::m_ppElements[i] , end = temp+SparseMatrix< T >::rowSizes[i] ; temp!=end ; temp++ )
                {
                    int j = temp->N;
                    T2 v = temp->Value;
                    out_i_ += v * in[j];
                    out[j] += v * in_i_;
                }
                out[i] += out_i_;
            }
        }
        dim = int( Out.Dimensions() );
        T2* out = &Out[0];
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
        for( int i=0 ; i<dim ; i++ )
        {
            T2 _out = T2(0);
            for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
            out[i] = _out;
        }
    }
}
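
// Usage sketch: the scratch-buffer Multiply above sidesteps write contention by giving
// each thread its own dim-sized output buffer and summing the buffers at the end.
// MapReduceVector's resize( threads , dim ) and threads() are inferred from their use
// in this file; the guard macro is hypothetical.
#ifdef SPARSE_MATRIX_INL_EXAMPLES
inline void ExampleScratchMultiply( const SparseSymmetricMatrix< double >& A , const PoissonVector< double >& in , PoissonVector< double >& out , int threads )
{
    MapReduceVector< double > scratch;
    scratch.resize( threads , int( in.Dimensions() ) );    // one buffer per thread
    A.Multiply( in , out , scratch , false );              // false: no DC-term correction
}
#endif // SPARSE_MATRIX_INL_EXAMPLES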
template< class T >
template< class T2 >
void SparseSymmetricMatrix< T >::Multiply( const PoissonVector< T2 >& In , PoissonVector< T2 >& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
{
    int dim = In.Dimensions();
    const T2* in = &In[0];
    int threads = OutScratch.size();
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
    for( int t=0 ; t<threads ; t++ )
        for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
    for( int t=0 ; t<threads ; t++ )
    {
        T2* out = OutScratch[t];
        for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
        {
            const MatrixEntry< T >* temp = SparseMatrix< T >::m_ppElements[i];
            const MatrixEntry< T >* end = temp + SparseMatrix< T >::rowSizes[i];
            const T2& in_i_ = in[i];
            T2& out_i_ = out[i];
            for( ; temp!=end ; temp++ )
            {
                int j = temp->N;
                T2 v = temp->Value;
                out_i_ += v * in[j];
                out[j] += v * in_i_;
            }
        }
    }
    T2* out = &Out[0];
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
    for( int i=0 ; i<Out.Dimensions() ; i++ )
    {
        T2& _out = out[i];
        _out = T2(0);
        for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
    }
}
#ifdef WIN32
#ifndef _AtomicIncrement_
#define _AtomicIncrement_
#include <WinBase.h>
inline void AtomicIncrement( volatile float* ptr , float addend )
{
    float newValue = *ptr;
    LONG& _newValue = *( (LONG*)&newValue );
    LONG _oldValue;
    for( ;; )
    {
        _oldValue = _newValue;
        newValue += addend;    // this add is essential: without it the compare-and-swap just stores the old value back unchanged
        _newValue = InterlockedCompareExchange( (LONG*)ptr , _newValue , _oldValue );
        if( _newValue==_oldValue ) break;
    }
}
inline void AtomicIncrement( volatile double* ptr , double addend )
{
    double newValue = *ptr;
    LONGLONG& _newValue = *( (LONGLONG*)&newValue );
    LONGLONG _oldValue;
    do
    {
        _oldValue = _newValue;
        newValue += addend;    // same compare-and-swap retry pattern as the float version
        _newValue = InterlockedCompareExchange64( (LONGLONG*)ptr , _newValue , _oldValue );
    }
    while( _newValue!=_oldValue );
}
#endif // _AtomicIncrement_
template< class T >
void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const PoissonVector< float >& In , PoissonVector< float >& Out , int threads , const int* partition=NULL )
{
    Out.SetZero();
    const float* in = &In[0];
    float* out = &Out[0];
    if( partition )
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
        for( int t=0 ; t<threads ; t++ )
            for( int i=partition[t] ; i<partition[t+1] ; i++ )
            {
                const MatrixEntry< T >* temp = A[i];
                const MatrixEntry< T >* end = temp + A.rowSizes[i];
                const float& in_i = in[i];
                float out_i = 0.f;
                for( ; temp!=end ; temp++ )
                {
                    int j = temp->N;
                    float v = temp->Value;
                    out_i += v * in[j];
                    AtomicIncrement( out+j , v * in_i );
                }
                AtomicIncrement( out+i , out_i );
            }
    else
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
        for( int i=0 ; i<A.rows ; i++ )
        {
            const MatrixEntry< T >* temp = A[i];
            const MatrixEntry< T >* end = temp + A.rowSizes[i];
            const float& in_i = in[i];
            float out_i = 0.f;
            for( ; temp!=end ; temp++ )
            {
                int j = temp->N;
                float v = temp->Value;
                out_i += v * in[j];
                AtomicIncrement( out+j , v * in_i );
            }
            AtomicIncrement( out+i , out_i );
        }
}
template< class T >
void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const PoissonVector< double >& In , PoissonVector< double >& Out , int threads , const int* partition=NULL )
{
    Out.SetZero();
    const double* in = &In[0];
    double* out = &Out[0];

    if( partition )
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
        for( int t=0 ; t<threads ; t++ )
            for( int i=partition[t] ; i<partition[t+1] ; i++ )
            {
                const MatrixEntry< T >* temp = A[i];
                const MatrixEntry< T >* end = temp + A.rowSizes[i];
                const double& in_i = in[i];
                double out_i = 0.;
                for( ; temp!=end ; temp++ )
                {
                    int j = temp->N;
                    T v = temp->Value;
                    out_i += v * in[j];
                    AtomicIncrement( out+j , v * in_i );
                }
                AtomicIncrement( out+i , out_i );
            }
    else
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
        for( int i=0 ; i<A.rows ; i++ )
        {
            const MatrixEntry< T >* temp = A[i];
            const MatrixEntry< T >* end = temp + A.rowSizes[i];
            const double& in_i = in[i];
            double out_i = 0.;
            for( ; temp!=end ; temp++ )
            {
                int j = temp->N;
                T v = temp->Value;
                out_i += v * in[j];
                AtomicIncrement( out+j , v * in_i );
            }
            AtomicIncrement( out+i , out_i );
        }
}

template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const PoissonVector< T2 >& b , int iters , PoissonVector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
{
    eps *= eps;
    int dim = b.Dimensions();
    if( reset )
    {
        x.Resize( dim );
        x.SetZero();
    }
    PoissonVector< T2 > r( dim ) , d( dim ) , q( dim );
    PoissonVector< T2 > temp;
    if( solveNormal ) temp.Resize( dim );
    T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
    const T2* _b = &b[0];

    // partition the rows so that each thread processes roughly the same number of entries
    std::vector< int > partition( threads+1 );
    {
        int eCount = 0;
        for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
        partition[0] = 0;
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
        for( int t=0 ; t<threads ; t++ )
        {
            int _eCount = 0;
            for( int i=0 ; i<A.rows ; i++ )
            {
                _eCount += A.rowSizes[i];
                if( _eCount*threads>=eCount*(t+1) )
                {
                    partition[t+1] = i;
                    break;
                }
            }
        }
        partition[threads] = A.rows;
    }
    if( solveNormal )
    {
        MultiplyAtomic( A , x , temp , threads , &partition[0] );
        MultiplyAtomic( A , temp , r , threads , &partition[0] );
        MultiplyAtomic( A , b , temp , threads , &partition[0] );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
    }
    else
    {
        MultiplyAtomic( A , x , r , threads , &partition[0] );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
    }
    double delta_new = 0 , delta_0;
    for( int i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
    delta_0 = delta_new;
    if( delta_new<eps )
    {
        fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
        return 0;
    }
    int ii;
    for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
    {
        if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
        else MultiplyAtomic( A , d , q , threads , &partition[0] );
        double dDotQ = 0;
        for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
        T2 alpha = T2( delta_new / dDotQ );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
        for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
        if( (ii%50)==(50-1) )
        {
            // periodically recompute the residual from scratch to limit accumulated round-off
            r.Resize( dim );
            if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
            else MultiplyAtomic( A , x , r , threads , &partition[0] );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
            for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
        }
        else
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
            for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;

        double delta_old = delta_new;
        delta_new = 0;
        for( int i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
        T2 beta = T2( delta_new / delta_old );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) schedule( static )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
    }
    return ii;
}
#endif // WIN32
template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix< T >& A , const PoissonVector< T2 >& b , int iters , PoissonVector< T2 >& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
{
    eps *= eps;
    int dim = int( b.Dimensions() );
    PoissonVector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
    if( reset ) x.Resize( dim );
    if( solveNormal ) temp.Resize( dim );
    T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
    const T2* _b = &b[0];

    double delta_new = 0 , delta_0;
    if( solveNormal )
    {
        A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( scratch.threads() ) reduction( + : delta_new )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
    }
    else
    {
        A.Multiply( x , r , scratch , addDCTerm );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( scratch.threads() ) reduction ( + : delta_new )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
    }
    delta_0 = delta_new;
    if( delta_new<eps )
    {
        fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
        return 0;
    }
    int ii;
    for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
    {
        if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
        else A.Multiply( d , q , scratch , addDCTerm );
        double dDotQ = 0;
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( scratch.threads() ) reduction( + : dDotQ )
#endif
        for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
        T2 alpha = T2( delta_new / dDotQ );
        double delta_old = delta_new;
        delta_new = 0;
        if( (ii%50)==(50-1) )
        {
            // every 50th iteration, recompute the residual from scratch; x must be updated
            // *before* the multiply (and only once), so the update is not repeated below
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( scratch.threads() )
#endif
            for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
            r.Resize( dim );
            if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
            else A.Multiply( x , r , scratch , addDCTerm );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( scratch.threads() ) reduction( + : delta_new )
#endif
            for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
        }
        else
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( scratch.threads() ) reduction( + : delta_new )
#endif
            for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;

        T2 beta = T2( delta_new / delta_old );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( scratch.threads() )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
    }
    return ii;
}
template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix< T >& A , const PoissonVector< T2 >& b , int iters , PoissonVector< T2 >& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
{
    eps *= eps;
    int dim = int( b.Dimensions() );
    MapReduceVector< T2 > outScratch;
    if( threads<1 ) threads = 1;
    if( threads>1 ) outScratch.resize( threads , dim );
    if( reset ) x.Resize( dim );
    PoissonVector< T2 > r( dim ) , d( dim ) , q( dim );
    PoissonVector< T2 > temp;
    if( solveNormal ) temp.Resize( dim );
    T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
    const T2* _b = &b[0];

    double delta_new = 0 , delta_0;

    if( solveNormal )
    {
        if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
        else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
    }
    else
    {
        if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
        else A.Multiply( x , r , addDCTerm );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
    }

    delta_0 = delta_new;
    if( delta_new<eps )
    {
        fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
        return 0;
    }
    int ii;
    for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
    {
        if( solveNormal )
        {
            if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
            else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
        }
        else
        {
            if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
            else A.Multiply( d , q , addDCTerm );
        }
        double dDotQ = 0;
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
#endif
        for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
        T2 alpha = T2( delta_new / dDotQ );
        double delta_old = delta_new;
        delta_new = 0;

        if( (ii%50)==(50-1) )
        {
            // every 50th iteration, recompute the residual from scratch; as above, x is
            // updated once, before the multiply
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
            for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
            r.SetZero();
            if( solveNormal )
            {
                if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
                else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
            }
            else
            {
                if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
                else A.Multiply( x , r , addDCTerm );
            }
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
#endif
            for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
        }
        else
        {
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
#endif
            for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
        }

        T2 beta = T2( delta_new / delta_old );
#ifdef USE_OPENMP
#pragma omp parallel for num_threads( threads )
#endif
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
    }
    return ii;
}

template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix< T >& M , const PoissonVector< T2 >& diagonal , const PoissonVector< T2 >& b , int iters , PoissonVector< T2 >& solution , int reset )
{
    PoissonVector< T2 > d , r , Md;

    if( reset )
    {
        solution.Resize( b.Dimensions() );
        solution.SetZero();
    }
    Md.Resize( M.rows );
    for( int i=0 ; i<iters ; i++ )
    {
        M.Multiply( solution , Md );
        r = b - Md;
        // Jacobi update, solving each row's diagonal equation with the off-diagonal terms frozen:
        // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
        // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
        // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
        for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += ( b[j]-Md[j] ) / diagonal[j];
    }
    return iters;
}
template< class T >
template< class T2 >
void SparseSymmetricMatrix< T >::getDiagonal( PoissonVector< T2 >& diagonal ) const
{
    diagonal.Resize( SparseMatrix< T >::rows );
    for( int i=0 ; i<SparseMatrix< T >::rows ; i++ )
    {
        diagonal[i] = 0.;
        for( int j=0 ; j<SparseMatrix< T >::rowSizes[i] ; j++ ) if( SparseMatrix< T >::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix< T >::m_ppElements[i][j].Value * 2;
    }
}
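
// Usage sketch: diagonally-preconditioned (Jacobi) iteration driven by getDiagonal.
// Note the factor of two above: since only half of each symmetric matrix is stored,
// the Multiply routines scatter every stored entry twice (once per side), so a stored
// diagonal entry contributes 2 * Value and getDiagonal reports that effective value.
// The guard macro is hypothetical, and Solve is assumed to be declared static.
#ifdef SPARSE_MATRIX_INL_EXAMPLES
inline void ExampleJacobi( const SparseSymmetricMatrix< double >& M , const PoissonVector< double >& b )
{
    PoissonVector< double > diagonal , x;
    M.getDiagonal( diagonal );
    // 100 Jacobi sweeps, reset=1 (resize and zero x first)
    SparseSymmetricMatrix< double >::Solve( M , diagonal , b , 100 , x , 1 );
}
#endif // SPARSE_MATRIX_INL_EXAMPLES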

#endif // DOXY_IGNORE_THIS