Developer Documentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
sat.cc
1 /*===========================================================================*\
2 * *
3 * OpenFlipper *
4 * Copyright (C) 2001-2011 by Computer Graphics Group, RWTH Aachen *
5 * www.openflipper.org *
6 * *
7 *--------------------------------------------------------------------------- *
8 * This file is part of OpenFlipper. *
9 * *
10 * OpenFlipper is free software: you can redistribute it and/or modify *
11 * it under the terms of the GNU Lesser General Public License as *
12 * published by the Free Software Foundation, either version 3 of *
13 * the License, or (at your option) any later version with the *
14 * following exceptions: *
15 * *
16 * If other files instantiate templates or use macros *
17 * or inline functions from this file, or you compile this file and *
18 * link it with other files to produce an executable, this file does *
19 * not by itself cause the resulting executable to be covered by the *
20 * GNU Lesser General Public License. This exception does not however *
21 * invalidate any other reasons why the executable file might be *
22 * covered by the GNU Lesser General Public License. *
23 * *
24 * OpenFlipper is distributed in the hope that it will be useful, *
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
27 * GNU Lesser General Public License for more details. *
28 * *
29 * You should have received a copy of the GNU Lesser General Public *
30 * License along with OpenFlipper. If not, *
31 * see <http://www.gnu.org/licenses/>. *
32 * *
33 \*===========================================================================*/
34 
35 /*===========================================================================*\
36 * *
37 * $Revision: 17080 $ *
38 * $LastChangedBy: moeller $ *
39 * $Date: 2013-07-19 12:58:31 +0200 (Fri, 19 Jul 2013) $ *
40 * *
41 \*===========================================================================*/
42 
43 #include <ACG/GL/acg_glew.hh>
44 
45 #include "sat.hh"
46 
47 #include <ACG/GL/ShaderCache.hh>
48 #include <ACG/ShaderUtils/GLSLShader.hh>
49 #include <ACG/GL/GLFormatInfo.hh>
50 
51 //#include <ACG/GL/Debug.hh>
52 //#define SAT_DBG_DIR "/home/tenter/dbg/sat_out/"
53 #define SAT_DBG_DIR "c:/dbg/sat_out/"
54 
55 //#define SAT_DBG
56 
57 
58 /*
59 implementation reference:
60 "Parallel Prefix Sum (Scan) with CUDA" by M. Harris et al., GPU Gems 3
61 https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch39.html
62 */
63 
64 
65 std::map<GLenum, const char*> PrefixSumPlan::datatypeMacros_;
66 
67 PrefixSumPlan::PrefixSumPlan(int _w, int _h, GLenum _internalFmt, int _blocksize)
68  : width_(_w), height_(_h), blocksize_(_blocksize), internalFmt_(_internalFmt), elemSize_(0),
69  blockSumPlan_(0),
70  dbgOutput_(0), dbgTranposedInput_(0), dbgProfile_(0)
71 {
72  // blocksize must be power of 2
73  _blocksize = paddedBlocksize(_blocksize);
74 
75  // padding should not required, but uncommenting this enables automatic padding
76 // width_ = _w = paddedDimension(_w);
77 
78  numWorkGroupsX_ = _w / ( _blocksize) + (_w % _blocksize ? 1 : 0);
79  numWorkGroupsY_ = _h;
80 
81  numWorkGroupsX_ = std::max(numWorkGroupsX_, 1);
82  numWorkGroupsY_ = std::max(numWorkGroupsY_, 1);
83 
84  numBlockScanGroupsX_ = numWorkGroupsX_ / 2 + numWorkGroupsX_ % 2;
85  numBlockScanGroupsY_ = numWorkGroupsY_ / 2 + numWorkGroupsY_ % 2;
86 
87  numBlockScanGroupsX_ = std::max(numBlockScanGroupsX_, 1);
88  numBlockScanGroupsY_ = std::max(numBlockScanGroupsY_, 1);
89 
90  numDispatches_ = 1;
91 
92  // configure shader via preprocessor defines
93 
94  QString blocksizeDef;
95  blocksizeDef.sprintf("#define SAT_BLOCKSIZE %i", _blocksize);
96  macros_.push_back(blocksizeDef);
97 
98  if (_h > 1)
99  macros_.push_back("#define SAT_2D");
100 
101  if (datatypeMacros_.empty())
102  {
103  datatypeMacros_[GL_R32F] = "SAT_FLOAT1";
104  datatypeMacros_[GL_RG32F] = "SAT_FLOAT2";
105  datatypeMacros_[GL_RGBA32F] = "SAT_FLOAT4";
106 
107  datatypeMacros_[GL_R32I] = "SAT_INT1";
108  datatypeMacros_[GL_RG32I] = "SAT_INT2";
109  datatypeMacros_[GL_RGBA32I] = "SAT_INT4";
110 
111  datatypeMacros_[GL_R32UI] = "SAT_UINT1";
112  datatypeMacros_[GL_RG32UI] = "SAT_UINT2";
113  datatypeMacros_[GL_RGBA32UI] = "SAT_UINT4";
114  }
115 
116  std::map<GLenum, const char*>::iterator datatype = datatypeMacros_.find(_internalFmt);
117 
118  if (datatype != datatypeMacros_.end())
119  {
120  macros_.push_back(QString("#define SAT_DATATYPE ") + QString(datatype->second));
121 
122  bool success = false;
123  elemSize_ = 4 * QString(datatype->second).right(1).toInt(&success);
124 
125  if (!success)
126  std::cout << "SATPlan: failed to get size of format " << datatype->second << std::endl;
127  }
128  else
129  std::cout << "SATPlan: unsupported texture format " << _internalFmt << std::endl;
130 
131 
132  // requires blocksum reduction chain
133  if (numWorkGroupsX_ > 1)
134  {
135  macros_.push_back("#define SAT_BLOCKSCANOUT");
136  blockSumPlan_ = new PrefixSumPlan(numWorkGroupsX_, height_, _internalFmt, _blocksize);
137  }
138 }
139 
140 PrefixSumPlan::~PrefixSumPlan()
141 {
142 }
143 
144 int PrefixSumPlan::paddedDimension(int _dim) const
145 {
146  int padding = _dim % (2*blocksize_);
147  if (padding)
148  return _dim + 2*blocksize_ - padding;
149  return _dim;
150 }
151 
152 int PrefixSumPlan::paddedBlocksize( int _size ) const
153 {
154  int i = 1;
155  while (i < _size) i <<= 1;
156  return i;
157 }
158 
159 bool PrefixSumPlan::execute( ACG::TextureBuffer* _src, ACG::TextureBuffer* _dst )
160 {
161  bool success = true;
162 
163  GLSL::Program* satCS = ACG::ShaderCache::getInstance()->getComputeProgram("SAT/psum.glsl", &macros_);
164 
165  if (satCS)
166  {
167  if (dbgProfile_)
168  perfCounter_.restart();
169 
170  _src->bindAsImage(0, GL_READ_ONLY);
171  _dst->bindAsImage(1, GL_WRITE_ONLY);
172 
173  if (numWorkGroupsX_ > 1)
174  {
175  if (!blockSums_.is_valid())
176  {
177  blockSums_.setBufferData(numBlockScanGroupsX_ * 2 * elemSize_, 0, internalFmt_);
178  blockSumsOut_.setBufferData(numBlockScanGroupsX_ * 2 * elemSize_, 0, internalFmt_);
179  }
180  blockSums_.bindAsImage(2, GL_WRITE_ONLY);
181  }
182 
183 
184  satCS->use();
185 
186  glDispatchCompute(numWorkGroupsX_,numWorkGroupsY_,1);
187  glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
188 
189  if (dbgProfile_)
190  std::cout << "psum: " << perfCounter_.elapsedMs() << std::endl;
191 
192 #ifdef SAT_DBG
193  if (dbgOutput_)
194  ACG::GLDebug::dumpBufferData(GL_TEXTURE_BUFFER, _dst->getBufferId(), SAT_DBG_DIR "1d_pass1.bin");
195 #endif
196 
197  if (numWorkGroupsX_ > 1)
198  {
199  if (blockSumPlan_)
200  {
201  if (dbgProfile_)
202  perfCounter_.restart();
203 
204  blockSumPlan_->execute(&blockSums_, &blockSumsOut_);
205 
206  if (dbgProfile_)
207  std::cout << "psum-blockscan: " << perfCounter_.elapsedMs() << std::endl;
208 
209 
210  GLSL::Program* satCSBlockMerge = ACG::ShaderCache::getInstance()->getComputeProgram("SAT/psum_blockmerge.glsl", &macros_);
211 
212 
213  if (satCSBlockMerge)
214  {
215  if (dbgProfile_)
216  perfCounter_.restart();
217 
218  _dst->bindAsImage(0, GL_READ_WRITE);
219  blockSumsOut_.bindAsImage(1, GL_READ_ONLY);
220 
221  satCSBlockMerge->use();
222  glDispatchCompute(numWorkGroupsX_-1,1,1);
223  glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
224 
225  if (dbgProfile_)
226  std::cout << "psum-blockmerge: " << perfCounter_.elapsedMs() << std::endl;
227 
228  success = true;
229  }
230  }
231  }
232  else
233  success = true;
234 
235 #ifdef SAT_DBG
236  if (dbgOutput_)
237  {
238  ACG::GLDebug::dumpBufferData(GL_TEXTURE_BUFFER, _dst->getBufferId(), SAT_DBG_DIR "1d_act.bin");
239  if (blockSumsOut_.is_valid())
240  ACG::GLDebug::dumpBufferData(GL_TEXTURE_BUFFER, blockSumsOut_.getBufferId(), SAT_DBG_DIR "1d_bsum.bin");
241  }
242 #endif
243  }
244 
245  return success;
246 }
247 
248 
249 bool PrefixSumPlan::execute( ACG::Texture2D* _src, ACG::Texture2D* _dst )
250 {
251  bool success = false;
252 
253  GLSL::Program* satCS = ACG::ShaderCache::getInstance()->getComputeProgram("SAT/psum.glsl", &macros_);
254 
255 #ifdef SAT_DBG
256  if (dbgOutput_)
257  ACG::GLDebug::dumpTexture2D(_src->id(), 0, _src->getFormat(), _src->getType(), elemSize_ * _src->getWidth() * _src->getHeight(), QString(QString(SAT_DBG_DIR "2d_") + QString(dbgTranposedInput_ ? "cols_" : "rows_") + QString("input.bin")).toLatin1(), true);
258 #endif
259 
260  if (satCS)
261  {
262  // 1. divide: compute local prefixsums in each block of each row
263 
264  if (dbgProfile_)
265  perfCounter_.restart();
266 
267  _src->bindAsImage(0, GL_READ_ONLY);
268  _dst->bindAsImage(1, GL_WRITE_ONLY);
269 
270  if (numWorkGroupsX_ > 1)
271  {
272  if (!blockSums2D_.is_valid())
273  {
274  blockSums2D_.setStorage(1, internalFmt_, 2 * numBlockScanGroupsX_, height_);
275  blockSums2D_.parameter(GL_TEXTURE_MIN_FILTER, GL_NEAREST);
276  blockSums2D_.parameter(GL_TEXTURE_MAG_FILTER, GL_NEAREST);
277  blockSums2DOut_.setStorage(1, internalFmt_, 2 * numBlockScanGroupsX_, height_);
278  blockSums2DOut_.parameter(GL_TEXTURE_MIN_FILTER, GL_NEAREST);
279  blockSums2DOut_.parameter(GL_TEXTURE_MAG_FILTER, GL_NEAREST);
280  }
281 
282  blockSums2D_.bindAsImage(2, GL_WRITE_ONLY);
283  }
284 
285  satCS->use();
286 
287  glDispatchCompute(numWorkGroupsX_,height_,1);
288  glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
289 
290  if (dbgProfile_)
291  std::cout << "psum-blockscan: " << perfCounter_.elapsedMs() << std::endl;
292 
293 #ifdef SAT_DBG
294  if (dbgOutput_)
295  ACG::GLDebug::dumpTexture2D(_dst->id(), 0, _dst->getFormat(), _dst->getType(), elemSize_ * _dst->getWidth() * _dst->getHeight(), (QString(SAT_DBG_DIR "2d_pass1") + QString(dbgTranposedInput_? "_cols" : "_rows") + QString(".bin")).toLatin1(), true);
296 #endif
297 
298  if (numWorkGroupsX_ > 1)
299  {
300  if (blockSumPlan_)
301  {
302  // 2. blockscan: compute prefixsum of the blocksums in each row
303 
304  if (dbgProfile_)
305  perfCounter_.restart();
306 
307  blockSumPlan_->execute(&blockSums2D_, &blockSums2DOut_);
308 
309  if (dbgProfile_)
310  std::cout << "psum-blockscan: " << perfCounter_.elapsedMs() << std::endl;
311 
312  GLSL::Program* satCSBlockMerge = ACG::ShaderCache::getInstance()->getComputeProgram("SAT/psum_blockmerge.glsl", &macros_);
313 
314 
315  if (satCSBlockMerge)
316  {
317  // 3. blockmerge: add prefixsum of the blocksums to the local prefixsums in the row blocks
318 
319  if (dbgProfile_)
320  perfCounter_.restart();
321 
322  _dst->bindAsImage(0, GL_READ_WRITE);
323  blockSums2DOut_.bindAsImage(1, GL_READ_ONLY);
324 
325  satCSBlockMerge->use();
326  glDispatchCompute(numWorkGroupsX_-1,height_,1);
327  glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
328 
329  if (dbgProfile_)
330  std::cout << "psum-blockmerge: " << perfCounter_.elapsedMs() << std::endl;
331 
332  success = true;
333  }
334  }
335  }
336  else
337  success = true;
338 
339  // now, each row contains the prefixsums of the rows in the input image
340 
341 #ifdef SAT_DBG
342  if (dbgOutput_)
343  {
344  ACG::GLDebug::dumpTexture2D(_dst->id(), 0, _dst->getFormat(), _dst->getType(), elemSize_ * _dst->getWidth() * _dst->getHeight(), QString(QString(SAT_DBG_DIR "2d_") + QString(dbgTranposedInput_? "_cols" : "_rows") + QString(".bin")).toLatin1(), true);
345  if (blockSums2DOut_.is_valid())
346  ACG::GLDebug::dumpTexture2D(blockSums2DOut_.id(), 0, blockSums2DOut_.getFormat(), blockSums2DOut_.getType(), elemSize_ * width_ * height_, QString(QString(SAT_DBG_DIR "2d_") + QString(dbgTranposedInput_? "_cols" : "_rows") + QString("_bsum.bin")).toLatin1(), true);
347  }
348 #endif
349  }
350 
351  return success;
352 }
353 
354 
355 
356 bool PrefixSumPlan::padInput( ACG::Texture2D* _src, ACG::Texture2D* _dst, bool _padWidthAndHeight )
357 {
358  bool success = false;
359 
360  GLSL::Program* padCS = ACG::ShaderCache::getInstance()->getComputeProgram("SAT/pad.glsl", &macros_);
361 
362  if (padCS)
363  {
364  padCS->use();
365 
366  _src->bindAsImage(0, GL_READ_ONLY);
367 
368  int padHeight = height_;
369  if (_padWidthAndHeight)
370  padHeight = paddedDimension(padHeight);
371 
372  if (!_dst->is_valid())
373  {
374  _dst->setStorage(1, internalFmt_, width_, padHeight);
375  _dst->parameter(GL_TEXTURE_MIN_FILTER, GL_NEAREST);
376  _dst->parameter(GL_TEXTURE_MAG_FILTER, GL_NEAREST);
377  }
378 
379  _dst->bindAsImage(1, GL_WRITE_ONLY);
380 
381  glDispatchCompute(numWorkGroupsX_, padHeight / blocksize_, 1);
382 
383  success = true;
384  }
385 
386  return success;
387 }
388 
389 
390 
391 
392 SATPlan::SATPlan( int _w, int _h, GLenum _internalFmt /*= GL_R32F*/, int _blocksize /*= 32*/ )
393  : rows_(0), cols_(0), paddingRequired_(false), transposeGroupSize_(0)
394 {
395  rows_ = new PrefixSumPlan(_w, _h, _internalFmt, _blocksize);
396  cols_ = new PrefixSumPlan(_h, _w, _internalFmt, _blocksize);
397 
398  paddingRequired_ = _w != rows_->width() || _h != cols_->width();
399 
400  rows_->debugSetTransposedInput(0);
401  cols_->debugSetTransposedInput(1);
402 
403  transposeMacros_ = rows_->macros();
404 
405 
406  transposeGroupSize_ = _blocksize;
407  int maxInvocations = GLSL::ComputeShader::caps().maxWorkGroupInvocations_;
408 
409  if (transposeGroupSize_*transposeGroupSize_ > maxInvocations)
410  {
411  transposeGroupSize_ = int(sqrtf(float(maxInvocations)));
412 
413  QString cappedBlocksize;
414  cappedBlocksize.sprintf("#define SAT_BLOCKSIZE %d", transposeGroupSize_);
415 
416  transposeMacros_.push_back("#undef SAT_BLOCKSIZE");
417  transposeMacros_.push_back(cappedBlocksize);
418  }
419 }
420 
421 SATPlan::~SATPlan()
422 {
423  delete rows_;
424  delete cols_;
425 }
426 
427 bool SATPlan::transpose( ACG::Texture2D* _src, ACG::Texture2D* _dst )
428 {
429  GLSL::Program* transposeCS = ACG::ShaderCache::getInstance()->getComputeProgram("SAT/transpose.glsl", &transposeMacros_);
430 
431  if (transposeCS)
432  {
433  if (rows_->profilingEnabled())
434  perfCounter_.restart();
435 
436  _src->bindAsImage(0, GL_READ_ONLY);
437  _dst->bindAsImage(1, GL_WRITE_ONLY);
438 
439  int w = _src->getWidth();
440  int h = _src->getHeight();
441  int blocksize = transposeGroupSize_;
442 
443  int numGroupsX = w / blocksize + (w % blocksize ? 1 : 0);
444  int numGroupsY = h / blocksize + (h % blocksize ? 1 : 0);
445 
446  transposeCS->use();
447  glDispatchCompute(numGroupsX, numGroupsY, 1);
448  glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
449 
450  if (rows_->profilingEnabled())
451  std::cout << "transpose: " << perfCounter_.elapsedMs() << std::endl;
452  }
453 
454  return transposeCS != 0;
455 }
456 
457 bool SATPlan::execute( ACG::Texture2D* _src, ACG::Texture2D* _dst )
458 {
459  // SAT via separable prefixsum filters
460 
461 
462  if (paddingRequired_)
463  {
464  if (rows_->profilingEnabled())
465  perfCounter_.restart();
466 
467  if (!paddedInput_.is_valid())
468  {
469  paddedInput_.setStorage(1, _src->getInternalFormat(), rows_->width(), cols_->width());
470  paddedInput_.parameter(GL_TEXTURE_MIN_FILTER, GL_NEAREST);
471  paddedInput_.parameter(GL_TEXTURE_MAG_FILTER, GL_NEAREST);
472  }
473  rows_->padInput(_src, &paddedInput_, true);
474 
475  if (rows_->profilingEnabled())
476  std::cout << "padding: " << perfCounter_.elapsedMs() << std::endl;
477  }
478 
479 
480  if (rows_->profilingEnabled())
481  perfCounter_.restart();
482 
483  // prefixsum of rows
484  bool success = rows_->execute(paddingRequired_ ? &paddedInput_ : _src, _dst);
485 
486  if (rows_->profilingEnabled())
487  std::cout << "rows: " << perfCounter_.elapsedMs() << std::endl;
488 
489 
490  if (success)
491  {
492  if (!transposedSrc_.is_valid())
493  {
494  transposedSrc_.setStorage(1, _src->getInternalFormat(), cols_->width(), cols_->height());
495  transposedSrc_.parameter(GL_TEXTURE_MIN_FILTER, GL_NEAREST);
496  transposedSrc_.parameter(GL_TEXTURE_MAG_FILTER, GL_NEAREST);
497  }
498  if (!transposedDst_.is_valid())
499  {
500  transposedDst_.setStorage(1, _src->getInternalFormat(), cols_->width(), cols_->height());
501  transposedDst_.parameter(GL_TEXTURE_MIN_FILTER, GL_NEAREST);
502  transposedDst_.parameter(GL_TEXTURE_MAG_FILTER, GL_NEAREST);
503  }
504 
505  // transpose
506  success = transpose(_dst, &transposedSrc_);
507 
508 #ifdef SAT_DBG
509  if (rows_->debugOutputEnabled())
510  ACG::GLDebug::dumpTexture2D(transposedSrc_.id(), 0, transposedSrc_.getFormat(), transposedSrc_.getType(), rows_->elemSize() * transposedSrc_.getWidth() * transposedSrc_.getHeight(), SAT_DBG_DIR "2d_rows_tr.bin", true);
511 #endif
512 
513  if (success)
514  {
515  if (rows_->profilingEnabled())
516  perfCounter_.restart();
517 
518  // prefixsum of cols to compute SAT
519  success = cols_->execute(&transposedSrc_, &transposedDst_);
520 
521  if (rows_->profilingEnabled())
522  std::cout << "cols: " << perfCounter_.elapsedMs() << std::endl;
523 
524 #ifdef SAT_DBG
525  if (rows_->debugOutputEnabled())
526  ACG::GLDebug::dumpTexture2D(transposedDst_.id(), 0, transposedSrc_.getFormat(), transposedSrc_.getType(), rows_->elemSize() * transposedSrc_.getWidth() * transposedSrc_.getHeight(), SAT_DBG_DIR "2d_sat_tr.bin", true);
527 #endif
528 
529  if (success)
530  {
531  // transpose back
532  success = transpose(&transposedDst_, _dst);
533 
534 #ifdef SAT_DBG
535  if (rows_->debugOutputEnabled())
536  ACG::GLDebug::dumpTexture2D(_dst->id(), 0, transposedSrc_.getFormat(), transposedSrc_.getType(), rows_->elemSize() * transposedSrc_.getWidth() * transposedSrc_.getHeight(), SAT_DBG_DIR "2d_sat.bin", true);
537 #endif
538  }
539  }
540  }
541 
542  return success;
543 }
544 
545 void SATPlan::enableDebugOutput()
546 {
547  rows_->enableDebugOutput();
548  cols_->enableDebugOutput();
549 }
550 
551 void SATPlan::enableProfiling()
552 {
553  rows_->enableProfiling();
554  cols_->enableProfiling();
555 }
556 
557 
558 
559 
560 
561 
562 
563 bool PrefixSumPlan::testBuffer(int w, int cmpMem, int fullOutput)
564 {
565  ACG::TextureBuffer testBuffer, testBufferOut;
566 
567  int numTestVals = w;
568  int blockSize = 32;
569  int numBlocks = numTestVals / blockSize;
570  GLenum internalfmt = GL_R32I;
571  int elemDim = ACG::GLFormatInfo(internalfmt).channelCount();
572 
573  std::vector<int> testData(numTestVals * elemDim);
574 
575  for (int i = 0; i < numTestVals; ++i)
576  {
577  testData[i*elemDim] = i;
578  if (elemDim > 1)
579  testData[i*elemDim+1] = i+1;
580  if (elemDim > 2)
581  testData[i*elemDim+2] = -i;
582  if (elemDim > 3)
583  testData[i*elemDim+3] = i*2;
584  }
585 
586  testBuffer.setBufferData(testData.size()*sizeof(testData[0]), &testData[0], internalfmt);
587  testBufferOut.setBufferData(testData.size()*sizeof(testData[0]), &testData[0], internalfmt);
588 
589  // compute expected result
590  std::vector<int> expectedData(numTestVals * elemDim, 0);
591 
592  for (int i = 1; i < numTestVals; ++i)
593  {
594  for (int k = 0; k < elemDim; ++k)
595  expectedData[i*elemDim + k] = testData[(i-1)*elemDim + k] + expectedData[(i-1)*elemDim + k];
596  }
597 
598 
599 
600 
601  PrefixSumPlan plan(numTestVals, 1, internalfmt, blockSize);
602  if (fullOutput)
603  plan.enableDebugOutput();
604  plan.execute(&testBuffer, &testBufferOut);
605 
606 
607  bool success = true;
608 
609  if (cmpMem)
610  {
611  int bufSize = numTestVals * elemDim * 4;
612  char* pActBuf = new char[bufSize];
613  testBufferOut.getBufferData(pActBuf);
614 
615  if (memcmp(pActBuf, &expectedData[0], bufSize))
616  {
617  printf("error for %d\n", w);
618  success = false;
619  }
620 
621  if (!success)
622  {
623  char szFileOut[0xff];
624  sprintf(szFileOut, SAT_DBG_DIR "1d_act_%03d.bin", numTestVals);
625  FILE* pFile = fopen(szFileOut, "wb");
626  if (pFile)
627  {
628  fwrite(pActBuf, 1, bufSize, pFile);
629  fclose(pFile);
630  }
631 
632  sprintf(szFileOut, SAT_DBG_DIR "1d_in_%03d.bin", numTestVals);
633  pFile = fopen(szFileOut, "wb");
634  if (pFile)
635  {
636  fwrite(&testData[0], sizeof(testData[0]), testData.size(), pFile);
637  fclose(pFile);
638  }
639 
640  sprintf(szFileOut, SAT_DBG_DIR "1d_exp_%03d.bin", numTestVals);
641  pFile = fopen(szFileOut, "wb");
642  if (pFile)
643  {
644  fwrite(&expectedData[0], sizeof(expectedData[0]), expectedData.size(), pFile);
645  fclose(pFile);
646  }
647 
648  sprintf(szFileOut, SAT_DBG_DIR "1d_expbsum_%03d.bin", numTestVals);
649  pFile = fopen(szFileOut, "wb");
650  if (pFile)
651  {
652  int bsum[4] = {0};
653 
654  for (int i = 1; i < numBlocks; ++i)
655  {
656  memset(bsum, 0, sizeof(bsum));
657 
658  for (int e = 0; e < elemDim; ++e)
659  {
660  int shift = 0;
661 
662 // if (i > 1)
663  shift = expectedData[((i-1)*blockSize) * elemDim + e];
664 
665  bsum[e] = expectedData[(i*blockSize) * elemDim + e] - shift;
666  }
667 // bsum[e] += testData[offset + k * elemDim + e];
668 
669  fwrite(bsum, 4, elemDim, pFile);
670 
671 // if (i > 0)
672 // {
673 // int offset = i * blockSize * elemDim;
674 //
675 // for (int k = 0; k < blockSize; ++k)
676 // {
677 // for (int e = 0; e < elemDim; ++e)
678 // bsum[e] += testData[offset + k * elemDim + e];
679 // }
680 // }
681  }
682  fclose(pFile);
683  }
684  }
685 
686  delete [] pActBuf;
687  }
688 
689  return success;
690 }
691 
692 
693 
694 bool PrefixSumPlan::test2D( int w, int h, int cmpMem, int fullOutput )
695 {
696  int numTestVals = w*h;
697  int blockSize = 32;
698  int numBlocks = numTestVals / blockSize;
699  GLenum internalfmt = GL_R32I;
700  ACG::GLFormatInfo finfo(internalfmt);
701  int elemDim = finfo.channelCount();
702 
703  std::vector<int> testData(numTestVals);
704  for (int i = 0; i < numTestVals; ++i)
705  testData[i] = i;
706 
707 // std::vector<ACG::Vec2i> testData(numTestVals);
708 // for (int i = 0; i < numTestVals; ++i)
709 // testData[i] = ACG::Vec2i(i,i+1);
710 
711  // compute expected result
712  std::vector<int> expectedData = testData;
713 // std::vector<ACG::Vec2i> expectedData = testData;
714  PrefixSumPlan::executeRowsCPU(w,h, expectedData);
715 
716 
717  char szFileOut[0xff];
718 // sprintf(szFileOut, SAT_DBG_DIR "2d_in_%dx%d.bin", w,h);
719 // FILE* pFile = fopen(szFileOut, "wb");
720 // if (pFile)
721 // {
722 // fwrite(&testData[0], sizeof(testData[0]), testData.size(), pFile);
723 // fclose(pFile);
724 // }
725 //
726 //
727 
728 
729  PrefixSumPlan::executeColsCPU(w,h,expectedData);
730 
731 
732 
733 
734  ACG::Texture2D testBuffer, testBufferOut;
735  testBuffer.setData(0, internalfmt, w, h, finfo.format(), finfo.type(), &testData[0]);
736 // testBuffer.setData(0, internalfmt, w, h, GL_RG_INTEGER, GL_INT, &testData[0]);
737  testBufferOut.setStorage(1, internalfmt, w, h);
738 
739  testBuffer.bind();
740  testBuffer.parameter(GL_TEXTURE_MIN_FILTER, GL_NEAREST);
741  testBuffer.parameter(GL_TEXTURE_MAG_FILTER, GL_NEAREST);
742 
743  testBufferOut.bind();
744  testBufferOut.parameter(GL_TEXTURE_MIN_FILTER, GL_NEAREST);
745  testBufferOut.parameter(GL_TEXTURE_MAG_FILTER, GL_NEAREST);
746  // testBufferOut.parameter(GL_TEXTURE_BASE_LEVEL, 0);
747  // testBufferOut.parameter(GL_TEXTURE_MAX_LEVEL, 1);
748 
749 
750 
751 // PrefixSumPlan plan(w,h, internalfmt, blockSize);
752  SATPlan plan(w,h, internalfmt, blockSize);
753 
754  if (fullOutput)
755  plan.enableDebugOutput();
756  plan.execute(&testBuffer, &testBufferOut);
757 
758  bool success = true;
759 
760  if (cmpMem)
761  {
762  int bufSize = numTestVals * elemDim * 4;
763  char* pActBuf = new char[bufSize];
764 #ifdef SAT_DBG
765  ACG::GLDebug::getTextureData2D(testBufferOut.id(), 0, finfo.format(), finfo.type(), bufSize, pActBuf);
766 #endif
767 
768  if (memcmp(pActBuf, &expectedData[0], bufSize))
769  {
770  printf("error for %dx%d\n", w,h);
771  success = false;
772  }
773 
774  delete [] pActBuf;
775 
776 
777  if (!success)
778  {
779  sprintf(szFileOut, SAT_DBG_DIR "2d_exp_%dx%d.bin", w,h);
780  FILE* pFile = fopen(szFileOut, "wb");
781  if (pFile)
782  {
783  fwrite(&expectedData[0], sizeof(expectedData[0]), expectedData.size(), pFile);
784  fclose(pFile);
785  }
786 
787 #ifdef SAT_DBG
788  sprintf(szFileOut, SAT_DBG_DIR "2d_act_%dx%d.bin", w,h);
789  ACG::GLDebug::dumpTexture2D(testBufferOut.id(), 0, finfo.format(), finfo.type(), numTestVals * elemDim * 4, szFileOut, true);
790 #endif
791 
792 
793  expectedData = testData;
794  PrefixSumPlan::executeRowsCPU(w,h,expectedData);
795  sprintf(szFileOut, SAT_DBG_DIR "2d_exp_rows_%dx%d.bin", w,h);
796  pFile = fopen(szFileOut, "wb");
797  if (pFile)
798  {
799  fwrite(&expectedData[0], sizeof(expectedData[0]), expectedData.size(), pFile);
800  fclose(pFile);
801  }
802  }
803 
804  }
805 
806  return success;
807 }
static ShaderCache * getInstance()
Return instance of the ShaderCache singleton.
Definition: ShaderCache.cc:90
Definition: sat.hh:151
GLSL program class.
Definition: GLSLShader.hh:217
GLuint64 elapsedMs()
elapsed gpu time in millisecs
Definition: globjects.cc:1253
GLSL::Program * getComputeProgram(const char *_computeShaderFile, QStringList *_macros=0, bool _verbose=true)
Query a static compute shader program from cache.
Definition: ShaderCache.cc:447
void use()
Enables the program object for using.
Definition: GLSLShader.cc:351