45 #include <ACG/GL/globjects.hh>
47 #include <QStringList>
/// Constructor declaration; the definition is not part of this chunk.
/// @param _w           first dimension argument (presumably the input width - confirm)
/// @param _h           second dimension argument (presumably the input height - confirm)
/// @param _internalFmt GL internal format enum, defaults to GL_R32F
/// @param _blocksize   block size, defaults to 32
59 PrefixSumPlan(
int _w,
int _h, GLenum _internalFmt = GL_R32F,
int _blocksize = 32);
/// @return the value of the width_ member.
75 int width()
const {
return width_;}
/// @return the value of the height_ member.
77 int height()
const {
return height_;}
/// @return the value of the blocksize_ member.
79 int blocksize()
const {
return blocksize_;}
/// @return the value of the elemSize_ member
///         (presumably the element size for the chosen internal format - confirm units).
82 int elemSize()
const {
return elemSize_;}
/// Const member function taking a dimension and returning an int.
/// The definition is not visible here; presumably it returns _dim rounded up
/// to the padding granularity used by the plan - TODO confirm.
84 int paddedDimension(
int _dim)
const;
/// Const member function taking a size and returning an int.
/// The definition is not visible here; presumably it returns a padded block
/// size derived from _size - TODO confirm.
85 int paddedBlocksize(
int _size)
const;
/// @return a const reference to the macros_ string list
///         (valid only as long as this object is alive).
87 const QStringList& macros()
const {
return macros_;}
/// Turns the debug-output flag on by setting dbgOutput_ to 1.
91 void enableDebugOutput() {dbgOutput_ = 1;}
/// Turns the debug-output flag off by setting dbgOutput_ to 0.
92 void disableDebugOutput() {dbgOutput_ = 0;}
/// @return true when dbgOutput_ is non-zero.
/// NOTE(review): unlike profilingEnabled(), this query is not const-qualified,
/// so it cannot be called on a const object.
93 bool debugOutputEnabled() {
return dbgOutput_ != 0;}
/// Stores i in dbgTranposedInput_ (the member name is spelled without the 's').
/// How the stored value is interpreted is not visible in this chunk.
94 void debugSetTransposedInput(
int i) {dbgTranposedInput_ = i;}
/// Turns the profiling flag on by setting dbgProfile_ to 1.
96 void enableProfiling() {dbgProfile_ = 1;}
/// Turns the profiling flag off by setting dbgProfile_ to 0.
97 void disableProfiling() {dbgProfile_ = 0;}
/// @return true when dbgProfile_ is non-zero.
98 bool profilingEnabled()
const {
return dbgProfile_ != 0;}
/// Static CPU routine operating in place on _inout, one row at a time.
/// The element type T comes from a template header on a line not shown here.
/// @param _w     number of elements per row
/// @param _h     number of rows
/// @param _inout buffer that is read and overwritten (row-major indexing, see the definition)
103 static void executeRowsCPU(
int _w,
int _h, std::vector<T>& _inout);
/// Static CPU routine operating in place on _inout, one column at a time.
/// The element type T comes from a template header on a line not shown here.
/// @param _w     number of elements per row
/// @param _h     number of rows
/// @param _inout buffer that is read and overwritten (row-major indexing, see the definition)
107 static void executeColsCPU(
int _w,
int _h, std::vector<T>& _inout);
/// Static self-test entry point returning a bool (presumably true on success - confirm).
/// The definition is not visible in this chunk.
/// @param w          size argument for the test
/// @param cmpMem     defaults to 1; presumably enables comparing results against a reference - confirm
/// @param fullOutput defaults to 0; presumably enables verbose output - confirm
110 static bool testBuffer(
int w,
int cmpMem = 1,
int fullOutput = 0);
/// Static self-test entry point for a two-dimensional input, returning a bool
/// (presumably true on success - confirm). The definition is not visible in this chunk.
/// @param w          first size argument
/// @param h          second size argument
/// @param cmpMem     defaults to 1; presumably enables comparing results against a reference - confirm
/// @param fullOutput defaults to 0; presumably enables verbose output - confirm
111 static bool test2D(
int w,
int h,
int cmpMem = 1,
int fullOutput = 0);
// Group counts in X and Y (presumably dispatch sizes for the block-scan pass - confirm).
120 int numBlockScanGroupsX_;
121 int numBlockScanGroupsY_;
// Value written by debugSetTransposedInput(); note the spelling "Tranposed".
139 int dbgTranposedInput_;
// Static lookup from a GLenum key to a C string
// (presumably the shader macro naming the datatype for an internal format - confirm).
145 static std::map<GLenum, const char*> datatypeMacros_;
/// Constructor declaration; the definition is not part of this chunk.
/// @param _w           first dimension argument (presumably the input width - confirm)
/// @param _h           second dimension argument (presumably the input height - confirm)
/// @param _internalFmt GL internal format enum, defaults to GL_R32F
/// @param _blocksize   block size, defaults to 32
155 SATPlan(
int _w,
int _h, GLenum _internalFmt = GL_R32F,
int _blocksize = 32);
/// Declared only; the definition is not visible here
/// (presumably switches on debug output for this plan - confirm).
166 void enableDebugOutput();
/// Declared only; the definition is not visible here
/// (presumably switches on profiling for this plan - confirm).
167 void enableProfiling();
/// Static CPU routine operating in place on _inout; the definition in this file
/// runs PrefixSumPlan::executeRowsCPU followed by PrefixSumPlan::executeColsCPU.
/// The element type T comes from a template header on a line not shown here.
/// @param _w     number of elements per row
/// @param _h     number of rows
/// @param _inout buffer that is read and overwritten
170 static void executeCPU(
int _w,
int _h, std::vector<T>& _inout);
// Flag member (presumably true when the input dimensions need padding - confirm).
180 bool paddingRequired_;
// String list and group size for a transpose step
// (presumably shader macros and the work-group size - confirm).
185 QStringList transposeMacros_;
186 int transposeGroupSize_;
// In-place scan along each row of a buffer indexed as offsetRow + c.
// The declaration of offsetRow and the `else` between the two writes below are
// on lines not shown here; assuming offsetRow is the index of the row's first
// element and the second write is the else-branch, each row [a, b, c, ...]
// ends up as [0, a, a+b, ...] (exclusive prefix sum) - TODO confirm against the full file.
196 void PrefixSumPlan::executeRowsCPU(
int w,
int h, std::vector<T>& _inout )
200 for (
int r = 0; r < h; ++r)
// Starts at column 1: column 0 of the row serves as scratch storage.
204 for (
int c = 1; c < w; ++c)
206 int offsetRd = offsetRow + c-1;
207 int offsetWr = offsetRow + c;
// Take the value parked in the row's first slot and park the current input there instead.
209 T x = _inout[offsetRow];
210 _inout[offsetRow] = _inout[offsetWr];
// When the read position is the scratch slot itself (c == 1) there is no
// previous sum to add, so the parked value is written as-is.
212 if (offsetRow == offsetRd)
213 _inout[offsetWr] = x;
// Otherwise: previous output plus the parked input value.
215 _inout[offsetWr] = _inout[offsetRd] + x;
// Zero the row's first element byte-wise.
// NOTE(review): memset only yields a zero value for types whose all-zero-bytes
// representation is zero (arithmetic/trivially copyable T).
219 memset(&_inout[offsetRow], 0,
sizeof(T));
// In-place scan along each column of a buffer indexed as r*w + c.
// The declaration of x and the if/else around the two writes below are on lines
// not shown here; the visible statements mirror executeRowsCPU, with the
// column's first-row element _inout[c] used as scratch - TODO confirm against the full file.
225 void PrefixSumPlan::executeColsCPU(
int w,
int h, std::vector<T>& _inout )
229 for (
int c = 0; c < w; ++c)
// Starts at row 1: row 0 of the column serves as scratch storage.
231 for (
int r = 1; r < h; ++r)
233 int offsetRd = (r-1)*w + c;
234 int offsetWr = r*w + c;
// Park the current input in the column's first-row slot.
237 _inout[c] = _inout[offsetWr];
// Two alternative writes (the selecting condition is not visible):
// the parked value alone, or the element one row above plus the parked value.
240 _inout[offsetWr] = x;
242 _inout[offsetWr] = _inout[offsetRd] + x;
// Zero the first w elements (the whole first row) byte-wise.
// NOTE(review): memset only yields a zero value for types whose all-zero-bytes
// representation is zero (arithmetic/trivially copyable T).
247 memset(&_inout[0], 0,
sizeof(T) * w);
// Runs the row scan and then the column scan of PrefixSumPlan on the same
// buffer, in that order, forwarding w, h and _inout unchanged
// (presumably producing a summed-area table - confirm).
254 void SATPlan::executeCPU(
int w,
int h, std::vector<T>& _inout )
256 PrefixSumPlan::executeRowsCPU(w,h, _inout);
257 PrefixSumPlan::executeColsCPU(w,h, _inout);