Developer Documentation
Histogram.hh
1 /*===========================================================================*\
2  * *
3  * OpenFlipper *
4  * Copyright (c) 2001-2015, RWTH-Aachen University *
5  * Department of Computer Graphics and Multimedia *
6  * All rights reserved. *
7  * www.openflipper.org *
8  * *
9  *---------------------------------------------------------------------------*
10  * This file is part of OpenFlipper. *
11  *---------------------------------------------------------------------------*
12  * *
13  * Redistribution and use in source and binary forms, with or without *
14  * modification, are permitted provided that the following conditions *
15  * are met: *
16  * *
17  * 1. Redistributions of source code must retain the above copyright notice, *
18  * this list of conditions and the following disclaimer. *
19  * *
20  * 2. Redistributions in binary form must reproduce the above copyright *
21  * notice, this list of conditions and the following disclaimer in the *
22  * documentation and/or other materials provided with the distribution. *
23  * *
24  * 3. Neither the name of the copyright holder nor the names of its *
25  * contributors may be used to endorse or promote products derived from *
26  * this software without specific prior written permission. *
27  * *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A *
31  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
32  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
33  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
34  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
35  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
36  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
37  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
38  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
39  * *
40 \*===========================================================================*/
41 
42 #ifndef ACG_HISTOGRAM_HH
43 #define ACG_HISTOGRAM_HH
44 
45 #include <vector>
46 #include <cassert>
47 #include <memory>
48 #include <exception>
49 #include <algorithm>
50 #include <type_traits>
51 
52 #include <QString>
53 
54 #include "../Config/ACGDefines.hh"
55 
56 namespace ACG {
57 class ACGDLLEXPORT Histogram {
58 public:
59  enum class LabelType {
60  PerBin,
61  PerBoundary,
62  };
63 
64  virtual ~Histogram() = default;
65  const std::vector<size_t> &getBins() const { return bins_; }
66  const std::vector<double> &getBinWidths() const { return bin_widths_; }
67  virtual double getTotalWidth() const = 0;
68 
69  virtual LabelType getLabelType() const = 0;
70  virtual QString getBoundaryLabel(size_t /*idx*/) const { assert(false); return QString();}
71  virtual QString getBinLabel (size_t /*idx*/) const { assert(false); return QString();}
72 
73 protected:
74  std::vector<size_t> bins_;
75  std::vector<double> bin_widths_;
76 };
77 
78 
79 // we need to be careful with ranges, some sums (e.g. INT_MAX - INT_MIN) do not fit into a signed int,
80 // so we store bin sizes as doubles. With specialization or some tricks we
81 // could probably use the next-biggest integer type, but if we're using
82 // the biggest integer type already, we should to fall back to double anyways.
83 
84 template<typename T>
85 class HistogramT : public Histogram {
86 public:
87  HistogramT(const std::vector<int> &histogram,
88  const std::vector<T> &bin_boundaries,
89  const std::vector<double> &bin_widths)
90  {
91  if (bins_.size() != bin_widths_.size()
92  || bins_.size() + 1 != bin_boundaries_.size()) {
93  throw std::runtime_error("Histogram constructor sizes don't match.");
94  }
95  bins_ = histogram;
96  bin_boundaries_ = bin_boundaries;
97  bin_widths_ = bin_widths;
98  double range = bin_boundaries.back() - bin_boundaries.front();
99  avg_bin_size_ = range / bins_.size();
100  }
101 
102  template<typename IterT>
103  HistogramT(IterT begin, IterT end, size_t max_bins)
104  {
105  static_assert(std::is_assignable<T&, typename IterT::value_type>::value, "IterT incompatible with T.");
106  static_assert(std::is_floating_point<typename IterT::value_type>::value, "HistogramT currently only supports floating point values.");
107  assert(max_bins > 0);
108  const size_t n = std::distance(begin, end);
109  if (n == 0) return;
110 
111  const auto minmax = std::minmax_element(begin, end);
112  const T min = *minmax.first;
113  const T max = *minmax.second;
114  const double min_dbl = static_cast<double>(min);
115  const double range = static_cast<double>(max) - min_dbl;
116 
117  const size_t n_bins_max = std::min(max_bins, n);
118  bin_boundaries_.reserve(n_bins_max + 1);
119 
120  T last_boundary = min;
121  bin_boundaries_.push_back(min);
122  for (size_t i = 1; i < n_bins_max; ++i) {
123  // Adding range/n_bins to a accumulator might seem more efficient/elegant,
124  // but might cause numeric issues.
125 
126  // This multiplication order is bad for huge ranges that cause overflows,
127  // however I assume tiny ranges are more common than huge values and more
128  // important to get right. If you disagree, add a case distinction or something better.
129 
130  T boundary = static_cast<T>(min + (i * range) / n_bins_max);
131  // avoid zero-sized bins (happens for many ints with values in a small range)
132  if (boundary != last_boundary || i == 0) {
133  bin_boundaries_.push_back(boundary);
134  bin_widths_.push_back(boundary - last_boundary);
135  }
136  last_boundary = boundary;
137  }
138  bin_boundaries_.push_back(max); // avoid rounding issues etc by explicitly picking max.
139  bin_widths_.push_back(max - last_boundary);
140 
141  bin_boundaries_.shrink_to_fit();
142  size_t n_bins = bin_boundaries_.size() - 1;
143  bins_.resize(n_bins);
144 
145  // note that due to rounding, our bins may have differing sizes, which matters
146  // if we handle integral types (relative size difference worst case: bin width 1 vs 2).
147  // Be careful to select the right bin.
148  std::for_each(begin, end, [&](const T &val) {
149  auto it = std::upper_bound(bin_boundaries_.begin(), bin_boundaries_.end(), val);
150  if (it == bin_boundaries_.end()) --it; // the last value is exactly max!
151  size_t idx = std::distance(bin_boundaries_.begin(), it);
152  assert(idx > 0);
153  ++bins_[idx - 1];
154  });
155  avg_bin_size_ = range / n_bins;
156  }
157 
158  const std::vector<T> &getBinBoundaries() const {
159  return bin_boundaries_;
160  }
161 
162  double getTotalWidth() const override
163  {
164  return bin_boundaries_.back() - bin_boundaries_.front();
165  }
166 
167  LabelType getLabelType() const override
168  {
169  return LabelType::PerBoundary;
170  }
171 
172  QString getBoundaryLabel (size_t idx) const override;
173 
174 
175 private:
176  std::vector<T> bin_boundaries_;
177  double avg_bin_size_ = 0.0;
178 };
179 
180 
181 template<typename T>
182 QString HistogramT<T>::getBoundaryLabel(size_t idx) const {
183  return QString::number(bin_boundaries_[idx]);
184 }
185 
186 } // namespace ACG
187 
188 #endif // ACG_HISTOGRAM_HH
Namespace providing different geometric functions concerning angles.