Viskores  1.0
DescriptiveStatistics.h
Go to the documentation of this file.
1 //============================================================================
2 // The contents of this file are covered by the Viskores license. See
3 // LICENSE.txt for details.
4 //
5 // By contributing to this file, all contributors agree to the Developer
6 // Certificate of Origin Version 1.1 (DCO 1.1) as stated in DCO.txt.
7 //============================================================================
8 
9 //============================================================================
10 // Copyright (c) Kitware, Inc.
11 // All rights reserved.
12 // See LICENSE.txt for details.
13 //
14 // This software is distributed WITHOUT ANY WARRANTY; without even
15 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16 // PURPOSE. See the above copyright notice for more information.
17 //============================================================================
18 #ifndef viskores_worklet_DescriptiveStatistics_h
19 #define viskores_worklet_DescriptiveStatistics_h
20 
25 
26 namespace viskores
27 {
28 namespace worklet
29 {
31 {
32 public:
33  template <typename T>
34  struct StatState
35  {
38  : n_(0)
39  , min_(std::numeric_limits<T>::max())
40  , max_(std::numeric_limits<T>::lowest())
41  , sum_(0)
42  , mean_(0)
43  , M2_(0)
44  , M3_(0)
45  , M4_(0)
46  {
47  }
48 
50  StatState(T value)
51  : n_(1)
52  , min_(value)
53  , max_(value)
54  , sum_(value)
55  , mean_(value)
56  , M2_(0)
57  , M3_(0)
58  , M4_(0)
59  {
60  }
61 
63  StatState(T n, T min, T max, T sum, T mean, T M2, T M3, T M4)
64  : n_(n)
65  , min_(min)
66  , max_(max)
67  , sum_(sum)
68  , mean_(mean)
69  , M2_(M2)
70  , M3_(M3)
71  , M4_(M4)
72  {
73  }
74 
77  {
78  const StatState<T>& x = *this;
79  if (y.n_ == 0)
80  {
81  return x;
82  }
83  if (x.n_ == 0)
84  {
85  return y;
86  }
87 
88  StatState result;
89  result.n_ = x.n_ + y.n_;
90 
91  result.min_ = viskores::Min(x.min_, y.min_);
92  result.max_ = viskores::Max(x.max_, y.max_);
93 
94  // TODO: consider implementing compensated sum
95  // https://en.wikipedia.org/wiki/Kahan_summation_algorithm
96  result.sum_ = x.sum_ + y.sum_;
97 
98  // It is tempting to try to deviate from the literature and calculate
99  // mean in each "reduction" from sum and n. This saves one multiplication.
100  // However, RESIST THE TEMPTATION!!! This takes us back to the naive
101  // algorithm (mean = sum of a bunch of numbers / N) that actually
102  // accumulates more error and causes problem when calculating M2
103  // (and thus variance).
104  // TODO: Verify that FieldStatistics exhibits the same problem since
105  // it is using a "parallel" version of the naive algorithm as well.
106  // TODO: or better, just deprecate FieldStatistics.
107  T delta = y.mean_ - x.mean_;
108  result.mean_ = x.mean_ + delta * y.n_ / result.n_;
109 
110  T delta2 = delta * delta;
111  result.M2_ = x.M2_ + y.M2_ + delta2 * x.n_ * y.n_ / result.n_;
112 
113  T delta3 = delta * delta2;
114  T n2 = result.n_ * result.n_;
115  result.M3_ = x.M3_ + y.M3_;
116  result.M3_ += delta3 * x.n_ * y.n_ * (x.n_ - y.n_) / n2;
117  result.M3_ += T(3.0) * delta * (x.n_ * y.M2_ - y.n_ * x.M2_) / result.n_;
118 
119  T delta4 = delta2 * delta2;
120  T n3 = result.n_ * n2;
121  result.M4_ = x.M4_ + y.M4_;
122  result.M4_ += delta4 * x.n_ * y.n_ * (x.n_ * x.n_ - x.n_ * y.n_ + y.n_ * y.n_) / n3;
123  result.M4_ += T(6.0) * delta2 * (x.n_ * x.n_ * y.M2_ + y.n_ * y.n_ * x.M2_) / n2;
124  result.M4_ += T(4.0) * delta * (x.n_ * y.M3_ - y.n_ * x.M3_) / result.n_;
125 
126  return result;
127  }
128 
129  VISKORES_EXEC_CONT T N() const { return this->n_; }
130 
132  T Min() const { return this->min_; }
133 
135  T Max() const { return this->max_; }
136 
138  T Sum() const { return this->sum_; }
139 
141  T Mean() const { return this->mean_; }
142 
144  T M2() const { return this->M2_; }
145 
147  T M3() const { return this->M3_; }
148 
150  T M4() const { return this->M4_; }
151 
153  T SampleStddev() const { return viskores::Sqrt(this->SampleVariance()); }
154 
156  T PopulationStddev() const { return viskores::Sqrt(this->PopulationVariance()); }
157 
159  T SampleVariance() const
160  {
161  if (this->n_ <= 1)
162  {
163  return 0;
164  }
165  return this->M2_ / (this->n_ - 1);
166  }
167 
170  {
171  if (this->M2_ == 0 || this->n_ == 0)
172  {
173  return T(0);
174  }
175  return this->M2_ / this->n_;
176  }
177 
179  T Skewness() const
180  {
181  if (this->M2_ == 0 || this->n_ == 0)
182  // Shamelessly swiped from Boost Math
183  // The limit is technically undefined, but the interpretation here is clear:
184  // A constant dataset has no skewness.
185  return T(0);
186  else
187  return viskores::Sqrt(this->n_) * this->M3_ / viskores::Pow(this->M2_, T{ 1.5 });
188  }
189 
191  T Kurtosis() const
192  {
193  if (this->M2_ == 0 || this->n_ == 0)
194  // Shamelessly swiped from Boost Math
195  // The limit is technically undefined, but the interpretation here is clear:
196  // A constant dataset has no kurtosis.
197  return T(0);
198  else
199  return this->n_ * this->M4_ / (this->M2_ * this->M2_);
200  }
201 
202  private:
203  // GCC4.8 is not happy about initializing data members here.
204  T n_;
205  T min_;
206  T max_;
207  T sum_;
208  T mean_;
209  T M2_;
210  T M3_;
211  T M4_;
212  }; // StatState
213 
215  {
216  template <typename T>
218  T value) const
219  {
221  }
222  };
223 
233  template <typename FieldType, typename Storage>
236  {
237  using Algorithm = viskores::cont::Algorithm;
238 
239  // Essentially a TransformReduce. Do we have that convenience in Viskores?
241  return Algorithm::Reduce(states, StatState<FieldType>{});
242  }
243 
244  template <typename KeyType, typename ValueType, typename KeyInStorage, typename ValueInStorage>
245  VISKORES_CONT static auto Run(
250  {
251  using Algorithm = viskores::cont::Algorithm;
252 
253  // Make a copy of the input arrays so we don't modify them
255  viskores::cont::ArrayCopy(keys, keys_copy);
256 
258  viskores::cont::ArrayCopy(values, values_copy);
259 
260  // Gather values of the same key by sorting them according to keys
261  Algorithm::SortByKey(keys_copy, values_copy);
262 
263  auto states = viskores::cont::make_ArrayHandleTransform(values_copy, MakeStatState{});
265 
267  Algorithm::ReduceByKey(keys_copy, states, keys_out, results, viskores::Add{});
268 
269  return viskores::cont::make_ArrayHandleZip(keys_out, results);
270  }
271 }; // DescriptiveStatistics
272 
273 } // worklet
274 } // viskores
275 #endif // viskores_worklet_DescriptiveStatistics_h
viskores::worklet::DescriptiveStatistics::StatState::PopulationStddev
T PopulationStddev() const
Definition: DescriptiveStatistics.h:156
viskores::worklet::DescriptiveStatistics::StatState::SampleVariance
T SampleVariance() const
Definition: DescriptiveStatistics.h:159
viskores::worklet::DescriptiveStatistics::StatState::PopulationVariance
T PopulationVariance() const
Definition: DescriptiveStatistics.h:169
viskores::worklet::DescriptiveStatistics::StatState::M3
T M3() const
Definition: DescriptiveStatistics.h:147
viskores::worklet::DescriptiveStatistics::StatState::max_
T max_
Definition: DescriptiveStatistics.h:206
viskores::worklet::DescriptiveStatistics::StatState::M4_
T M4_
Definition: DescriptiveStatistics.h:211
viskores::cont::make_ArrayHandleZip
viskores::cont::ArrayHandleZip< FirstHandleType, SecondHandleType > make_ArrayHandleZip(const FirstHandleType &first, const SecondHandleType &second)
A convenience function for creating an ArrayHandleZip.
Definition: ArrayHandleZip.h:300
viskores::cont::Algorithm
Definition: Algorithm.h:397
viskores::worklet::DescriptiveStatistics::StatState
Definition: DescriptiveStatistics.h:34
viskores::worklet::DescriptiveStatistics::StatState::Kurtosis
T Kurtosis() const
Definition: DescriptiveStatistics.h:191
viskores::worklet::DescriptiveStatistics::StatState::M3_
T M3_
Definition: DescriptiveStatistics.h:210
ArrayHandleTransform.h
viskores::worklet::DescriptiveStatistics::StatState::Mean
T Mean() const
Definition: DescriptiveStatistics.h:141
viskores::cont::ArrayHandle
Manages an array-worth of data.
Definition: ArrayHandle.h:313
viskores::worklet::DescriptiveStatistics::StatState::M4
T M4() const
Definition: DescriptiveStatistics.h:150
viskores::worklet::DescriptiveStatistics::StatState::M2
T M2() const
Definition: DescriptiveStatistics.h:144
VISKORES_EXEC_CONT
#define VISKORES_EXEC_CONT
Definition: ExportMacros.h:60
viskores::cont::ArrayHandleZip
ArrayHandleZip is a specialization of ArrayHandle.
Definition: ArrayHandleZip.h:263
viskores::worklet::DescriptiveStatistics::StatState::Max
T Max() const
Definition: DescriptiveStatistics.h:135
viskores::Sqrt
viskores::Float32 Sqrt(viskores::Float32 x)
Definition: Math.h:951
ArrayCopy.h
viskores::Add
Definition: Types.h:268
ArrayHandleZip.h
viskores::worklet::DescriptiveStatistics::MakeStatState
Definition: DescriptiveStatistics.h:214
viskores::worklet::DescriptiveStatistics::StatState::Sum
T Sum() const
Definition: DescriptiveStatistics.h:138
viskores::worklet::DescriptiveStatistics::StatState::StatState
StatState()
Definition: DescriptiveStatistics.h:37
VISKORES_CONT
#define VISKORES_CONT
Definition: ExportMacros.h:65
viskores
Groups connected points that have the same field value.
Definition: Atomic.h:27
Algorithm.h
viskores::worklet::DescriptiveStatistics::Run
static StatState< FieldType > Run(const viskores::cont::ArrayHandle< FieldType, Storage > &field)
Calculate various summary statistics for the input ArrayHandle.
Definition: DescriptiveStatistics.h:234
viskores::worklet::DescriptiveStatistics::StatState::M2_
T M2_
Definition: DescriptiveStatistics.h:209
viskores::worklet::DescriptiveStatistics::StatState::Skewness
T Skewness() const
Definition: DescriptiveStatistics.h:179
viskores::worklet::DescriptiveStatistics::StatState::StatState
StatState(T value)
Definition: DescriptiveStatistics.h:50
viskores::worklet::DescriptiveStatistics
Definition: DescriptiveStatistics.h:30
viskores::worklet::DescriptiveStatistics::MakeStatState::operator()
viskores::worklet::DescriptiveStatistics::StatState< T > operator()(T value) const
Definition: DescriptiveStatistics.h:217
viskores::worklet::DescriptiveStatistics::Run
static auto Run(const viskores::cont::ArrayHandle< KeyType, KeyInStorage > &keys, const viskores::cont::ArrayHandle< ValueType, ValueInStorage > &values) -> viskores::cont::ArrayHandleZip< viskores::cont::ArrayHandle< KeyType >, viskores::cont::ArrayHandle< StatState< ValueType >>>
Definition: DescriptiveStatistics.h:245
viskores::worklet::DescriptiveStatistics::StatState::Min
T Min() const
Definition: DescriptiveStatistics.h:132
viskores::worklet::DescriptiveStatistics::StatState::min_
T min_
Definition: DescriptiveStatistics.h:205
viskores::worklet::DescriptiveStatistics::StatState::N
T N() const
Definition: DescriptiveStatistics.h:129
viskores::worklet::DescriptiveStatistics::StatState::operator+
StatState operator+(const StatState< T > &y) const
Definition: DescriptiveStatistics.h:76
viskores::worklet::DescriptiveStatistics::StatState::n_
T n_
Definition: DescriptiveStatistics.h:204
viskores::worklet::DescriptiveStatistics::StatState::mean_
T mean_
Definition: DescriptiveStatistics.h:208
viskores::cont::ArrayCopy
void ArrayCopy(const SourceArrayType &source, DestArrayType &destination)
Does a deep copy from one array to another array.
Definition: ArrayCopy.h:129
viskores::cont::make_ArrayHandleTransform
viskores::cont::ArrayHandleTransform< HandleType, FunctorType > make_ArrayHandleTransform(HandleType handle, FunctorType functor)
make_ArrayHandleTransform is convenience function to generate an ArrayHandleTransform.
Definition: ArrayHandleTransform.h:495
viskores::worklet::DescriptiveStatistics::StatState::sum_
T sum_
Definition: DescriptiveStatistics.h:207
viskores::worklet::DescriptiveStatistics::StatState::StatState
StatState(T n, T min, T max, T sum, T mean, T M2, T M3, T M4)
Definition: DescriptiveStatistics.h:63
viskores::worklet::DescriptiveStatistics::StatState::SampleStddev
T SampleStddev() const
Definition: DescriptiveStatistics.h:153