hist.h
Go to the documentation of this file.
1 /*
2  -------------------------------------------------------------------
3 
4  Copyright (C) 2010-2018, Andrew W. Steiner
5 
6  This file is part of O2scl.
7 
8  O2scl is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version.
12 
13  O2scl is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with O2scl. If not, see <http://www.gnu.org/licenses/>.
20 
21  -------------------------------------------------------------------
22 */
23 #ifndef O2SCL_HIST_H
24 #define O2SCL_HIST_H
25 
26 /** \file hist.h
27  \brief File defining \ref o2scl::hist
28 */
29 #include <iostream>
30 
31 #include <boost/numeric/ublas/vector.hpp>
32 
33 #include <o2scl/convert_units.h>
34 #include <o2scl/interp.h>
35 #include <o2scl/uniform_grid.h>
36 #include <o2scl/table.h>
37 
38 // Forward definition of the hist class for HDF I/O
39 namespace o2scl {
40  class hist;
41 }
42 
43 // Forward definition of HDF I/O to extend friendship in hist
44 namespace o2scl_hdf {
45  class hdf_file;
46  void hdf_input(hdf_file &hf, o2scl::hist &t, std::string name);
47  void hdf_output(hdf_file &hf, o2scl::hist &t, std::string name);
48 }
49 
50 #ifndef DOXYGEN_NO_O2NS
51 namespace o2scl {
52 #endif
53 
54  /** \brief A one-dimensional histogram class
55 
56  See discussion in the User's guide in the \ref hist_section
57  section.
58 
59  One may set the histogram bins using \ref set_bin_edges() or one
60  may manually set the limit of one bin using the reference
61  returned by get_bin_low(), get_bin_low_i(), get_bin_high(), or
62  get_bin_high_i(). Note that if one attempts to set the bins on a
63  histogram where the bins have already been set, one must ensure
64  that the new and old bin sets have the same size. This ensures
65  that there is no ambiguity in rebinning the data and also
66  prevents accidental data loss. One may set the bin edges
67  either with a generic vector, or as a \ref uniform_grid object.
68 
69  To save space, representative vectors are not allocated until
70  they are used.
71 
72  \note In order to ensure the histogram does not employ
73  user-specified representative values that are not defined, the
74  function \ref set_rep_mode() does not allow one to change the
75  mode to \ref rmode_user directly. Instead, use \ref set_reps()
76  which automatically sets the mode to \ref rmode_user and allows
77  the user to specify the representatives.
78 
79  \note If the user changes the bin edges and the histogram is in
80  mode \ref rmode_user, the bin weights will not be modified and
81  the same representative values will be assumed for the new bin
82  edges.
83 
84  \hline
85 
86  \todo Check implementation of <tt>hist::extend_lhs</tt>.
87  \todo More testing.
88 
89  \future
90  - Add a counter which counts the number of calls to update()?
91  - Add conversions back and forth from GSL histograms
92  - Create extend_lhs too?
93  - Would be nice not to have to create a new \ref
94  o2scl::search_vec object in \ref o2scl::hist::get_bin_index()
95  (make a search_vec data member?)
96  - Consider adding the analogs of the GSL histogram
97  sampling functions (separate class?)
98  - Add a function which computes the bin sizes?
99  - Allow rebinning?
100  - Add histograms of float and integer values
101  - Allow addition and other operations for two histograms.
102  - Make the interpolation functions \c const (this is a bit
103  complicated because of \ref o2scl::hist::set_reps_auto() ).
104 
105  \hline
106 
107  Internally, none of the vectors should have memory allocated for
108  them when hsize is zero, and the vector sizes should match the
109  histogram size. These and other checks are performed by \ref
110  is_valid() . Also, the function \ref set_reps_auto() should not
111  be called when mode is \ref rmode_user.
112  */
113  class hist {
114 
115  public:
116 
118 
119  protected:
120 
121  /// Bin locations (N+1)
122  ubvector ubin;
123 
124  /// Bin contents (N)
125  ubvector uwgt;
126 
127  /// Bin representative values (N)
128  ubvector urep;
129 
130  /// User-defined representative values (N)
131  ubvector user_rep;
132 
133  /// Number of bins
134  size_t hsize;
135 
136  /// Representative mode
137  size_t rmode;
138 
139  /// Interpolation type
140  size_t itype;
141 
142  /** \brief Set the representative array according to current
143  rmode (if not in user rep mode)
144  */
145  void set_reps_auto();
146 
147  /// Interpolation typedef
149 
150  /** \brief Allocate vectors for a histogram of size \c n
151 
152  This function also sets all the weights to zero.
153  */
154  void allocate(size_t n);
155 
156  public:
157 
158  /// Create an empty histogram
159  hist();
160 
161  ~hist();
162 
163  /// Copy constructor
164  hist(const hist &h);
165 
166  /// Copy constructor
167  hist &operator=(const hist &h);
168 
169  /** \brief Create a histogram from the first \c nv entries in
170  a vector of data
171 
172  This function creates a histogram with \c n_bins equally
173  spaced bins from the minimum element to the maximum element in
174  \c v . The values of \ref extend_lhs and \ref extend_rhs are
175  set to true, so the first and last bin are guaranteed to be at
176  least 1.
177  */
178  template<class vec_t> hist(size_t nv, const vec_t &v, size_t n_bins) {
179 
180  itype=1;
181  rmode=rmode_avg;
182  hsize=0;
183  extend_lhs=true;
184  extend_rhs=true;
185 
186  double min, max;
187  o2scl::vector_minmax_value(nv,v,min,max);
189  set_bin_edges(ug);
190 
191  for(size_t i=0;i<nv;i++) {
192  update(v[i]);
193  }
194  return;
195  }
196 
197  /** \brief Create a histogram from the first \c nv entries in
198  a vector of data and a vector of weights
199 
200  This function creates a histogram with \c n_bins equally
201  spaced bins from the minimum element to the maximum element in
202  \c v . The values of \ref extend_lhs and \ref extend_rhs are
203  set to true, so the first and last bin are guaranteed to be at
204  least 1.
205  */
206  template<class vec_t, class vec2_t>
207  hist(size_t nv, const vec_t &v, const vec2_t &w, size_t n_bins) {
208 
209  itype=1;
210  rmode=rmode_avg;
211  hsize=0;
212  extend_lhs=true;
213  extend_rhs=true;
214 
215  double min, max;
216  o2scl::vector_minmax_value(nv,v,min,max);
218  set_bin_edges(ug);
219 
220  for(size_t i=0;i<nv;i++) {
221  update(v[i],w[i]);
222  }
223  return;
224  }
225 
226  /** \brief Create a histogram from a vector of data
227 
228  This function creates a histogram with \c n_bins equally
229  spaced bins from the minimum element to the maximum element in
230  \c v . The values of \ref extend_lhs and \ref extend_rhs are
231  set to true, so the first and last bin are guaranteed to be at
232  least 1.
233  */
234  template<class vec_t> hist(const vec_t &v, size_t n_bins) {
235  size_t nv=v.size();
236  hist(nv,v,n_bins);
237  return;
238  }
239 
240  /** \brief Create a histogram from a vector of data and a vector
241  of weights
242 
243  This function creates a histogram with \c n_bins equally
244  spaced bins from the minimum element to the maximum element in
245  \c v . The values of \ref extend_lhs and \ref extend_rhs are
246  set to true, so the first and last bin are guaranteed to be at
247  least 1.
248  */
249  template<class vec_t, class vec2_t> hist
250  (const vec_t &v, const vec2_t &w, size_t n_bins) {
251 
252  size_t nv=v.size();
253  hist(nv,v,w,n_bins);
254  return;
255  }
256 
257  /** \brief Create a histogram from a column in a \ref o2scl::table
258  object
259  */
260  void from_table(o2scl::table<> &t, std::string colx,
261  size_t n_bins) {
262  *this=hist(t.get_nlines(),t.get_column(colx),n_bins);
263  return;
264  }
265 
266  /** \brief Create a histogram from a column of data and a column
267  of weights in a \ref o2scl::table object
268  */
269  void from_table(o2scl::table<> &t, std::string colx, std::string coly,
270  size_t n_bins) {
271  *this=hist(t.get_nlines(),t.get_column(colx),t.get_column(coly),
272  n_bins);
273  return;
274  }
275 
276  /// The histogram size
277  size_t size() const {
278  return hsize;
279  }
280 
281  /** \brief If true, allow abcissae beyond the last bin (default false)
282 
283  If this is true, the histogram will allow data with
284  corresponding to bins larger than the largest bin (for
285  increasing bin edges) or smaller than the smallest bin (for
286  decreasing bin edges).
287  */
289 
290  /** \brief If true, allow abcissae before the first bin (default false)
291  */
293 
294  /// \name Initial bin setup
295  //@{
296  /** \brief Set bins from a \ref uniform_grid object
297 
298  If the current histogram is not empty, then the number of bins
299  reported by \ref uniform_grid<>::get_nbins() should be equal
300  to the current histogram size so that the number of bins is
301  equal and we can use the same weights.
302 
303  If either the histogram is empty, or the current
304  representative mode is not \ref rmode_user, then the
305  representative mode is automatically set to \ref rmode_avg (or
306  \ref rmode_gmean if \ref uniform_grid::is_log() returns \c
307  true ) .
308  */
309  void set_bin_edges(uniform_grid<double> g);
310 
311  /** \brief Set the bins from a vector
312 
313  The parameter \c n is the size of the vector, equal to the
314  number of edges (always one more than the number of bins). If
315  the current histogram is not empty, then \c n should be equal
316  one larger to the size reported by \ref size() so that the
317  number of bins is equal and we can use the same weights.
318  */
319  template<class vec_t> void set_bin_edges(size_t n, const vec_t &v) {
320  if (n!=hsize+1) {
321  if (hsize!=0) {
322  O2SCL_ERR2("Requested binning change in non-empty ",
323  "histogram in hist::set_bin_edges().",exc_efailed);
324  }
325  allocate(n-1);
326  }
327  for(size_t i=0;i<n;i++) ubin[i]=v[i];
328  // Reset internal reps
329  if (urep.size()>0) urep.clear();
330  return;
331  }
332  //@}
333 
334  /// \name Weight functions
335  //@{
336  /// Increment bin for \c x by value \c val
337  void update(double x, double val=1.0);
338 
339  /// Increment bin with index \c i by value \c val
340  void update_i(size_t i, double val=1.0) {
341  uwgt[i]+=val;
342  return;
343  }
344 
345  /// Return contents of bin with index \c i
346  const double &get_wgt_i(size_t i) const;
347 
348  /// Return contents of bin with index \c i
349  double &get_wgt_i(size_t i);
350 
351  /// Return contents of bin for \c x
352  const double &get_wgt(double x) const {
353  return get_wgt_i(get_bin_index(x));
354  }
355 
356  /// Return contents of bin for \c x
357  double &get_wgt(double x) {
358  return get_wgt_i(get_bin_index(x));
359  }
360 
361  /// Set contents of bin with index \c i to value \c val
362  void set_wgt_i(size_t i, double val);
363 
364  /// Set contents of bin for \c x to value \c val
365  void set_wgt(double x, double val) {
366  set_wgt_i(get_bin_index(x),val);
367  return;
368  }
369 
370  /// Get a reference to the full y vector
371  const ubvector &get_wgts() const {
372  return uwgt;
373  }
374 
375  /// Get a reference to the weight for the bin at index \c i
376  const double &operator[](size_t i) const {
377  return uwgt[i];
378  }
379 
380  /// Get a reference to the weight for the bin at index \c i
381  double &operator[](size_t i) {
382  return uwgt[i];
383  }
384  //@}
385 
386  /// \name Bin manipulation
387  //@{
388  /** \brief Get the index of the bin which holds \c x
389 
390  Always returns a value between 0 and size() (inclusive)
391  */
392  size_t get_bin_index(double x) const;
393 
394  /// Get the lower edge of bin of index \c i
395  double &get_bin_low_i(size_t i);
396 
397  /// Get the lower edge of bin of index \c i
398  const double &get_bin_low_i(size_t i) const;
399 
400  /// Get the upper edge of bin of index \c i
401  double &get_bin_high_i(size_t i);
402 
403  /// Get the upper edge of bin of index \c i
404  const double &get_bin_high_i(size_t i) const;
405 
406  /// Get the lower edge of bin of index \c i
407  double &get_bin_low(double x) {
408  return get_bin_low_i(get_bin_index(x));
409  }
410 
411  /// Get the lower edge of bin of index \c i
412  const double &get_bin_low(double x) const {
413  return get_bin_low_i(get_bin_index(x));
414  }
415 
416  /// Get the upper edge of bin of index \c i
417  double &get_bin_high(double x) {
418  return get_bin_high_i(get_bin_index(x));
419  }
420 
421  /// Get the upper edge of bin of index \c i
422  const double &get_bin_high(double x) const {
423  return get_bin_high_i(get_bin_index(x));
424  }
425 
426  /// Get a reference to the full vector of bin specifications
427  const ubvector &get_bins() const {
428  return ubin;
429  }
430  //@}
431 
432  /// \name Max and min functions
433  //@{
434  /** \brief Get maximum weight
435  */
436  double get_max_wgt() const;
437 
438  /** \brief Get the bin index of the maximum weight
439  */
440  size_t get_max_index() const;
441 
442  /** \brief Get the representative for the bin with maximum weight
443  */
444  double get_max_rep();
445 
446  /** \brief Get minimum weight
447  */
448  double get_min_wgt() const;
449 
450  /** \brief Get the bin index of the minimum weight
451  */
452  size_t get_min_index() const;
453 
454  /** \brief Get the representative for the bin with minimum weight
455  */
456  double get_min_rep();
457  //@}
458 
459  /// \name Delete functions
460  //@{
461  /// Clear the data, but leave the bins as is
462  void clear_wgts();
463 
464  /// Clear the entire histogram
465  void clear();
466  //@}
467 
468  /// \name Representative modes (default is rmode_avg)
469  // Using \c rmode_avg in documentation doesn't work.
470  //@{
471  /// Average lower and upper edge
472  static const size_t rmode_avg=0;
473  /// Use user-specified representative
474  static const size_t rmode_user=1;
475  /// Use lower edge
476  static const size_t rmode_low=2;
477  /// Use upper edge
478  static const size_t rmode_high=3;
479  /// Use the geometric mean of the lower and upper edges
480  static const size_t rmode_gmean=4;
481  //@}
482 
483  /// \name Representative functions
484  //@{
485  /// Set the representative x-values for each bin
486  template<class vec_t> void set_reps(size_t n, const vec_t &v) {
487  if (n!=hsize) {
488  std::string s="Expected a vector of size "+itos(hsize)+
489  " and got a vector of size "+itos(n)+" in hist::set_reps().";
490  O2SCL_ERR(s.c_str(),exc_einval);
491  }
492  rmode=rmode_user;
493  if (user_rep.size()>0) user_rep.clear();
494  user_rep.resize(n);
495  for(size_t i=0;i<n;i++) user_rep[i]=v[i];
496  return;
497  }
498 
499  /** \brief Set mode used to compute bin representatives
500 
501  Acceptable inputs are \ref rmode_avg, \ref rmode_low,
502  \ref rmode_high, and \ref rmode_gmean .
503  */
504  void set_rep_mode(size_t mode);
505 
506  /// Get mode used to compute bin representatives
507  size_t get_rep_mode() const {
508  return rmode;
509  }
510 
511  /** \brief Return the representative of bin of index \c i
512 
513  Note that this function returns a value and not a reference.
514  This is because we can't return a reference to the internally
515  computed representatives, since they don't always exist.
516  */
517  double get_rep_i(size_t i);
518 
519  /// Return the representative of bin containing \c x
520  double get_rep(double x) {
521  return get_rep_i(get_bin_index(x));
522  }
523 
524  /** \brief Create a vector filled with the representatives for
525  each bin
526  */
527  template<class resize_vec_t> void create_rep_vec(resize_vec_t &v) {
528  v.resize(hsize);
529  for(size_t i=0;i<hsize;i++) {
530  v[i]=get_rep_i(i);
531  }
532  return;
533  }
534  //@}
535 
536  /// \name Evaluation and interpolation functions
537  //@{
538  /// Return the value of the function at \c x
539  double operator()(double x);
540 
541  /// Return the value of the function at \c x
542  double interp(double x);
543 
544  /// Return the derivative of the function at \c x
545  double deriv(double x);
546 
547  /// Return the second derivative of the function at \c x
548  double deriv2(double x);
549 
550  /// Return the integral of the function between \c x and \c y
551  double integ(double x, double y);
552 
553  /// Set the interpolation type
554  void set_interp_type(size_t interp_type);
555  //@}
556 
557  /// \name Other functions
558  //@{
559  /// Return the sum of all of the weights
560  double sum_wgts();
561 
562  /** \brief Return the integral under the histogram
563 
564  This function returns
565  \f[
566  \sum_{i=0}^{N-1} w_i ( \mathrm{high}_i - \mathrm{low}_i) \, .
567  \f]
568  where \f$ N \f$ is the size of the histogram.
569  */
570  double integ_wgts();
571 
572  /** \brief This function copies all bin representative values to
573  the vector \c v, presuming that it has already been allocated
574  */
575  template<class vec_t> void copy_reps(vec_t &v) {
576 #if !O2SCL_NO_RANGE_CHECK
577  is_valid();
578 #endif
579  // If we're in user mode, just return the user value
580  if (rmode==rmode_user) {
581  for(size_t i=0;i<hsize;i++) {
582  v[i]=user_rep[i];
583  }
584  return;
585  }
586  // Check if the internal reps are not already computed
587  if (urep.size()==0) set_reps_auto();
588  // Copy the data over
589  for(size_t i=0;i<hsize;i++) {
590  v[i]=urep[i];
591  }
592  return;
593  }
594 
595  /** \brief Get the representative values for the bins
596  and store them in vector \c v using <tt>std::swap</tt> .
597 
598  This function resizes the vector \c v if necessary.
599  */
600  void swap_reps(ubvector &v);
601 
602  /** \brief Renormalize the weights to fix the integral
603 
604  This computes the integral using \ref integ() and so the
605  action of this function depends on the interpolation type.
606  If the histogram is empty, an exception is thrown.
607  */
608  void normalize(double new_sum=1.0);
609 
610  /** \brief Internal consistency check
611 
612  This function principally checks that the sizes of
613  the internal vectors match up.
614  */
615  void is_valid() const;
616 
617  /** \brief Copy histogram data to a table
618 
619  First, if the table \c t has less rows than the histogram has
620  bins, then new rows are added to the table and values in the
621  new rows of the current columns are set to zero. Second, this
622  creates new columns in the table named \c reps, \c
623  lower_edges, \c upper_edges, and \c weights . Finally,
624  the histogram data is copied to the four new columns.
625  */
626  void copy_to_table(table<> &t, std::string reps, std::string lower_edges,
627  std::string upper_edges, std::string weights);
628  //@}
629 
630  // Allow HDF I/O function to access hist data
631  friend void o2scl_hdf::hdf_output(o2scl_hdf::hdf_file &hf, o2scl::hist &h,
632  std::string name);
633 
634  // Allow HDF I/O function to access hist data
636  std::string name);
637 
638  };
639 
640 #ifndef DOXYGEN_NO_O2NS
641 }
642 #endif
643 
644 #endif
Interpolation class for pre-specified vector.
Definition: interp.h:1795
interp_vec< ubvector > interp_t
Interpolation typedef.
Definition: hist.h:148
ubvector ubin
Bin locations (N+1)
Definition: hist.h:122
size_t size() const
The histogram size.
Definition: hist.h:277
size_t itype
Interpolation type.
Definition: hist.h:140
The main O<span style=&#39;position: relative; top: 0.3em; font-size: 0.8em&#39;>2</span>scl O$_2$scl names...
Definition: anneal.h:42
size_t get_rep_mode() const
Get mode used to compute bin representatives.
Definition: hist.h:507
size_t get_nlines() const
Return the number of lines.
Definition: table.h:451
Data table table class.
Definition: table.h:49
void set_wgt(double x, double val)
Set contents of bin for x to value val.
Definition: hist.h:365
invalid argument supplied by user
Definition: err_hnd.h:59
ubvector uwgt
Bin contents (N)
Definition: hist.h:125
const ubvector & get_wgts() const
Get a reference to the full y vector.
Definition: hist.h:371
bool extend_rhs
If true, allow abcissae beyond the last bin (default false)
Definition: hist.h:288
void from_table(o2scl::table<> &t, std::string colx, std::string coly, size_t n_bins)
Create a histogram from a column of data and a column of weights in a o2scl::table object...
Definition: hist.h:269
hist(const vec_t &v, size_t n_bins)
Create a histogram from a vector of data.
Definition: hist.h:234
generic failure
Definition: err_hnd.h:61
A one-dimensional histogram class.
Definition: hist.h:113
ubvector urep
Bin representative values (N)
Definition: hist.h:128
const vec_t & get_column(std::string scol) const
Returns a reference to the column named col. .
Definition: table.h:657
void from_table(o2scl::table<> &t, std::string colx, size_t n_bins)
Create a histogram from a column in a o2scl::table object.
Definition: hist.h:260
bool extend_lhs
If true, allow abcissae before the first bin (default false)
Definition: hist.h:292
const double & get_bin_high(double x) const
Get the upper edge of bin of index i.
Definition: hist.h:422
double get_rep(double x)
Return the representative of bin containing x.
Definition: hist.h:520
void update_i(size_t i, double val=1.0)
Increment bin with index i by value val.
Definition: hist.h:340
hist(size_t nv, const vec_t &v, const vec2_t &w, size_t n_bins)
Create a histogram from the first nv entries in a vector of data and a vector of weights.
Definition: hist.h:207
ubvector user_rep
User-defined representative values (N)
Definition: hist.h:131
#define O2SCL_ERR2(d, d2, n)
Set an error, two-string version.
Definition: err_hnd.h:281
hist(size_t nv, const vec_t &v, size_t n_bins)
Create a histogram from the first nv entries in a vector of data.
Definition: hist.h:178
const ubvector & get_bins() const
Get a reference to the full vector of bin specifications.
Definition: hist.h:427
The O<span style=&#39;position: relative; top: 0.3em; font-size: 0.8em&#39;>2</span>scl O$_2$scl namespace ...
Definition: table.h:53
void vector_minmax_value(size_t n, vec_t &data, data_t &min, data_t &max)
Compute the minimum and maximum of the first n elements of a vector.
Definition: vector.h:1275
const double & operator[](size_t i) const
Get a reference to the weight for the bin at index i.
Definition: hist.h:376
#define O2SCL_ERR(d, n)
Set an error with message d and code n.
Definition: err_hnd.h:273
double & get_wgt(double x)
Return contents of bin for x.
Definition: hist.h:357
void copy_reps(vec_t &v)
This function copies all bin representative values to the vector v, presuming that it has already bee...
Definition: hist.h:575
double & get_bin_low(double x)
Get the lower edge of bin of index i.
Definition: hist.h:407
double & get_bin_high(double x)
Get the upper edge of bin of index i.
Definition: hist.h:417
size_t rmode
Representative mode.
Definition: hist.h:137
const double & get_bin_low(double x) const
Get the lower edge of bin of index i.
Definition: hist.h:412
void set_reps(size_t n, const vec_t &v)
Set the representative x-values for each bin.
Definition: hist.h:486
size_t hsize
Number of bins.
Definition: hist.h:134
void create_rep_vec(resize_vec_t &v)
Create a vector filled with the representatives for each bin.
Definition: hist.h:527
Store data in an O<span style=&#39;position: relative; top: 0.3em; font-size: 0.8em&#39;>2</span>scl O$_2$sc...
Definition: hdf_file.h:101
const double & get_wgt(double x) const
Return contents of bin for x.
Definition: hist.h:352
double & operator[](size_t i)
Get a reference to the weight for the bin at index i.
Definition: hist.h:381
std::string itos(int x)
Convert an integer to a string.
void hdf_input(hdf_file &hf, o2scl::table< vec_t > &t, std::string name)
Input a o2scl::table object from a hdf_file.
Definition: hdf_io.h:146
void set_bin_edges(size_t n, const vec_t &v)
Set the bins from a vector.
Definition: hist.h:319
Interpolation class for general vectors.
Definition: interp.h:1654
Linear grid with fixed number of bins and fixed endpoint.
Definition: uniform_grid.h:333

Documentation generated with Doxygen. Provided under the GNU Free Documentation License (see License Information).