diff options
Diffstat (limited to 'gsl-1.9/cdf/hypergeometric.c')
-rw-r--r-- | gsl-1.9/cdf/hypergeometric.c | 183 |
1 files changed, 183 insertions, 0 deletions
diff --git a/gsl-1.9/cdf/hypergeometric.c b/gsl-1.9/cdf/hypergeometric.c new file mode 100644 index 0000000..c0dee39 --- /dev/null +++ b/gsl-1.9/cdf/hypergeometric.c @@ -0,0 +1,183 @@ +/* cdf/hypergeometric.c + * + * Copyright (C) 2004 Jason H. Stover. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + * Computes the cumulative distribution function for a hypergeometric + * random variable. A hypergeometric random variable X is the number + * of elements of type 1 in a sample of size t, drawn from a population + * of size n1 + n2, in which n1 are of type 1 and n2 are of type 2. + * + * This algorithm computes Pr( X <= k ) by summing the terms from + * the mass function, Pr( X = k ). + * + * References: + * + * T. Wu. An accurate computation of the hypergeometric distribution + * function. ACM Transactions on Mathematical Software. Volume 19, number 1, + * March 1993. + * This algorithm is not used, since it requires factoring the + * numerator and denominator, then cancelling. It is more accurate + * than the algorithm used here, but the cancellation requires more + * time than the algorithm used here. + * + * W. Feller. An Introduction to Probability Theory and Its Applications, + * third edition. 1968. Chapter 2, section 6. + */ + +#include <config.h> +#include <math.h> +#include <gsl/gsl_math.h> +#include <gsl/gsl_errno.h> +#include <gsl/gsl_cdf.h> +#include <gsl/gsl_randist.h> + +#include "error.h" + +static double +lower_tail (const unsigned int k, const unsigned int n1, + const unsigned int n2, const unsigned int t) +{ + double relerr; + int i = k; + double s, P; + + s = gsl_ran_hypergeometric_pdf (i, n1, n2, t); + P = s; + + while (i > 0) + { + double factor = + (i / (n1 - i + 1.0)) * ((n2 + i - t) / (t - i + 1.0)); + s *= factor; + P += s; + relerr = s / P; + if (relerr < GSL_DBL_EPSILON) + break; + i--; + } + + return P; +} + +static double +upper_tail (const unsigned int k, const unsigned int n1, + const unsigned int n2, const unsigned int t) +{ + double relerr; + unsigned int i = k + 1; + double s, Q; + + s = gsl_ran_hypergeometric_pdf (i, n1, n2, t); + Q = s; + + while (i < t) + { + double factor = + ((n1 - i) / (i + 1.0)) * ((t - i) / (n2 + i + 1.0 - t)); + s *= factor; + Q += s; + relerr = s / Q; + if (relerr < GSL_DBL_EPSILON) + break; + i++; + } + + return Q; +} + + + + +/* + * Pr (X <= k) + */ +double +gsl_cdf_hypergeometric_P (const unsigned int k, + const unsigned int n1, + const unsigned int n2, const unsigned int t) +{ + double P; + + if (t > (n1 + n2)) + { + CDF_ERROR ("t larger than population size", GSL_EDOM); + } + else if (k >= n1 || k >= t) + { + P = 1.0; + } + else if (k < 0.0) + { + P = 0.0; + } + else + { + double midpoint = (int) (t * n1 / (n1 + n2)); + + if (k >= midpoint) + { + P = 1 - upper_tail (k, n1, n2, t); + } + else + { + P = lower_tail (k, n1, n2, t); + } + } + + return P; +} + +/* + * Pr (X > k) + */ +double +gsl_cdf_hypergeometric_Q (const unsigned int k, + const unsigned int n1, + const unsigned int n2, const unsigned int t) +{ + double Q; + + if (t > (n1 + n2)) + { + CDF_ERROR ("t larger than population size", GSL_EDOM); + } + else if (k >= n1 || k >= t) + { + Q = 0.0; + } + else if (k < 0.0) + { + Q = 1.0; + } + else + { + double midpoint = (int) (t * n1 / (n1 + n2)); + + if (k < midpoint) + { + Q = 1 - lower_tail (k, n1, n2, t); + } + else + { + Q = upper_tail (k, n1, n2, t); + } + } + + return Q; +} |