From 60d707fd1db41538718d32aa9f0ea667a62716c2 Mon Sep 17 00:00:00 2001
From: Ruben Verghese <rubenv2@illinois.edu>
Date: Wed, 22 Jan 2020 03:33:03 -0600
Subject: [PATCH] Add Normal Distribution Functions (#56)

All functions from https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html except the Expect function.
---
 README.md    |  19 ++++
 norm.go      | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++
 norm_test.go | 204 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 479 insertions(+)
 create mode 100644 norm.go
 create mode 100644 norm_test.go

diff --git a/README.md b/README.md
index a30c949..1dcc714 100644
--- a/README.md
+++ b/README.md
@@ -89,6 +89,25 @@ func Midhinge(input Float64Data) (float64, error) {}
 func Min(input Float64Data) (min float64, err error) {}
 func MinkowskiDistance(dataPointX, dataPointY Float64Data, lambda float64) (distance float64, err error) {}
 func Mode(input Float64Data) (mode []float64, err error) {}
+func NormBoxMullerRvs(loc float64, scale float64, size int) []float64 {}
+func NormCdf(x float64, loc float64, scale float64) float64 {}
+func NormEntropy(loc float64, scale float64) float64 {}
+func NormFit(data []float64) [2]float64{}
+func NormInterval(alpha float64, loc float64,  scale float64 ) [2]float64 {}
+func NormIsf(p float64, loc float64, scale float64) (x float64) {}
+func NormLogCdf(x float64, loc float64, scale float64) float64 {}
+func NormLogPdf(x float64, loc float64, scale float64) float64 {}
+func NormLogSf(x float64, loc float64, scale float64) float64 {}
+func NormMean(loc float64, scale float64) float64 {}
+func NormMedian(loc float64, scale float64) float64 {}
+func NormMoment(n int, loc float64, scale float64) float64 {}
+func NormPdf(x float64, loc float64, scale float64) float64 {}
+func NormPpf(p float64, loc float64, scale float64) (x float64) {}
+func NormPpfRvs(loc float64, scale float64, size int) []float64 {}
+func NormSf(x float64, loc float64, scale float64) float64 {}
+func NormStats(loc float64, scale float64, moments string) []float64 {}
+func NormStd(loc float64, scale float64) float64 {}
+func NormVar(loc float64, scale float64) float64 {}
 func Pearson(data1, data2 Float64Data) (float64, error) {}
 func Percentile(input Float64Data, percent float64) (percentile float64, err error) {}
 func PercentileNearestRank(input Float64Data, percent float64) (percentile float64, err error) {}
diff --git a/norm.go b/norm.go
new file mode 100644
index 0000000..c49576d
--- /dev/null
+++ b/norm.go
@@ -0,0 +1,256 @@
+package stats
+
+import (
+	"math"
+	"math/rand"
+	"strings"
+	"time"
+)
+
+// NormPpfRvs generates random variates using the Point Percentile Function.
+// For more information please visit: https://demonstrations.wolfram.com/TheMethodOfInverseTransforms/
+func NormPpfRvs(loc float64, scale float64, size int) []float64 {
+	rand.Seed(time.Now().UnixNano())
+	var toReturn []float64
+	for i := 0; i < size; i++ {
+		toReturn = append(toReturn, NormPpf(rand.Float64(), loc, scale))
+	}
+	return toReturn
+}
+
+// NormBoxMullerRvs generates random variates using the Box–Muller transform.
+// For more information please visit: http://mathworld.wolfram.com/Box-MullerTransformation.html
+func NormBoxMullerRvs(loc float64, scale float64, size int) []float64 {
+	rand.Seed(time.Now().UnixNano())
+	var toReturn []float64
+	for i := 0; i < int(math.Floor(float64(size / 2)) + float64(size % 2)); i++ {
+		// u1 and u2 are uniformly distributed random numbers between 0 and 1.
+		u1 := rand.Float64()
+		u2 := rand.Float64()
+		// x1 and x2 are normally distributed random numbers.
+		x1 := loc + (scale * (math.Sqrt(-2*math.Log(u1)) * math.Cos(2*math.Pi*u2)))
+		toReturn = append(toReturn, x1)
+		if (i + 1) * 2 <= size {
+			x2 := loc + (scale * (math.Sqrt(-2*math.Log(u1)) * math.Sin(2*math.Pi*u2)))
+			toReturn = append(toReturn, x2)
+		}
+	}
+	return toReturn
+}
+
+// NormPdf is the probability density function.
+func NormPdf(x float64, loc float64, scale float64) float64 {
+	return (math.Pow(math.E, -(math.Pow(x-loc, 2)) / (2 * math.Pow(scale, 2)))) / (scale* math.Sqrt(2 * math.Pi))
+}
+
+// NormLogPdf is the log of the probability density function.
+func NormLogPdf(x float64, loc float64, scale float64) float64 {
+	return math.Log((math.Pow(math.E, -(math.Pow(x-loc, 2)) / (2 * math.Pow(scale, 2)))) / (scale* math.Sqrt(2 * math.Pi)))
+}
+
+// NormCdf is the cumulative distribution function.
+func NormCdf(x float64, loc float64, scale float64) float64 {
+	return 0.5*(1 + math.Erf((x - loc) / (scale * math.Sqrt(2))))
+}
+
+// NormLogCdf is the log of the cumulative distribution function.
+func NormLogCdf(x float64, loc float64, scale float64) float64 {
+	return math.Log(0.5*(1 + math.Erf((x - loc) / (scale * math.Sqrt(2)))))
+}
+
+// NormSf is the survival function (also defined as 1 - cdf, but sf is sometimes more accurate).
+func NormSf(x float64, loc float64, scale float64) float64 {
+	return 1 - 0.5*(1 + math.Erf((x - loc) / (scale * math.Sqrt(2))))
+}
+
+// NormLogSf is the log of the survival function.
+func NormLogSf(x float64, loc float64, scale float64) float64 {
+	return math.Log(1 - 0.5*(1 + math.Erf((x - loc) / (scale * math.Sqrt(2)))))
+}
+
+// NormPpf is the point percentile function.
+// This is based on Peter John Acklam's inverse normal CDF.
+// algorithm: http://home.online.no/~pjacklam/notes/invnorm/ (no longer visible).
+// For more information please visit: https://stackedboxes.org/2017/05/01/acklams-normal-quantile-function/
+func NormPpf(p float64, loc float64, scale float64) (x float64) {
+	const (
+		a1 = -3.969683028665376e+01
+		a2 = 2.209460984245205e+02
+		a3 = -2.759285104469687e+02
+		a4 = 1.383577518672690e+02
+		a5 = -3.066479806614716e+01
+		a6 = 2.506628277459239e+00
+
+		b1 = -5.447609879822406e+01
+		b2 = 1.615858368580409e+02
+		b3 = -1.556989798598866e+02
+		b4 = 6.680131188771972e+01
+		b5 = -1.328068155288572e+01
+
+		c1 = -7.784894002430293e-03
+		c2 = -3.223964580411365e-01
+		c3 = -2.400758277161838e+00
+		c4 = -2.549732539343734e+00
+		c5 = 4.374664141464968e+00
+		c6 = 2.938163982698783e+00
+
+		d1 = 7.784695709041462e-03
+		d2 = 3.224671290700398e-01
+		d3 = 2.445134137142996e+00
+		d4 = 3.754408661907416e+00
+
+		plow  = 0.02425
+		phigh = 1 - plow
+	)
+
+	if p < 0 || p > 1 {
+		return math.NaN()
+	} else if p == 0 {
+		return -math.Inf(0)
+	} else if p == 1 {
+		return math.Inf(0)
+	}
+
+	if p < plow {
+		q := math.Sqrt(-2 * math.Log(p))
+		x = (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) /
+			((((d1*q+d2)*q+d3)*q+d4)*q + 1)
+	} else if phigh < p {
+		q := math.Sqrt(-2 * math.Log(1-p))
+		x = -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) /
+			((((d1*q+d2)*q+d3)*q+d4)*q + 1)
+	} else {
+		q := p - 0.5
+		r := q * q
+		x = (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r + a6) * q /
+			(((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r + 1)
+	}
+
+	e := 0.5*math.Erfc(-x/math.Sqrt2) - p
+	u := e * math.Sqrt(2*math.Pi) * math.Exp(x*x/2)
+	x = x - u/(1+x*u/2)
+
+	return x*scale + loc
+}
+
+// NormIsf is the inverse survival function (inverse of sf).
+func NormIsf(p float64, loc float64, scale float64) (x float64) {
+	if -NormPpf(p, loc, scale) == 0 {
+		return 0
+	}
+	return -NormPpf(p, loc, scale)
+}
+
+// NormMoment approximates the non-central (raw) moment of order n.
+// For more information please visit: https://math.stackexchange.com/questions/1945448/methods-for-finding-raw-moments-of-the-normal-distribution
+func NormMoment(n int, loc float64, scale float64) float64 {
+	toReturn := 0.0
+	for i := 0; i < n + 1; i++ {
+		if (n-i) % 2 == 0 {
+			toReturn += float64(Ncr(n, i)) * (math.Pow(loc, float64(i))) * (math.Pow(scale, float64(n - i))) *
+				(float64(factorial(n - i)) / ((math.Pow(2.0, float64((n - i) / 2))) *
+					float64(factorial((n - i) / 2))))
+		}
+	}
+	return toReturn
+}
+// NormStats returns the mean, variance, skew, and/or kurtosis.
+// Mean(‘m’), variance(‘v’), skew(‘s’), and/or kurtosis(‘k’).
+// Takes string containing any of 'mvsk'.
+// Returns array of m v s k in that order.
+func NormStats(loc float64, scale float64, moments string) []float64 {
+	var toReturn []float64
+	if strings.ContainsAny(moments, "m") {
+		toReturn = append(toReturn, loc)
+	}
+	if strings.ContainsAny(moments, "v") {
+		toReturn = append(toReturn, math.Pow(scale, 2))
+	}
+	if strings.ContainsAny(moments, "s") {
+		toReturn = append(toReturn, 0.0)
+	}
+	if strings.ContainsAny(moments, "k") {
+		toReturn = append(toReturn, 0.0)
+	}
+	return toReturn
+}
+
+// NormEntropy is the differential entropy of the RV.
+func NormEntropy(loc float64, scale float64) float64 {
+	return math.Log(scale * math.Sqrt(2 * math.Pi * math.E))
+}
+
+// NormFit returns the maximum likelihood estimators for the Normal Distribution.
+// Takes array of float64 values.
+// Returns array of Mean followed by Standard Deviation.
+func NormFit(data []float64) [2]float64{
+	sum := 0.00
+	mean := 0.00
+	for i := 0; i < len(data); i++ {
+		sum += data[i]
+	}
+	mean = sum / float64(len(data))
+	stdNumerator := 0.00
+	for i := 0; i < len(data); i++ {
+		stdNumerator += math.Pow(data[i]-mean, 2)
+	}
+	return [2]float64{mean , math.Sqrt((stdNumerator)/(float64(len(data))))}
+}
+
+// NormMedian is the median of the distribution.
+func NormMedian(loc float64, scale float64) float64 {
+	return loc
+}
+
+// NormMean is the mean/expected value of the distribution.
+func NormMean(loc float64, scale float64) float64 {
+	return loc
+}
+
+// NormVar is the variance of the distribution.
+func NormVar(loc float64, scale float64) float64 {
+	return math.Pow(scale, 2)
+}
+
+// NormStd is the standard deviation of the distribution.
+func NormStd(loc float64, scale float64) float64 {
+	return scale
+}
+
+// NormInterval finds endpoints of the range that contains alpha percent of the distribution.
+func NormInterval(alpha float64, loc float64,  scale float64 ) [2]float64 {
+	q1 := (1.0-alpha)/2
+	q2 := (1.0+alpha)/2
+	a := NormPpf(q1, loc, scale)
+	b := NormPpf(q2, loc, scale)
+	return [2]float64{a, b}
+}
+
+// factorial is the naive factorial algorithm.
+func factorial(x int) int {
+	if x == 0 {
+		return 1
+	}
+	return x * factorial(x - 1)
+}
+
+// Ncr is an N choose R algorithm.
+// Aaron Cannon's algorithm.
+func Ncr(n, r int) int {
+	if n <= 1 || r == 0 || n == r {
+		return 1
+	}
+	if newR := n - r; newR < r {
+		r = newR
+	}
+	if r == 1 {
+		return n
+	}
+	ret := int(n - r + 1)
+	for i, j := ret+1, int(2); j <= r; i, j = i+1, j+1 {
+		ret = ret * i / j
+	}
+	return ret
+}
+
+
diff --git a/norm_test.go b/norm_test.go
new file mode 100644
index 0000000..2a5aa32
--- /dev/null
+++ b/norm_test.go
@@ -0,0 +1,204 @@
+package stats
+
+import (
+	"math"
+	"reflect"
+	"testing"
+)
+
+func testEq(a, b []float64) bool {
+
+	// If one is nil, the other must also be nil.
+	if (a == nil) != (b == nil) {
+		return false;
+	}
+
+	if len(a) != len(b) {
+		return false
+	}
+
+	for i := range a {
+		if a[i] != b[i] {
+			return false
+		}
+	}
+
+	return true
+}
+
+func TestNormPpf(t *testing.T) {
+	if NormPpf(0.5, 0, 1) != 0 {
+		t.Error("Input 0.5, Expected 0")
+	}
+	if NormPpf(0.1, 0, 1) != -1.2815515655446004 {
+		t.Error("Input 0.1, Expected -1.2815515655446004")
+	}
+	if NormPpf(0.002423, 0, 1) != -2.817096255323953 {
+		t.Error("Input 0.002423, Expected -2.817096255323953")
+	}
+	if NormPpf(1 - 0.002423, 0, 1) != 2.817096255323956 {
+		t.Error("Input 1 - 0.002423, Expected 2.817096255323956")
+	}
+	if NormPpf(1.1, 0, 1) == math.NaN() {
+		t.Error("Input 1.1, Expected NaN")
+	}
+	if NormPpf(-0.1, 0, 1) == math.NaN() {
+		t.Error("Input -0.1, Expected Nan")
+	}
+	if NormPpf(0, 0, 1) != -math.Inf(1) {
+		t.Error("Input 0, Expected -Inf")
+	}
+	if NormPpf(1, 0, 1) != math.Inf(1) {
+		t.Error("Input 1, Expected Inf")
+	}
+}
+
+func TestNormCdf(t *testing.T) {
+	if NormCdf(0, 0, 1) != 0.5 {
+		t.Error("Input 0, Expected 0.5")
+	}
+	if NormCdf(0.5, 0, 1) != 0.6914624612740131 {
+		t.Error("Input 0.5, Expected 0.6914624612740131")
+	}
+	if NormCdf(-0.5, 0, 1) != 0.3085375387259869 {
+		t.Error("Input -0.5, Expected 0.3085375387259869")
+	}
+}
+
+func TestNormPdf(t *testing.T) {
+	if NormPdf(0.5, 0, 1) != 0.35206532676429947 {
+		t.Error("Input 0.5, Expected 0.35206532676429947")
+	}
+	if NormPdf(0, 0, 1) != 0.3989422804014327 {
+		t.Error("Input 0, Expected 0.3989422804014327")
+	}
+	if NormPdf(-0.5, 0, 1) != 0.35206532676429947 {
+		t.Error("Input -0.5, Expected 0.35206532676429947")
+	}
+}
+
+func TestNormLogPdf(t *testing.T) {
+	if NormLogPdf(0, 0, 1) != -0.9189385332046727 {
+		t.Error("Input 0, Expected -0.9189385332046727")
+	}
+	if NormPdf(0, 0, 1) != 0.3989422804014327 {
+		t.Error("Input 0, Expected 0.3989422804014327")
+	}
+	if NormPdf(-0.5, 0, 1) != 0.35206532676429947 {
+		t.Error("Input -0.5, Expected 0.35206532676429947")
+	}
+}
+
+func TestNormLogCdf(t *testing.T) {
+	if NormLogCdf(0.5, 0, 1) != -0.36894641528865635 {
+		t.Error("Input 0.5, Expected -0.36894641528865635")
+	}
+}
+
+func TestNormIsf(t *testing.T) {
+	if NormIsf(0.5, 0, 1) != 0 {
+		t.Error("Input 0.5, Expected 0")
+	}
+	if NormIsf(0.1, 0, 1) != 1.2815515655446004 {
+		t.Error("Input 0.1, Expected 1.2815515655446004")
+	}
+}
+
+func TestNormSf(t *testing.T) {
+	if NormSf(0.5, 0, 1) != 0.3085375387259869 {
+		t.Error("Input 0.5, Expected 0.3085375387259869")
+	}
+}
+
+func TestNormLogSf(t *testing.T) {
+	if NormLogSf(0.5, 0, 1) != -1.1759117615936185 {
+		t.Error("Input 0.5, Expected -1.1759117615936185")
+	}
+}
+
+func TestNormMoment(t *testing.T) {
+	if NormMoment(4, 0, 1) != 3 {
+		t.Error("Input 3, Expected 3")
+	}
+	if NormMoment(4, 0, 1) != 3 {
+		t.Error("Input 3, Expected 3")
+	}
+}
+
+func TestNormStats(t *testing.T) {
+	if !reflect.DeepEqual(NormStats(0, 1, "m"), []float64{0}){
+		t.Error("Input 'm' , Expected 0")
+	}
+	if !reflect.DeepEqual(NormStats(0, 1, "v"), []float64{1}){
+		t.Error("Input 'v' , Expected 1")
+	}
+	if !reflect.DeepEqual(NormStats(0, 1, "s"), []float64{0}){
+		t.Error("Input 's' , Expected 0")
+	}
+	if !reflect.DeepEqual(NormStats(0, 1, "k"), []float64{0}){
+		t.Error("Input 'k' , Expected 0")
+	}
+}
+
+func TestNormEntropy(t *testing.T) {
+	if NormEntropy( 0, 1) != 1.4189385332046727 {
+		t.Error("Input ( 0 , 1 ), Expected 1.4189385332046727")
+	}
+}
+
+func TestNormFit(t *testing.T) {
+	if !reflect.DeepEqual(NormFit( []float64{0,2,3,4}), [2]float64{2.25, 1.479019945774904}) {
+		t.Error("Input (0,2,3,4), Expected {2.25, 1.479019945774904}")
+	}
+}
+
+func TestNormInterval(t *testing.T) {
+	if !reflect.DeepEqual(NormInterval(0.5, 0, 1), [2]float64{-0.6744897501960818, 0.674489750196082}){
+		t.Error("Input (50 % ), Expected {-0.6744897501960818, 0.674489750196082}")
+	}
+}
+
+func TestNormMean(t *testing.T) {
+	if NormMean(0, 1) != 0 {
+		t.Error("Input (0, 1), Expected 0")
+	}
+}
+
+func TestNormMedian(t *testing.T) {
+	if NormMedian(0, 1) != 0 {
+		t.Error("Input (0, 1), Expected 0")
+	}
+}
+
+func TestNormVar(t *testing.T) {
+	if NormVar(0, 1) != 1 {
+		t.Error("Input (0, 1), Expected 1")
+	}
+}
+
+func TestNormStd(t *testing.T) {
+	if NormStd(0, 1) != 1 {
+		t.Error("Input (0, 1), Expected 1")
+	}
+}
+
+func TestNormPpfRvs(t *testing.T) {
+	if len(NormPpfRvs(0, 1, 101)) != 101 {
+		t.Error("Input size=101, Expected 101")
+	}
+}
+
+func TestNormBoxMullerRvs(t *testing.T) {
+	if len(NormBoxMullerRvs(0, 1, 101)) != 101 {
+		t.Error("Input size=101, Expected 101")
+	}
+}
+
+func TestNcr(t *testing.T) {
+	if Ncr(4, 1) != 4 {
+		t.Error("Input 4 choose 1, Expected 4")
+	}
+	if Ncr(4, 3) != 4{
+		t.Error("Input 4 choose 3, Expected 4")
+	}
+}