Skip to content

Commit

Permalink
Add bootstrapping to regression routine
Browse files Browse the repository at this point in the history
  • Loading branch information
jchristopherson committed Mar 13, 2024
1 parent ae3ca9e commit db84baf
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 24 deletions.
9 changes: 5 additions & 4 deletions examples/bootstrap_regression_example.f90
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ program example
character, parameter :: tab = achar(9)
character, parameter :: nl = new_line('a')
integer(int32) :: i
real(real64) :: x(31), y(31), coeffs(4), ymodeled(31), residuals(31)
real(real64) :: x(31), y(31), coeffs(4), ymodeled(31), residuals(31), bias(4)
type(regression_statistics) :: stats(4)
type(bootstrap_regression_statistics) :: bstats(4)

Expand Down Expand Up @@ -102,7 +102,7 @@ program example

! Fit the data using the bootstrap approach
call bootstrap_linear_least_squares(3, .true., x, y, coeffs, ymodeled, &
residuals, stats = bstats, nsamples = 5000)
residuals, stats = bstats, nsamples = 100000, bias = bias)

! Display the results
print '(A)', new_line('') // "***** BOOTSTRAP METHOD *****"
Expand All @@ -114,12 +114,13 @@ program example

! Illustrate the statistics for each coefficient
do i = 1, size(bstats)
print '(AI0AF6.3AF6.3AF6.3AF6.3AF6.3)', &
print '(AI0AF6.3AF6.3AF6.3AF6.3AF6.3AF6.3)', &
"Coefficient ", i, ":" // nl // &
tab // "Standard Error: ", bstats(i)%standard_error, nl // &
tab // "Upper Confidence Interval: ", bstats(i)%upper_confidence_interval, nl // &
tab // "Lower Confidence Interval: ", bstats(i)%lower_confidence_interval, nl // &
tab // "T-Statistic: ", bstats(i)%t_statistic, nl // &
tab // "P-Value: ", bstats(i)%probability
tab // "P-Value: ", bstats(i)%probability, nl // &
tab // "Bias: ", bias(i)
end do
end program
64 changes: 44 additions & 20 deletions src/bootstrapping.f90
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
submodule (fstats) bootstrapping
use fstats_errors
use omp_lib
implicit none

! REFERENCES:
Expand Down Expand Up @@ -28,13 +30,13 @@ module subroutine bs_linear_least_squares_real64(order, intercept, x, y, &
! Parameters
real(real64), parameter :: zero = 0.0d0
real(real64), parameter :: half = 0.5d0
real(real64), parameter :: one = 1.0d0
real(real64), parameter :: p05 = 5.0d-2

! Local Variables
integer(int32) :: i, j, i1, i2, n, ns, nc, ncoeffs, flag
real(real64) :: eps, alph, ms
real(real64), allocatable, dimension(:,:) :: allcoeffs, allresid, allf, &
ally
real(real64), allocatable, dimension(:) :: fLocal, yLocal, rLocal
real(real64), allocatable, dimension(:,:) :: allcoeffs
type(t_distribution) :: dist
class(errors), pointer :: errmgr
type(errors), target :: deferr
Expand All @@ -53,7 +55,7 @@ module subroutine bs_linear_least_squares_real64(order, intercept, x, y, &
if (present(alpha)) then
alph = alpha
else
alph = 0.05d0
alph = p05
end if
n = size(x)
ncoeffs = order + 1
Expand All @@ -63,37 +65,54 @@ module subroutine bs_linear_least_squares_real64(order, intercept, x, y, &
if (intercept) nc = nc + 1
dist%dof = real(ns - nc)

! TO DO:
! - Input Checking
! - Verify the size of BIAS & ensure it's the appropriate size along with
! COEFF

! Compute the fit
call linear_least_squares(order, intercept, x, y, coeffs, &
ymod, resid, alpha = alph, err = errmgr)
if (errmgr%has_error_occurred()) return

! Memory Allocations
allocate(allcoeffs(ncoeffs, ns), allresid(n, ns), allf(n, ns), ally(n, ns - 1), &
stat = flag)
allocate(allcoeffs(ncoeffs, ns), source = zero, stat = flag)
if (flag /= 0) then
! TO DO: Memory Error
call report_memory_error(errmgr, "bs_linear_least_squares_real64", flag)
return
end if
allcoeffs(:,1) = coeffs

! Establish the epsilon term - to do, come up with a better resampling approach
! Establish the epsilon term
eps = standard_deviation(y) / sqrt(real(n))
call random_init(.false., .true.)
call random_number(ally)
ally = eps * (ally - half)

! Cycle over each data set
! Cycle over each data set and perform the fit
#ifdef USEOPENMP
!$OMP PARALLEL DO PRIVATE(fLocal, yLocal, rLocal) SHARED(allcoeffs)
do i = 2, ns
! Allocate local arrays on a per-thread basis
if (.not.allocated(fLocal)) allocate(fLocal(n))
if (.not.allocated(yLocal)) allocate(yLocal(n))
if (.not.allocated(rLocal)) allocate(rLocal(n))

! Compute a random data set
call random_number(yLocal)
yLocal = eps * (yLocal - half) + y

! Compute the fit of the perturbed data set
call linear_least_squares(order, intercept, x, yLocal, &
allcoeffs(:,i), fLocal, rLocal, alpha = alph)
end do
!$OMP END PARALLEL DO
#else
! OpenMP is not available - run in a serial manner
allocate(fLocal(n), yLocal(n), rLocal(n))
do i = 2, ns
! Compute a random data set
call random_number(yLocal)
yLocal = eps * (yLocal - half) + y

! Compute the fit of the perturbed data set
ally(:,i-1) = ally(:,i-1) + y
call linear_least_squares(order, intercept, x, ally(:,i-1), &
allcoeffs(:,i), allf(:,i), allresid(:,i), alpha = alph)
call linear_least_squares(order, intercept, x, yLocal, &
allcoeffs(:,i), fLocal, rLocal, alpha = alph)
end do
#endif

! Perform statistics calculations, if needed
if (present(stats)) then
Expand Down Expand Up @@ -122,9 +141,14 @@ module subroutine bs_linear_least_squares_real64(order, intercept, x, y, &
! Compute the bias for each parameter, if needed
if (present(bias)) then
! Verify the size of the array
if (size(bias) /= ncoeffs) then
call report_array_size_error(errmgr, &
"bs_linear_least_squares_real64", "bias", ncoeffs, size(bias))
return
end if

! Perform the calculations
do i = 1, nc
do i = 1, ncoeffs
bias(i) = coeffs(i) - mean(allcoeffs(i,:))
end do
end if
Expand Down
106 changes: 106 additions & 0 deletions tests/fstats_regression_tests.f90
Original file line number Diff line number Diff line change
Expand Up @@ -222,5 +222,111 @@ function regression_test_1() result(rst)
end if
end function

! ------------------------------------------------------------------------------
function bootstrap_regression_test_1() result(rst)
! Arguments
logical :: rst

! Variables
integer(int32), parameter :: npts = 31
real(real64), parameter :: tol = 1.0d-6
real(real64), parameter :: c1a(2) = [0.570009521158539d0, 0.486564710761349d0]
real(real64), parameter :: slope2 = 0.7668972622d0
real(real64) :: x(npts), y(npts), ymod(npts), resid(npts), c1(2), c2(2)

! Initialization
rst = .true.
x = [ &
0.0d0, &
0.1d0, &
0.2d0, &
0.3d0, &
0.4d0, &
0.5d0, &
0.6d0, &
0.7d0, &
0.8d0, &
0.9d0, &
1.0d0, &
1.1d0, &
1.2d0, &
1.3d0, &
1.4d0, &
1.5d0, &
1.6d0, &
1.7d0, &
1.8d0, &
1.9d0, &
2.0d0, &
2.1d0, &
2.2d0, &
2.3d0, &
2.4d0, &
2.5d0, &
2.6d0, &
2.7d0, &
2.8d0, &
2.9d0, &
3.0d0 &
]
y = [ &
0.577855138260449d0, &
0.614883095604222d0, &
0.633891127488559d0, &
0.718405829701721d0, &
0.753668502759107d0, &
0.814967857310145d0, &
0.861870996499704d0, &
0.925100533744381d0, &
0.947038018520063d0, &
1.025198043343280d0, &
1.042142354497610d0, &
1.121528566784440d0, &
1.177570314994070d0, &
1.229237567525370d0, &
1.261114062593870d0, &
1.296408162551430d0, &
1.394353657051120d0, &
1.367144391560370d0, &
1.428164431435150d0, &
1.548944935073270d0, &
1.505100149282990d0, &
1.560701023751520d0, &
1.609113012481530d0, &
1.663687366875500d0, &
1.707149545456870d0, &
1.800935947618110d0, &
1.847819988906440d0, &
1.884242821675810d0, &
1.966174239373140d0, &
1.977005266443110d0, &
2.034137257154140d0 &
]

! Fit the model - linear model with intercept
call bootstrap_linear_least_squares(1, .true., x, y, c1, ymod, resid)

if (.not.is_equal(c1(1), c1a(1), tol)) then
rst = .false.
print '(A)', "TEST FAILED: Regression Test 1 - 1"
end if
if (.not.is_equal(c1(2), c1a(2), tol)) then
rst = .false.
print '(A)', "TEST FAILED: Regression Test 1 - 2"
end if

! Fit the model - linear model without intercept
call bootstrap_linear_least_squares(1, .false., x, y, c2, ymod, resid)

if (.not.is_equal(c2(1), 0.0d0)) then
rst = .false.
print '(A)', "TEST FAILED: Regression Test 1 - 3"
end if
if (.not.is_equal(c2(2), slope2)) then
rst = .false.
print '(A)', "TEST FAILED: Regression Test 1 - 4"
end if
end function

! ------------------------------------------------------------------------------
end module
3 changes: 3 additions & 0 deletions tests/fstats_tests.f90
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ program tests
local = regression_test_1()
if (.not.local) overall = .false.

local = bootstrap_regression_test_1()
if (.not.local) overall = .false.

! Experimental Design
local = get_full_matrix_size_test_1()
if (.not.local) overall = .false.
Expand Down

0 comments on commit db84baf

Please sign in to comment.