-
Notifications
You must be signed in to change notification settings - Fork 0
/
references.bib
761 lines (708 loc) · 45.2 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
@book{allen1997,
  title     = {Understanding regression analysis},
  author    = {Allen, Michael Patrick},
  date      = {1997},
  publisher = {Plenum Press},
  location  = {New York},
  isbn      = {978-0-306-45648-0},
  pagetotal = {216},
}
@article{anderson1952,
  title        = {Asymptotic theory of certain ``goodness of fit'' criteria based on stochastic processes},
  author       = {Anderson, T. W. and Darling, D. A.},
  date         = {1952},
  journaltitle = {The Annals of Mathematical Statistics},
  volume       = {23},
  number       = {2},
  eprint       = {2236446},
  eprinttype   = {jstor},
  pages        = {193--212},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {0003-4851},
  url          = {https://www.jstor.org/stable/2236446},
  urldate      = {2024-09-28},
  abstract     = {The statistical problem treated is that of testing the hypothesis that n independent, identically distributed random variables have a specified continuous distribution function F(x). If Fn(x) is the empirical cumulative distribution function and ψ(t) is some nonnegative weight function (0 ≤ t ≤ 1), we consider $n^{1/2} \sup_{-\infty < x < \infty} \dots$ [the remainder of the abstract is truncated in the exported record].},
  langid       = {english},
}
@article{anderson1954,
  title        = {A test of goodness of fit},
  author       = {Anderson, T. W. and Darling, D. A.},
  date         = {1954-12-01},
  journaltitle = {Journal of the American Statistical Association},
  volume       = {49},
  number       = {268},
  pages        = {765--769},
  publisher    = {Taylor \& Francis},
  issn         = {0162-1459},
  doi          = {10.1080/01621459.1954.10501232},
  abstract     = {Some (large sample) significance points are tabulated for a distribution-free test of goodness of fit which was introduced earlier by the authors. The test, which uses the actual observations without grouping, is sensitive to discrepancies at the tails of the distribution rather than near the median. An illustration is given, using a numerical example used previously by Birnbaum in illustrating the Kolmogorov test.},
  langid       = {english},
}
@article{anderson1962,
  title        = {On the distribution of the two-sample {{Cramer-von Mises}} criterion},
  author       = {Anderson, T. W.},
  date         = {1962-09},
  journaltitle = {The Annals of Mathematical Statistics},
  volume       = {33},
  number       = {3},
  pages        = {1148--1159},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {0003-4851, 2168-8990},
  doi          = {10.1214/aoms/1177704477},
  abstract     = {The Cramer-von Mises \$\textbackslash omega\textasciicircum 2\$ criterion for testing that a sample, \$x\_1, \textbackslash cdots, x\_N\$, has been drawn from a specified continuous distribution \$F(x)\$ is \textbackslash begin\{equation*\}\textbackslash tag\{1\}\textbackslash omega\textasciicircum 2 = \textbackslash int\textasciicircum\textbackslash infty\_\{-\textbackslash infty\} \textbackslash lbrack F\_N(x) - F(x)\textbackslash rbrack\textasciicircum 2 dF(x),\textbackslash end\{equation*\} where \$F\_N(x)\$ is the empirical distribution function of the sample; that is, \$F\_N(x) = k/N\$ if exactly \$k\$ observations are less than or equal to \$x(k = 0, 1, \textbackslash cdots, N)\$. If there is a second sample, \$y\_1, \textbackslash cdots, y\_M\$, a test of the hypothesis that the two samples come from the same (unspecified) continuous distribution can be based on the analogue of \$N\textbackslash omega\textasciicircum 2\$, namely \textbackslash begin\{equation*\}\textbackslash tag\{2\} T = \textbackslash lbrack NM/(N + M)\textbackslash rbrack \textbackslash int\textasciicircum\textbackslash infty\_\{-\textbackslash infty\} \textbackslash lbrack F\_N(x) - G\_M(x)\textbackslash rbrack\textasciicircum 2 dH\_\{N+M\}(x),\textbackslash end\{equation*\} where \$G\_M(x)\$ is the empirical distribution function of the second sample and \$H\_\{N+M\}(x)\$ is the empirical distribution function of the two samples together [that is, \$(N + M)H\_\{N+M\}(x) = NF\_N(x) + MG\_M(x)\textbackslash rbrack\$. 
The limiting distribution of \$N\textbackslash omega\textasciicircum 2\$ as \$N \textbackslash rightarrow \textbackslash infty\$ has been tabulated [2], and it has been shown ([3], [4a], and [7]) that \$T\$ has the same limiting distribution as \$N \textbackslash rightarrow \textbackslash infty, M \textbackslash rightarrow \textbackslash infty\$, and \$N/M \textbackslash rightarrow \textbackslash lambda\$, where \$\textbackslash lambda\$ is any finite positive constant. In this note we consider the distribution of \$T\$ for small values of \$N\$ and \$M\$ and present tables to permit use of the criterion at some conventional significance levels for small values of \$N\$ and \$M\$. The limiting distribution seems a surprisingly good approximation to the exact distribution for moderate sample sizes (corresponding to the same feature for \$N\textbackslash omega\textasciicircum 2\$ [6]). The accuracy of approximation is better than in the case of the two-sample Kolmogorov-Smirnov statistic studied by Hodges [4].},
  langid       = {english},
}
@article{arif2022,
  title        = {Predictive models aren't for causal inference},
  author       = {Arif, Suchinta and MacNeil, M. Aaron},
  date         = {2022-08},
  journaltitle = {Ecology Letters},
  shortjournal = {Ecology Letters},
  volume       = {25},
  number       = {8},
  pages        = {1741--1745},
  issn         = {1461-023X, 1461-0248},
  doi          = {10.1111/ele.14033},
  langid       = {english},
}
@book{belsley2004,
  title      = {Regression diagnostics: identifying influential data and sources of collinearity},
  shorttitle = {Regression diagnostics},
  author     = {Belsley, David A. and Kuh, Edwin and Welsch, Roy E.},
  date       = {2004},
  series     = {Wiley {{Series}} in {{Probability}} and {{Statistics}}},
  publisher  = {John Wiley \& Sons},
  location   = {Hoboken, NJ},
  doi        = {10.1002/0471725153},
  abstract   = {The Wiley-Interscience Paperback Series consists of selected books that have been made more accessible to consumers in an effort to increase global appeal and general circulation. With these new unabridged softcover volumes, Wiley hopes to extend the lives of these works by making them available to future generations of statisticians, mathematicians, and scientists. "The title of the book more or less sums up the contents. It appears to me to represent a real breakthrough in the art of dealing in ‘unconventional’ data. . . . I found the whole book both readable and enjoyable. It is suitable for data analysts, academic statisticians, and professional software writers." –Journal of the Royal Statistical Society "The book assumes a working knowledge of all of the principal results and techniques used in least squares multiple regression, as expressed in vector and matrix notation. Given this background, the book is clear and easy to use. . . . The techniques are illustrated in great detail with practical data sets from econometrics." –Short Book Reviews, International Statistical Institute Regression Diagnostics: Identifying Influential Data and Sources of Collinearity provides practicing statisticians and econometricians with new tools for assessing quality and reliability of regression estimates. Diagnostic techniques are developed that aid in the systematic location of data points that are unusual or inordinately influential; measure the presence and intensity of collinear relations among the regression data; and help to identify variables involved in each and pinpoint estimated coefficients potentially most adversely affected. The book emphasizes diagnostics and includes suggestions for remedial action.},
  isbn       = {0-471-69117-8},
  langid     = {english},
  pagetotal  = {292},
  annotation = {Print ISBN: 9780471058564; Online ISBN: 9780471725152},
}
@article{bera1981,
  title        = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals: {{Monte Carlo}} evidence},
  shorttitle   = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals},
  author       = {Bera, Anil K. and Jarque, Carlos M.},
  date         = {1981-01-01},
  journaltitle = {Economics Letters},
  shortjournal = {Economics Letters},
  volume       = {7},
  number       = {4},
  pages        = {313--318},
  issn         = {0165-1765},
  doi          = {10.1016/0165-1765(81)90035-5},
  abstract     = {In this paper we study the performance of various tests for normality (N), homoscedasticity (H) and serial independence (I) of regression residuals (u) under one, two and three directional departures from H0:u∼NHI.},
  langid       = {english},
}
@article{bonett2002,
  title        = {A test of normality with high uniform power},
  author       = {Bonett, Douglas G. and Seier, Edith},
  date         = {2002-09-28},
  journaltitle = {Computational Statistics \& Data Analysis},
  shortjournal = {Computational Statistics \& Data Analysis},
  volume       = {40},
  number       = {3},
  pages        = {435--445},
  issn         = {0167-9473},
  doi          = {10.1016/S0167-9473(02)00074-9},
  abstract     = {Kurtosis can be measured in more than one way. A modification of Geary's measure of kurtosis is shown to be more sensitive to kurtosis in the center of the distribution while Pearson's measure of kurtosis is more sensitive to kurtosis in the tails of the distribution. The modified Geary measure and the Pearson measure are used to define a joint test of kurtosis that has high uniform power across a very wide range of symmetric nonnormal distributions.},
  langid       = {english},
}
@article{box1970,
  title        = {Distribution of residual autocorrelations in autoregressive-integrated moving average time series models},
  author       = {Box, G. E. P. and Pierce, David A.},
  date         = {1970-12-01},
  journaltitle = {Journal of the American Statistical Association},
  volume       = {65},
  number       = {332},
  pages        = {1509--1526},
  publisher    = {Taylor \& Francis},
  issn         = {0162-1459},
  doi          = {10.1080/01621459.1970.10481180},
  abstract     = {Many statistical models, and in particular autoregressive—moving average time series models, can be regarded as means of transforming the data to white noise, that is, to an uncorrelated sequence of errors. If the parameters are known exactly, this random sequence can be computed directly from the observations; when this calculation is made with estimates substituted for the true parameter values, the resulting sequence is referred to as the “residuals,” which can be regarded as estimates of the errors. If the appropriate model has been chosen, there will be zero autocorrelation in the errors. In checking adequacy of fit it is therefore logical to study the sample autocorrelation function of the residuals. For large samples the residuals from a correctly fitted model resemble very closely the true errors of the process; however, care is needed in interpreting the serial correlations of the residuals. It is shown here that the residual autocorrelations are to a close approximation representable as a singular linear transformation of the autocorrelations of the errors so that they possess a singular normal distribution. Failing to allow for this results in a tendency to overlook evidence of lack of fit. Tests of fit and diagnostic checks are devised which take these facts into account.},
  langid       = {english},
}
@article{breusch1979,
  title        = {A simple test for heteroscedasticity and random coefficient variation},
  author       = {Breusch, T. S. and Pagan, A. R.},
  date         = {1979},
  journaltitle = {Econometrica},
  volume       = {47},
  number       = {5},
  eprint       = {1911963},
  eprinttype   = {jstor},
  pages        = {1287--1294},
  publisher    = {Wiley and {Econometric Society}},
  issn         = {0012-9682},
  doi          = {10.2307/1911963},
  abstract     = {A simple test for heteroscedastic disturbances in a linear regression model is developed using the framework of the Lagrangian multiplier test. For a wide range of heteroscedastic and random coefficient specifications, the criterion is given as a readily computed function of the OLS residuals. Some finite sample evidence is presented to supplement the general asymptotic properties of Lagrangian multiplier tests.},
  langid       = {english},
}
@book{bussab1988,
  title      = {Análise de variância e de regressão: uma introdução},
  shorttitle = {Análise de variância e de regressão},
  author     = {Bussab, Wilton de Oliveira},
  date       = {1988},
  series     = {Métodos quantitativos},
  edition    = {2},
  publisher  = {Atlas},
  location   = {São Paulo},
  langid     = {brazilian},
}
@book{casella2002,
  title     = {Statistical inference},
  author    = {Casella, George and Berger, Roger L.},
  date      = {2002},
  series    = {Duxbury advanced series},
  edition   = {2},
  publisher = {Duxbury},
  location  = {Pacific Grove, CA},
  isbn      = {0-534-24312-6},
  langid    = {english},
  pagetotal = {660},
}
@article{cook1977,
  title        = {Detection of influential observation in linear regression},
  author       = {Cook, R. Dennis},
  date         = {1977-02},
  journaltitle = {Technometrics},
  volume       = {19},
  number       = {1},
  pages        = {15--18},
  publisher    = {Taylor \& Francis},
  issn         = {0040-1706},
  doi          = {10.1080/00401706.1977.10489493},
  langid       = {english},
}
@article{cook1979,
  title        = {Influential observations in linear regression},
  author       = {Cook, R. Dennis},
  date         = {1979-03-01},
  journaltitle = {Journal of the American Statistical Association},
  volume       = {74},
  number       = {365},
  pages        = {169--174},
  publisher    = {Taylor \& Francis},
  issn         = {0162-1459},
  doi          = {10.1080/01621459.1979.10481634},
  abstract     = {Characteristics of observations which cause them to be influential in a least squares analysis are investigated and related to residual variances, residual correlations, and the convex hull of the observed values of the independent variables. It is shown how deleting an observation can substantially alter an analysis by changing the partial F-tests, the studentized residuals, the residual variances, the convex hull of the independent variables, and the estimated parameter vector. Outliers are discussed briefly, and an example is presented.},
  langid       = {english},
}
@article{cramer1928,
  title        = {On the composition of elementary errors: {{First}} paper: {{Mathematical}} deductions},
  shorttitle   = {On the composition of elementary errors},
  author       = {Cramér, Harald},
  date         = {1928-01-01},
  journaltitle = {Scandinavian Actuarial Journal},
  volume       = {1928},
  number       = {1},
  pages        = {13--74},
  publisher    = {Taylor \& Francis},
  issn         = {0346-1238},
  doi          = {10.1080/03461238.1928.10416862},
  langid       = {english},
}
@article{dagostino1971,
  title        = {An omnibus test of normality for moderate and large size samples},
  author       = {D'Agostino, Ralph B.},
  date         = {1971-08-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {58},
  number       = {2},
  pages        = {341--348},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/58.2.341},
  abstract     = {We present a test of normality based on a statistic D which is up to a constant the ratio of Downton's linear unbiased estimator of the population standard deviation to the sample standard deviation. For the usual levels of significance Monte Carlo simulations indicate that Cornish-Fisher expansions adequately approximate the null distribution of D if the sample size is 50 or more. The test is an omnibus test, being appropriate to detect deviations from normality due either to skewness or kurtosis. Simulation results of powers for various alternatives when the sample size is 50 indicate that the test compares favourably with the Shapiro-Wilk W test, √b1, b2 and the ratio of range to standard deviation.},
  langid       = {english},
}
@article{dagostino1973,
  title        = {Tests for departure from normality. {{Empirical}} results for the distributions of {$b_2$} and {$\sqrt{b_1}$}},
  author       = {D'Agostino, Ralph B. and Pearson, E. S.},
  date         = {1973},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {60},
  number       = {3},
  pages        = {613--622},
  issn         = {0006-3444, 1464-3510},
  doi          = {10.1093/biomet/60.3.613},
  langid       = {english},
}
@article{dagostino1990,
  title        = {A suggestion for using powerful and informative tests of normality},
  author       = {D'Agostino, Ralph B. and Belanger, Albert},
  date         = {1990},
  journaltitle = {The American Statistician},
  volume       = {44},
  number       = {4},
  eprint       = {2684359},
  eprinttype   = {jstor},
  pages        = {316--321},
  publisher    = {{American Statistical Association} and {Taylor \& Francis, Ltd.}},
  issn         = {0003-1305},
  doi          = {10.2307/2684359},
  abstract     = {For testing that an underlying population is normally distributed the skewness and kurtosis statistics, \$\textbackslash sqrt\{b\_1\}\$ and b2, and the D'Agostino-Pearson K2 statistic that combines these two statistics have been shown to be powerful and informative tests. Their use, however, has not been as prevalent as their usefulness. We review these tests and show how readily available and popular statistical software can be used to implement them. Their relationship to deviations from linearity in normal probability plotting is also presented.},
  langid       = {english},
}
@article{dallal1986,
  title        = {An analytic approximation to the distribution of {{Lilliefors}}'s test statistic for normality},
  author       = {Dallal, Gerard E. and Wilkinson, Leland},
  date         = {1986-11},
  journaltitle = {The American Statistician},
  shortjournal = {The American Statistician},
  volume       = {40},
  number       = {4},
  pages        = {294--296},
  issn         = {0003-1305, 1537-2731},
  doi          = {10.1080/00031305.1986.10475419},
  langid       = {english},
}
@book{dalpiaz,
  title  = {Applied statistics with {{R}}},
  author = {Dalpiaz, David},
  url    = {https://book.stat420.org/},
  langid = {english},
}
@book{degroot2012,
  title      = {Probability and statistics},
  author     = {DeGroot, Morris H. and Schervish, Mark J.},
  date       = {2012},
  edition    = {4},
  publisher  = {Addison-Wesley},
  location   = {Boston},
  isbn       = {978-0-321-50046-5},
  langid     = {english},
  pagetotal  = {893},
  annotation = {OCLC: ocn502674206},
}
@book{dudek2020,
  title  = {Linear models with {{R}}: emphasis on 2-{{IV}} models: basics of multiple regression},
  author = {Dudek, Bruce},
  date   = {2020-09-09},
  url    = {https://bcdudek.net/regression1/},
  langid = {english},
}
@article{durbin1950,
  title        = {Testing for serial correlation in least squares regression. {{I}}},
  author       = {Durbin, J. and Watson, G. S.},
  date         = {1950-12-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {37},
  number       = {3--4},
  pages        = {409--428},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/37.3-4.409},
  langid       = {english},
}
@article{durbin1951,
  title        = {Testing for serial correlation in least squares regression. {{II}}},
  author       = {Durbin, J. and Watson, G. S.},
  date         = {1951-06-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {38},
  number       = {1--2},
  pages        = {159--178},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/38.1-2.159},
  langid       = {english},
}
@article{durbin1971,
  title        = {Testing for serial correlation in least squares regression. {{III}}},
  author       = {Durbin, J. and Watson, G. S.},
  date         = {1971-04-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {58},
  number       = {1},
  pages        = {1--19},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/58.1.1},
  abstract     = {The paper considers a number of problems arising from the test of serial correlation based on the d statistic proposed earlier by the authors (Durbin \& Watson, 1950, 1951). Methods of computing the exact distribution of d are investigated and the exact distribution is compared with six approximations to it for four sets of published data. It is found that approximations suggested by Theil and Nagar and by Hannan are too inaccurate for practical use but that the beta approximation proposed in the 1950 and 1951 papers and a new approximation, called by us the a + bdu approximation and based, like the beta approximation, on the exact first two moments of d, both perform well. The power of the d test is compared with that of certain exact tests proposed by Theil, Durbin, Koerts and Abrahamse from the standpoint of invariance theory. It is shown that the d test is locally most powerful invariant but that the other tests are not. There are three appendices. The first gives an account of the exact distribution of d. The second derives the mean and variance to a second order of approximation of a modified maximum likelihood statistic closely related to d. The third sets out details of the computations required for the a + bdu approximation.},
  langid       = {english},
}
@book{fox2016,
  title     = {Applied regression analysis and generalized linear models},
  author    = {Fox, John},
  date      = {2016},
  edition   = {3},
  publisher = {Sage},
  location  = {Thousand Oaks, CA},
  isbn      = {978-1-4522-0566-3},
  langid    = {english},
  pagetotal = {791},
}
@article{gorman2014,
  title        = {Ecological sexual dimorphism and environmental variability within a community of {{Antarctic}} penguins (genus {{Pygoscelis}})},
  author       = {Gorman, Kristen B. and Williams, Tony D. and Fraser, William R.},
  date         = {2014-03-05},
  journaltitle = {PLOS ONE},
  shortjournal = {PLOS ONE},
  volume       = {9},
  number       = {3},
  pages        = {e90081},
  publisher    = {Public Library of Science},
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0090081},
  abstract     = {Background Sexual segregation in vertebrate foraging niche is often associated with sexual size dimorphism (SSD), i.e., ecological sexual dimorphism. Although foraging behavior of male and female seabirds can vary markedly, differences in isotopic (carbon, δ13C and nitrogen, δ15N) foraging niche are generally more pronounced within sexually dimorphic species and during phases when competition for food is greater. We examined ecological sexual dimorphism among sympatric nesting Pygoscelis penguins asking whether environmental variability is associated with differences in male and female pre-breeding foraging niche. We predicted that all Pygoscelis species would forage sex-specifically, and that higher quality winter habitat, i.e., higher or lower sea ice coverage for a given species, would be associated with a more similar foraging niche among the sexes. Results P2/P8 primers reliably amplified DNA of all species. On average, male Pygoscelis penguins are structurally larger than female conspecifics. However, chinstrap penguins were more sexually dimorphic in culmen and flipper features than Adélie and gentoo penguins. Adélies and gentoos were more sexually dimorphic in body mass than chinstraps. Only male and female chinstraps and gentoos occupied separate δ15N foraging niches. Strong year effects in δ15N signatures were documented for all three species, however, only for Adélies, did yearly variation in δ15N signatures tightly correlate with winter sea ice conditions. There was no evidence that variation in sex-specific foraging niche interacted with yearly winter habitat quality. Conclusion Chinstraps were most sexually size dimorphic followed by gentoos and Adélies. Pre-breeding sex-specific foraging niche was associated with overall SSD indices across species; male chinstrap and gentoo penguins were enriched in δ15N relative to females. 
Our results highlight previously unknown trophic pathways that link Pygoscelis penguins with variation in Southern Ocean sea ice suggesting that each sex within a species should respond similarly in pre-breeding trophic foraging to changes in future winter habitat.},
  langid       = {english},
}
@online{greener2020,
  title        = {Stop testing for normality},
  author       = {Greener, Robert},
  date         = {2020-08-04T12:53:26},
  url          = {https://towardsdatascience.com/stop-testing-for-normality-dba96bb73f90},
  urldate      = {2024-09-29},
  abstract     = {Normality tests are misleading and a waste of your time!},
  langid       = {english},
  organization = {Medium},
}
@book{hair2019,
  title     = {Multivariate data analysis},
  author    = {Hair, Joseph F. and Black, William C. and Babin, Barry J. and Anderson, Rolph E.},
  date      = {2019},
  edition   = {8},
  publisher = {Cengage},
  location  = {Andover, Hampshire},
  isbn      = {978-1-4737-5654-0},
  langid    = {english},
  pagetotal = {813},
}
@article{jarque1980,
  title        = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals},
  author       = {Jarque, Carlos M. and Bera, Anil K.},
  date         = {1980-01-01},
  journaltitle = {Economics Letters},
  shortjournal = {Economics Letters},
  volume       = {6},
  number       = {3},
  pages        = {255--259},
  issn         = {0165-1765},
  doi          = {10.1016/0165-1765(80)90024-5},
  abstract     = {We use the Lagrange multiplier procedure to derive efficient joint tests for residual normality, homoscedasticity and serial independence. The tests are simple to compute and asymptotically distributed as χ2.},
  langid       = {english},
}
@article{jarque1987,
  title        = {A test for normality of observations and regression residuals},
  author       = {Jarque, Carlos M. and Bera, Anil K.},
  date         = {1987},
  journaltitle = {International Statistical Review},
  volume       = {55},
  number       = {2},
  eprint       = {1403192},
  eprinttype   = {jstor},
  pages        = {163--172},
  publisher    = {Wiley and {International Statistical Institute (ISI)}},
  issn         = {0306-7734},
  doi          = {10.2307/1403192},
  abstract     = {Using the Lagrange multiplier procedure or score test on the Pearson family of distributions we obtain tests for normality of observations and regression disturbances. The tests suggested have optimum asymptotic power properties and good finite sample performance. Due to their simplicity they should prove to be useful tools in statistical analysis.},
  langid       = {english},
}
@book{johnson2013,
  title      = {Applied multivariate statistical analysis: {{Pearson}} new international edition},
  shorttitle = {Applied multivariate statistical analysis},
  author     = {Johnson, Richard A. and Wichern, Dean W.},
  date       = {2013},
  edition    = {6},
  publisher  = {Pearson},
  location   = {Harlow, UK},
  abstract   = {For courses in Multivariate Statistics, Marketing Research, Intermediate Business Statistics, Statistics in Education, and graduate-level courses in Experimental Design and Statistics. Appropriate for experimental scientists in a variety of disciplines, this market-leading text offers a readable introduction to the statistical analysis of multivariate observations. Its primary goal is to impart the knowledge necessary to make proper interpretations and select appropriate techniques for analysing multivariate data. Ideal for a junior/senior or graduate level course that explores the statistical methods for describing and analysing multivariate data, the text assumes two or more statistics courses as a prerequisite. The full text downloaded to your computer With eBooks you can: search for key concepts, words and phrases make highlights and notes as you study share your notes with friends eBooks are downloaded to your computer and accessible either offline through the Bookshelf (available as a free download), available online and also via the iPad and Android apps. Upon purchase, you will receive via email the code and instructions on how to access this product. Time limit The eBooks products do not have an expiry date. You will continue to access your digital ebook products whilst you have your Bookshelf installed.},
  isbn       = {978-1-292-03757-8},
  langid     = {english},
  annotation = {OCLC: 1277290670. Pearson New International Edition.},
}
@article{koenker1981,
  title        = {A note on studentizing a test for heteroscedasticity},
  author       = {Koenker, Roger},
  date         = {1981-09-01},
  journaltitle = {Journal of Econometrics},
  shortjournal = {Journal of Econometrics},
  volume       = {17},
  number       = {1},
  pages        = {107--112},
  issn         = {0304-4076},
  doi          = {10.1016/0304-4076(81)90062-2},
  abstract     = {Breusch and Pagan (1979) have recently proposed a convenient test for heteroscedasticity in general linear models. This note derives the asymptotic distribution of their test under sequences of contiguous alternatives to the null hypothesis of homoscedasticity. The test is shown to possess asymptotically incorrect size (nominal significance level) except in the case of strictly Gaussian disturbances. A slight modification of the test is proposed which corrects this defect.},
  langid       = {english},
}
@article{kolmogorov1933,
  title        = {Sulla determinazione empirica di una legge di distribuzione},
  author       = {Kolmogorov, A.},
  date         = {1933},
  journaltitle = {Giornale dell'Istituto Italiano degli Attuari},
  volume       = {4},
  pages        = {83--91},
  langid       = {italian},
}
@article{kozak2018,
  title        = {What's normal anyway? {{Residual}} plots are more telling than significance tests when checking {{ANOVA}} assumptions},
  shorttitle   = {What's normal anyway?},
  author       = {Kozak, M. and Piepho, H.-P.},
  date         = {2018},
  journaltitle = {Journal of Agronomy and Crop Science},
  volume       = {204},
  number       = {1},
  pages        = {86--98},
  issn         = {1439-037X},
  doi          = {10.1111/jac.12220},
  abstract     = {We consider two questions important for applying analysis of variance (ANOVA): Should normality be checked on the raw data or on the residuals (or is it immaterial which of the two approaches we take)? Should normality and homogeneity of variance be checked using significance tests or diagnostic plots (or both)? Based on two examples, we show that residuals should be used for model checking and that residual plots are better for checking ANOVA assumptions than statistical tests. We also discuss why one should be very cautious when using statistical tests to check the assumptions.},
  langid       = {english},
}
@book{kuhn2022,
  title      = {Tidy modeling with {{R}}: a framework for modeling in the tidyverse},
  shorttitle = {Tidy modeling with {{R}}},
  author     = {Kuhn, Max and Silge, Julia},
  date       = {2022},
  publisher  = {O'Reilly Media},
  location   = {Sebastopol, CA},
  url        = {https://www.tmwr.org/},
  abstract   = {Get going with tidymodels, a collection of R packages for modeling and machine learning. Whether you're just starting out or have years of experience with modeling, this practical introduction shows data analysts, business analysts, and data scientists how the tidymodels framework offers a consistent, flexible approach for your work. RStudio engineers Max Kuhn and Julia Silge demonstrate ways to create models by focusing on an R dialect called the tidyverse. Software that adopts tidyverse principles shares both a high-level design philosophy and low-level grammar and data structures, so learning one piece of the ecosystem makes it easier to learn the next. You'll understand why the tidymodels framework has been built to be used by a broad range of people.},
  isbn       = {978-1-4920-9648-1},
  langid     = {english},
  pagetotal  = {363},
  annotation = {OCLC: on1338675673},
}
% Lilliefors (1967): Kolmogorov-Smirnov test for normality when mean and variance are unknown.
@article{lilliefors1967,
  author       = {Lilliefors, Hubert W.},
  title        = {On the {{Kolmogorov-Smirnov}} test for normality with mean and variance unknown},
  date         = {1967-06},
  journaltitle = {Journal of the American Statistical Association},
  shortjournal = {Journal of the American Statistical Association},
  volume       = {62},
  number       = {318},
  pages        = {399--402},
  issn         = {0162-1459, 1537-274X},
  doi          = {10.1080/01621459.1967.10482916},
  langid       = {english},
}
% Ljung and Box (1978): modified overall lack-of-fit test for time series models.
@article{ljung1978,
  author       = {Ljung, G. M. and Box, G. E. P.},
  title        = {On a measure of lack of fit in time series models},
  date         = {1978-08-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {65},
  number       = {2},
  pages        = {297--303},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/65.2.297},
  langid       = {english},
  abstract     = {The overall test for lack of fit in autoregressive-moving average models proposed by Box \& Pierce (1970) is considered. It is shown that a substantially improved approximation results from a simple modification of this test. Some consideration is given to the power of such tests and their robustness when the innovations are nonnormal. Similar modifications in the overall tests used for transfer function-noise models are proposed.},
}
% Massey (1951): the Kolmogorov-Smirnov test as a goodness-of-fit test.
@article{massey1951,
  author       = {Massey, Frank J.},
  title        = {The {{Kolmogorov-Smirnov}} test for goodness of fit},
  date         = {1951-03},
  journaltitle = {Journal of the American Statistical Association},
  shortjournal = {Journal of the American Statistical Association},
  volume       = {46},
  number       = {253},
  pages        = {68--78},
  issn         = {0162-1459, 1537-274X},
  doi          = {10.1080/01621459.1951.10500769},
  langid       = {english},
}
% Newey and West (1987): heteroskedasticity and autocorrelation consistent covariance matrix.
% The publisher field is a biblatex literal list: individual publishers are separated by "and".
@article{newey1987,
  author       = {Newey, Whitney K. and West, Kenneth D.},
  title        = {A simple, positive semi-definite, heteroskedasticity and autocorrelation consistent covariance matrix},
  date         = {1987},
  journaltitle = {Econometrica},
  volume       = {55},
  number       = {3},
  pages        = {703--708},
  publisher    = {Wiley and Econometric Society},
  issn         = {0012-9682},
  eprint       = {1913610},
  eprinttype   = {jstor},
  doi          = {10.2307/1913610},
  langid       = {english},
}
% Newey and West (1994): automatic selection of the number of autocovariances for HAC covariance estimation.
@article{newey1994,
  author       = {Newey, Whitney K. and West, Kenneth D.},
  title        = {Automatic lag selection in covariance matrix estimation},
  date         = {1994-10-01},
  journaltitle = {The Review of Economic Studies},
  shortjournal = {The Review of Economic Studies},
  volume       = {61},
  number       = {4},
  pages        = {631--653},
  issn         = {0034-6527},
  doi          = {10.2307/2297912},
  langid       = {english},
  abstract     = {We propose a nonparametric method for automatically selecting the number of autocovariances to use in computing a heteroskedasticity and autocorrelation consistent covariance matrix. For a given kernel for weighting the autocovariances, we prove that our procedure is asymptotically equivalent to one that is optimal under a mean-squared error loss function. Monte Carlo simulations suggest that our procedure performs tolerably well, although it does result in size distortions.},
}
% Pearson (1900): criterion for judging whether a system of deviations can be attributed to random sampling.
% NOTE(review): the leading "X." in the title looks like the journal's article number - confirm before removing.
@article{pearson1900,
  author       = {Pearson, Karl},
  title        = {X. {{On}} the criterion that a given system of deviations from the probable in the case of a correlated system of variables is such that it can be reasonably supposed to have arisen from random sampling},
  date         = {1900-07},
  journaltitle = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science},
  volume       = {50},
  number       = {302},
  pages        = {157--175},
  publisher    = {Taylor \& Francis},
  issn         = {1941-5982},
  doi          = {10.1080/14786440009463897},
  langid       = {english},
}
% Peek, Leffler, Flint and Ryel (2003): how much variance statistical models explain in ecological studies.
% Located through JSTOR (eprint/url); no DOI is recorded in this entry.
@article{peek2003,
  author       = {Peek, Michael S. and Leffler, A. Joshua and Flint, Stephan D. and Ryel, Ronald J.},
  title        = {How much variance is explained by ecologists? {{Additional}} perspectives},
  shorttitle   = {How much variance is explained by ecologists?},
  date         = {2003},
  journaltitle = {Oecologia},
  volume       = {137},
  number       = {2},
  pages        = {161--170},
  publisher    = {Springer},
  issn         = {0029-8549},
  eprint       = {4223745},
  eprinttype   = {jstor},
  url          = {https://www.jstor.org/stable/4223745},
  urldate      = {2024-09-29},
  langid       = {english},
  abstract     = {A recent meta-analysis of meta-analyses by Møller and Jennions (2002, Oecologia 132: 492-500) suggested that ecologists using statistical models are explaining between 2.5\% and 5.42\% of the variability in ecological studies. Although we agree that there is considerable variability in ecological systems that is not explained, we disagree with the approach and general conclusions of Møller and Jennions. As an alternate perspective, we explored the question: "How much ecological variation in relationships is not explained?" We did this by examining published studies in five different journals representative of the numerous sub-disciplines of ecology. We quantified the proportion of variance not explained in statistical models as the residual or random error compared to the total variation in the data set. Our results indicate that statistical models explain roughly half of the variation in variables of interest, vastly different from the 2.5\%-5.42\% reported by Møller and Jennions. This difference resulted largely from a different level of analysis: we considered the original study to be the appropriate level for quantifying variability while Møller and Jennions combined studies at different temporal and spatial scales and attempted to find universal single-factor relationships between ecological variables across study organisms or locations. Therefore, we believe that Møller and Jennions actually measured the universality of single factor effects across multiple ecological systems, not the amount of variability in ecological studies explained by ecologists. This study, combined with Møller and Jennions', illustrates importance of applying statistical models appropriately to assess ecological relationships.},
}
% Popper (1979): Objective knowledge, Oxford University Press.
% The annotation field (in Portuguese) records that the work was originally published in 1972.
@book{popper1979,
  author     = {Popper, Karl R.},
  title      = {Objective knowledge: an evolutionary approach},
  shorttitle = {Objective knowledge},
  date       = {1979},
  publisher  = {Oxford University Press},
  location   = {Oxford, UK},
  isbn       = {978-0-19-824370-0},
  pagetotal  = {395},
  langid     = {english},
  annotation = {Publicado originalmente em 1972.},
}
% Ramsey (1969): tests for specification errors in classical linear least-squares regression.
% The publisher field is a biblatex literal list: individual publishers are separated by "and".
@article{ramsey1969,
  author       = {Ramsey, J. B.},
  title        = {Tests for specification errors in classical linear least-squares regression analysis},
  date         = {1969},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {31},
  number       = {2},
  pages        = {350--371},
  publisher    = {Royal Statistical Society and Oxford University Press},
  issn         = {0035-9246},
  doi          = {10.1111/j.2517-6161.1969.tb00796.x},
  langid       = {english},
  abstract     = {The effects on the distribution of least-squares residuals of a series of model mis-specifications are considered. It is shown that for a variety of specification errors the distributions of the least-squares residuals are normal, but with non-zero means. An alternative predictor of the disturbance vector is used in developing four procedures for testing for the presence of specification error. The specification errors considered are omitted variables, incorrect functional form, simultaneous equation problems and heteroskedasticity.},
}
% Schucany and Ng (2006): preliminary normality tests do not validate the one-sample Student t.
% NOTE(review): the abstract ends in "dist..." and appears truncated at export - confirm against the publisher record.
@article{schucany2006,
  author       = {Schucany, William R. and Ng, H. K. Tony},
  title        = {Preliminary goodness-of-fit tests for normality do not validate the one-sample {{Student}} t},
  date         = {2006-12-01},
  journaltitle = {Communications in Statistics - Theory and Methods},
  volume       = {35},
  number       = {12},
  pages        = {2275--2286},
  publisher    = {Taylor \& Francis Group},
  doi          = {10.1080/03610920600853308},
  langid       = {english},
  abstract     = {One of the most basic topics in many introductory statistical methods texts is inference for a population mean, μ. The primary tool for confidence intervals and tests is the Student t sampling dist...},
}
% Shapiro and Wilk (1965): analysis of variance test for normality (complete samples).
@article{shapiro1965,
  author       = {Shapiro, S. S. and Wilk, M. B.},
  title        = {An analysis of variance test for normality (complete samples)},
  date         = {1965-12-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {52},
  number       = {3-4},
  pages        = {591--611},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/52.3-4.591},
  langid       = {english},
}
% Shapiro and Francia (1972): approximate analysis of variance test for normality, a modification of the Shapiro-Wilk W statistic.
@article{shapiro1972,
  author       = {Shapiro, S. S. and Francia, R. S.},
  title        = {An approximate analysis of variance test for normality},
  date         = {1972-03-01},
  journaltitle = {Journal of the American Statistical Association},
  volume       = {67},
  number       = {337},
  pages        = {215--216},
  issn         = {0162-1459},
  doi          = {10.1080/01621459.1972.10481232},
  langid       = {english},
  abstract     = {This article presents a modification of the Shapiro-Wilk W statistic for testing normality which can be used with large samples. Shapiro and Wilk gave coefficients and percentage points for sample sizes up to 50. These coefficients required obtaining an approximation to the covariance matrix of the normal order statistics. The proposed test uses coefficients which depend only on the expected values of the normal order statistics which are generally available. Results of an empirical sampling study to compare the sensitivity of the test statistic to the W test statistic are briefly discussed.},
}
% Shatz (2024): assumption checking versus testing; visualization and effect sizes in statistical diagnostics.
@article{shatz2024,
  author       = {Shatz, Itamar},
  title        = {Assumption-checking rather than (just) testing: the importance of visualization and effect size in statistical diagnostics},
  shorttitle   = {Assumption-checking rather than (just) testing},
  date         = {2024-02-01},
  journaltitle = {Behavior Research Methods},
  shortjournal = {Behav Res},
  volume       = {56},
  number       = {2},
  pages        = {826--845},
  issn         = {1554-3528},
  doi          = {10.3758/s13428-023-02072-x},
  langid       = {english},
  abstract     = {Statistical methods generally have assumptions (e.g., normality in linear regression models). Violations of these assumptions can cause various issues, like statistical errors and biased estimates, whose impact can range from inconsequential to critical. Accordingly, it is important to check these assumptions, but this is often done in a flawed way. Here, I first present a prevalent but problematic approach to diagnostics—testing assumptions using null hypothesis significance tests (e.g., the Shapiro–Wilk test of normality). Then, I consolidate and illustrate the issues with this approach, primarily using simulations. These issues include statistical errors (i.e., false positives, especially with large samples, and false negatives, especially with small samples), false binarity, limited descriptiveness, misinterpretation (e.g., of p-value as an~effect size), and potential testing failure due to unmet test assumptions. Finally, I synthesize the implications of these issues for statistical diagnostics, and provide practical recommendations for improving such diagnostics. Key recommendations include maintaining awareness of the issues with assumption tests (while recognizing they can be useful), using appropriate combinations of diagnostic methods (including visualization and effect sizes) while recognizing their limitations, and distinguishing between testing and checking assumptions. Additional recommendations include judging assumption violations as a complex spectrum (rather than a simplistic binary), using programmatic tools that increase replicability and decrease researcher degrees of freedom, and sharing the material and rationale involved in the diagnostics.},
}
% Smirnov (1948): table for estimating the goodness of fit of empirical distributions.
% NOTE(review): number, issn and doi were added from the publisher record - confirm against Project Euclid.
@article{smirnov1948,
  author       = {Smirnov, N.},
  title        = {Table for estimating the goodness of fit of empirical distributions},
  date         = {1948},
  journaltitle = {Annals of Mathematical Statistics},
  volume       = {19},
  number       = {2},
  pages        = {279--281},
  issn         = {0003-4851},
  doi          = {10.1214/aoms/1177730256},
  langid       = {english},
}
% Struck (2024): Regression diagnostics with R, online book from the University of Wisconsin-Madison.
% Title is stored in sentence case; only the language name R is brace-protected.
@book{struck2024,
  author    = {Struck, Jason},
  title     = {Regression diagnostics with {{R}}},
  date      = {2024-06},
  publisher = {University of Wisconsin-Madison},
  location  = {Madison, WI},
  url       = {https://sscc.wisc.edu/sscc/pubs/RegDiag-R/},
  urldate   = {2024-09-29},
  langid    = {english},
  abstract  = {This book uses R. A Stata version of this book is available at Regression Diagnostics with Stata. Regression diagnostics are a critical step in the modeling process. Diagnostics for regression models are tools that assess a model’s compliance to its assumptions and investigate if there is a single observation or group of observations that are not well represented by the model. These tools allow researchers to evaluate if a model appropriately represents the data of their study. In this book we separate diagnostics from the other parts of model selection to provide a focus on this important topic. This separation is not meant to imply that these tools are used separately from other regression modeling tools.},
}
% Thode (2002): Testing for normality, Marcel Dekker; number 164 in its series.
@book{thode2002,
  author    = {Thode, Henry C.},
  title     = {Testing for normality},
  date      = {2002},
  series    = {Statistics, textbooks and monographs},
  number    = {164},
  publisher = {Marcel Dekker},
  location  = {New York},
  isbn      = {978-0-8247-9613-6},
  pagetotal = {479},
  langid    = {english},
}
% White (1980): heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity.
% The publisher field is a biblatex literal list: individual publishers are separated by "and".
@article{white1980,
  author       = {White, Halbert},
  title        = {A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity},
  date         = {1980},
  journaltitle = {Econometrica},
  volume       = {48},
  number       = {4},
  pages        = {817--838},
  publisher    = {Wiley and Econometric Society},
  issn         = {0012-9682},
  eprint       = {1912934},
  eprinttype   = {jstor},
  doi          = {10.2307/1912934},
  langid       = {english},
  abstract     = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. The test has an appealing least squares interpretation.},
}
% Zeileis (2004): HC and HAC covariance matrix estimators as implemented in the R package sandwich.
@article{zeileis2004,
  author       = {Zeileis, Achim},
  title        = {Econometric computing with {{HC}} and {{HAC}} covariance matrix estimators},
  date         = {2004-11-29},
  journaltitle = {Journal of Statistical Software},
  volume       = {11},
  number       = {10},
  pages        = {1--17},
  issn         = {1548-7660},
  doi          = {10.18637/jss.v011.i10},
  langid       = {english},
  abstract     = {Data described by econometric models typically contains autocorrelation and/or heteroskedasticity of unknown form and for inference in such models it is essential to use covariance matrix estimators that can consistently estimate the covariance of the model parameters. Hence, suitable heteroskedasticity consistent (HC) and heteroskedasticity and autocorrelation consistent (HAC) estimators have been receiving attention in the econometric literature over the last 20 years. To apply these estimators in practice, an implementation is needed that preferably translates the conceptual properties of the underlying theoretical frameworks into computational tools. In this paper, such an implementation in the package sandwich in the R system for statistical computing is described and it is shown how the suggested functions provide reusable components that build on readily existing functionality and how they can be integrated easily into new inferential procedures or applications. The toolbox contained in sandwich is extremely flexible and comprehensive, including specific functions for the most important HC and HAC estimators from the econometric literature. Several real-world data sets are used to illustrate how the functionality can be integrated into applications.},
}