<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta content="width=device-width, initial-scale=1.0" name="viewport">
<title>Polynomial Regression</title>
<meta content="" name="description">
<meta content="" name="keywords">
<!-- Favicons -->
<link href="assets/img/Favicon-1.png" rel="icon">
<link href="assets/img/Favicon-1.png" rel="apple-touch-icon">
<!-- Google Fonts -->
<link href="https://fonts.googleapis.com/css?family=Open+Sans:300,300i,400,400i,600,600i,700,700i|Raleway:300,300i,400,400i,500,500i,600,600i,700,700i|Poppins:300,300i,400,400i,500,500i,600,600i,700,700i" rel="stylesheet">
<!-- Vendor CSS Files -->
<link href="assets/vendor/aos/aos.css" rel="stylesheet">
<link href="assets/vendor/bootstrap/css/bootstrap.min.css" rel="stylesheet">
<link href="assets/vendor/bootstrap-icons/bootstrap-icons.css" rel="stylesheet">
<link href="assets/vendor/boxicons/css/boxicons.min.css" rel="stylesheet">
<link href="assets/vendor/glightbox/css/glightbox.min.css" rel="stylesheet">
<link href="assets/vendor/swiper/swiper-bundle.min.css" rel="stylesheet">
<!-- Creating a python code section-->
<link rel="stylesheet" href="assets/css/prism.css">
<script src="assets/js/prism.js"></script>
<!-- Template Main CSS File -->
<link href="assets/css/style.css" rel="stylesheet">
<!-- To set the icon, visit https://fontawesome.com/account-->
<script src="https://kit.fontawesome.com/5d25c1efd3.js" crossorigin="anonymous"></script>
<!-- end of icon-->
<script type="text/javascript" async
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML">
</script>
<!-- =======================================================
* Template Name: iPortfolio
* Updated: Sep 18 2023 with Bootstrap v5.3.2
* Template URL: https://bootstrapmade.com/iportfolio-bootstrap-portfolio-websites-template/
* Author: BootstrapMade.com
* License: https://bootstrapmade.com/license/
======================================================== -->
</head>
<body>
<!-- ======= Mobile nav toggle button ======= -->
<i class="bi bi-list mobile-nav-toggle d-xl-none"></i>
<!-- ======= Header ======= -->
<header id="header">
<div class="d-flex flex-column">
<div class="profile">
<img src="assets/img/myphoto.jpeg" alt="" class="img-fluid rounded-circle">
<h1 class="text-light"><a href="index.html">Arun</a></h1>
<div class="social-links mt-3 text-center">
<a href="https://www.linkedin.com/in/arunp77/" target="_blank" class="linkedin"><i class="bx bxl-linkedin"></i></a>
<a href="https://github.com/arunp77" target="_blank" class="github"><i class="bx bxl-github"></i></a>
<a href="https://twitter.com/arunp77_" target="_blank" class="twitter"><i class="bx bxl-twitter"></i></a>
<a href="https://www.instagram.com/arunp77/" target="_blank" class="instagram"><i class="bx bxl-instagram"></i></a>
<a href="https://arunp77.medium.com/" target="_blank" class="medium"><i class="bx bxl-medium"></i></a>
</div>
</div>
<nav id="navbar" class="nav-menu navbar">
<ul>
<li><a href="index.html#hero" class="nav-link scrollto active"><i class="bx bx-home"></i> <span>Home</span></a></li>
<li><a href="index.html#about" class="nav-link scrollto"><i class="bx bx-user"></i> <span>About</span></a></li>
<li><a href="index.html#resume" class="nav-link scrollto"><i class="bx bx-file-blank"></i> <span>Resume</span></a></li>
<li><a href="index.html#portfolio" class="nav-link scrollto"><i class="bx bx-book-content"></i> <span>Portfolio</span></a></li>
<li><a href="index.html#skills-and-tools" class="nav-link scrollto"><i class="bx bx-wrench"></i> <span>Skills and Tools</span></a></li>
<li><a href="index.html#language" class="nav-link scrollto"><i class="bi bi-menu-up"></i> <span>Languages</span></a></li>
<li><a href="index.html#awards" class="nav-link scrollto"><i class="bi bi-award-fill"></i> <span>Awards</span></a></li>
<li><a href="index.html#professionalcourses" class="nav-link scrollto"><i class="bx bx-book-alt"></i> <span>Professional Certification</span></a></li>
<li><a href="index.html#publications" class="nav-link scrollto"><i class="bx bx-news"></i> <span>Publications</span></a></li>
<li><a href="index.html#extra-curricular" class="nav-link scrollto"><i class="bx bx-rocket"></i> <span>Extra-Curricular Activities</span></a></li>
<!-- <li><a href="#contact" class="nav-link scrollto"><i class="bx bx-envelope"></i> <span>Contact</span></a></li> -->
</ul>
</nav><!-- .nav-menu -->
</div>
</header><!-- End Header -->
<main id="main">
<!-- ======= Breadcrumbs ======= -->
<section id="breadcrumbs" class="breadcrumbs">
<div class="container">
<div class="d-flex justify-content-between align-items-center">
<h2>Machine learning</h2>
<ol>
<li><a href="machine-learning.html" class="clickable-box">Content section</a></li>
<li><a href="index.html#portfolio" class="clickable-box">Portfolio section</a></li>
</ol>
</div>
</div>
</section><!-- End Breadcrumbs -->
<!------ right dropdown menue ------->
<div class="right-side-list">
<div class="dropdown">
<button class="dropbtn"><strong>Shortcuts:</strong></button>
<div class="dropdown-content">
<ul>
<li><a href="cloud-compute.html"><i class="fas fa-cloud"></i> Cloud</a></li>
<li><a href="AWS-GCP.html"><i class="fas fa-cloud"></i> AWS-GCP</a></li>
<li><a href="amazon-s3.html"><i class="fas fa-cloud"></i> AWS S3</a></li>
<li><a href="ec2-confi.html"><i class="fas fa-server"></i> EC2</a></li>
<li><a href="Docker-Container.html"><i class="fab fa-docker" style="color: rgb(29, 27, 27);"></i> Docker</a></li>
<li><a href="Jupyter-nifi.html"><i class="fab fa-python" style="color: rgb(34, 32, 32);"></i> Jupyter-nifi</a></li>
<li><a href="snowflake-task-stream.html"><i class="fas fa-snowflake"></i> Snowflake</a></li>
<li><a href="data-model.html"><i class="fas fa-database"></i> Data modeling</a></li>
<li><a href="sql-basics.html"><i class="fas fa-table"></i> QL</a></li>
<li><a href="sql-basic-details.html"><i class="fas fa-database"></i> SQL</a></li>
<li><a href="Bigquerry-sql.html"><i class="fas fa-database"></i> Bigquerry</a></li>
<li><a href="scd.html"><i class="fas fa-archive"></i> SCD</a></li>
<li><a href="sql-project.html"><i class="fas fa-database"></i> SQL project</a></li>
<!-- Add more subsections as needed -->
</ul>
</div>
</div>
</div>
<!-- ======= Portfolio Details Section ======= -->
<section id="portfolio-details" class="portfolio-details">
<div class="container">
<div class="row gy-4">
<h1>Polynomial Regression</h1>
<div class="col-lg-8">
<div class="portfolio-details-slider swiper">
<div class="swiper-wrapper align-items-center">
<figure>
<img src="assets/img/machine-ln/polinomial-reg.png" alt="" style="max-width: 60%; max-height: auto;">
<figcaption></figcaption>
</figure>
</div>
<div class="swiper-pagination"></div>
</div>
</div>
<div class="col-lg-4 grey-box">
<div class="section-title">
<h3>Content</h3>
<ol>
<li><a href="#introduction">Introduction</a></li>
<li><a href="#methods">Method for fitting polynomial regression models</a></li>
<li><a href="#interpretation">Interpreting the coefficients</a></li>
<li><a href="#example">Example</a></li>
<li><a href="#reference">Reference</a></li>
</ol>
</div>
</div>
</div>
<section id="introduction">
<h2>Introduction</h2>
Polynomial regression is a statistical technique for modeling a relationship between a dependent variable
(<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>y</mi></math>) and one or more independent variables (<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>x</mi></math>) using a polynomial function. In other words, it is a way of fitting a curve to a set of data points.
The general form of the polynomial regression model is:
$$y = \beta_0 + \beta_1 x + \beta_2 x^2 + \cdots + \beta_n x^n$$
where,
<ul>
<li><math xmlns="http://www.w3.org/1998/Math/MathML"><mi>y</mi></math> is the dependent variable.</li>
<li><math xmlns="http://www.w3.org/1998/Math/MathML"><mi>x</mi></math> is the independent variable.</li>
<li><math xmlns="http://www.w3.org/1998/Math/MathML"> <mi>β</mi> <sub>0</sub> <mo>,</mo> <mi>β</mi> <sub>1</sub> <mo>,</mo> <mo>...</mo> <mo>,</mo> <mi>β</mi> <sub>n</sub> </math> are the coefficients of the polynomial terms.</li>
</ul>
The degree of the polynomial is determined by the highest power of <math xmlns="http://www.w3.org/1998/Math/MathML"><mi>x</mi></math>, denoted as <math xmlns="http://www.w3.org/1998/Math/MathML"><mi>n</mi></math>.
<p>For example, a polynomial of degree 2 is a quadratic equation, and a polynomial of degree 3 is a cubic equation.</p>
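<p>As a minimal illustration (not part of the example later on this page; the coefficient values here are arbitrary), the polynomial model can be evaluated in NumPy by building the powers of x explicitly:</p>
<pre class="language-python"><code>
import numpy as np

# arbitrary coefficients for a degree-2 model: y = 2 + 1.5*x + 0.5*x^2
beta = np.array([2.0, 1.5, 0.5])

x = np.linspace(-3, 3, 5)                             # a few sample x values
X_poly = np.vander(x, N=len(beta), increasing=True)   # columns: [1, x, x^2]
y = X_poly @ beta                                     # y = b0 + b1*x + b2*x^2
print(y)
</code></pre>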
<!--------------------->
<h3>Fitting a polynomial regression model</h3>
<p>The goal of polynomial regression is to fit the polynomial curve to the data in a way that minimizes the residual sum of squares (RSS), i.e. the sum of squared differences between the observed and predicted values.
The RSS measures the error between the predicted values of y and the actual values of y.
</p>
<p>The equation for a simple linear regression (degree 1) is a special case of polynomial regression, where n = 1.</p>
$$y = \beta_0 +\beta_1 x$$
The coefficients <math xmlns="http://www.w3.org/1998/Math/MathML"> <mi>β</mi> <sub>0</sub> <mo>,</mo> <mi>β</mi> <sub>1</sub> <mo>,</mo> <mo>...</mo> <mo>,</mo> <mi>β</mi> <sub>n</sub> </math>
are typically estimated using methods such as the method of least squares. The model is then used to make predictions based on new values of
<math xmlns="http://www.w3.org/1998/Math/MathML"><mi>x</mi></math>.
<p>It's important to note that while polynomial regression allows for a more flexible fit to the data, it also runs the risk of overfitting, especially with higher-degree polynomials. Overfitting occurs when the model captures noise or fluctuations in the training data, leading to poor generalization to new, unseen data.</p>
<div class="box">
More about the overfitting can be found at: <a href="https://arunp77.github.io/Arun-Kumar-Pandey/Linear-reg.html#overfit-goodfit-underfit" target="_blank">Overfitting, underfitting and good fit</a>.
</div>
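<p>To make the RSS-minimization idea concrete, here is a small sketch (on a toy dataset, not the one used in the example below) that estimates the coefficients by least squares and then reports the RSS:</p>
<pre class="language-python"><code>
import numpy as np

rng = np.random.default_rng(0)
x = np.linspace(-3, 3, 50)
y = 2 + 1.5 * x + 0.5 * x**2 + rng.normal(scale=1.0, size=x.shape)   # noisy quadratic

X_poly = np.vander(x, N=3, increasing=True)              # design matrix [1, x, x^2]
beta_hat, *_ = np.linalg.lstsq(X_poly, y, rcond=None)    # least-squares coefficient estimates
rss = np.sum((y - X_poly @ beta_hat) ** 2)               # residual sum of squares
print(beta_hat, rss)
</code></pre>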
<!---------------->
<h3 id="methods">Method for fitting polynomial regression models</h3>
There are two main methods for fitting polynomial regression models:
<ul>
<li><strong>Least squares:</strong> This is the most common method for fitting polynomial regression models. It finds the coefficients that minimize the RSS, either through the closed-form normal equations or a numerical solver.</li>
<li><strong>Regularization:</strong> This method can be used to prevent overfitting, which occurs when a model fits the training data too well and does not generalize well to new data. There are several regularization methods; the most common is ridge regression (a minimal sketch follows this list).</li>
</ul>
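<p>As a rough sketch of the regularization option (ridge regression via scikit-learn; the degree and the penalty strength <code>alpha</code> are arbitrary choices, not taken from the example below):</p>
<pre class="language-python"><code>
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# small synthetic dataset with the same quadratic shape as in the example below
rng = np.random.default_rng(0)
X = 6 * rng.random((100, 1)) - 3
y = 0.5 * X**2 + 1.5 * X + 2 + rng.normal(size=(100, 1))

# polynomial features followed by a ridge-penalized linear fit;
# alpha sets the strength of the L2 penalty that shrinks the coefficients
ridge_poly = Pipeline([
    ("poly", PolynomialFeatures(degree=10, include_bias=False)),
    ("ridge", Ridge(alpha=1.0)),
])
ridge_poly.fit(X, y.ravel())
print(ridge_poly.named_steps["ridge"].coef_)
</code></pre>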
<!-------------->
<h3 id="interpretation">Interpreting the coefficients</h3>
The coefficients of a polynomial regression model can be interpreted in the following way:
<ul>
<li><math xmlns="http://www.w3.org/1998/Math/MathML"> <mi>β</mi> <sub>0</sub></math> is the intercept, i.e. the predicted value of y when x = 0</li>
<li><math xmlns="http://www.w3.org/1998/Math/MathML"> <mi>β</mi> <sub>1</sub></math> is the slope of the line or curve at the point (0, <math xmlns="http://www.w3.org/1998/Math/MathML"> <mi>β</mi> <sub>0</sub></math>)</li>
<li><math xmlns="http://www.w3.org/1998/Math/MathML"> <mi>β</mi> <sub>2</sub></math> is the rate of change of the slope</li>
<li><math xmlns="http://www.w3.org/1998/Math/MathML"> <mi>β</mi> <sub>3</sub></math> is the rate of change of the rate of change </li>
<li> ... </li>
</ul>
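<p>In derivative terms, differentiating the model makes these interpretations precise:</p>
$$\frac{dy}{dx} = \beta_1 + 2\beta_2 x + 3\beta_3 x^2 + \cdots + n\beta_n x^{n-1}$$
<p>so the slope of the curve at x = 0 is β<sub>1</sub>, and the second derivative at x = 0 is 2β<sub>2</sub>, which is why β<sub>2</sub> governs how quickly the slope changes (the curvature).</p>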
<!----------------------->
<h3>Applications of polynomial regression</h3>
Polynomial regression has a wide variety of applications, including:
<ul>
<li><strong>Predicting sales:</strong> Polynomial regression can be used to predict sales based on factors such as price, advertising, and economic conditions.</li>
<li><strong>Modeling the growth of plants:</strong> Polynomial regression can be used to model the growth of plants based on factors such as temperature, sunlight, and nutrients.</li>
<li><strong>Analyzing financial data:</strong> Polynomial regression can be used to analyze financial data to identify trends and patterns.</li>
<li><strong>Improving the accuracy of machine learning models:</strong> Polynomial regression can be used to improve the accuracy of machine learning models by providing them with a more complex and flexible representation of the data.</li>
</ul>
</section>
<section id="example">
<h3>Example-1</h3>
<ul>
<li><strong>Importing the libraries: </strong>
<pre class="language-python"><code>
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
</code></pre>
</li>
<li><strong>Generating the dataset: </strong>
In this example, we generate a random dataset for X and y. Specifically, we use the following equation to generate y:
$$y = \frac{1}{2} x^2 +\frac{3}{2} x + 2 +\text{noise}$$
and the corresponding Python code is:
<pre class="language-python"><code>
X = 6 * np.random.rand(100,1)-3
y = 0.5*X**2 + 1.5*X + 2 + np.random.randn(100,1)
# quadratic equation is shown above
plt.scatter(X, y, color='r')
plt.xlabel("X")
plt.ylabel("y")
plt.show()
</code></pre>
which generates the data for our example and plots it:
<figure>
<img src="assets/img/machine-ln/genreated-poly-data.png" alt="" style="max-width: 60%; max-height: auto;">
<figcaption></figcaption>
</figure>
</li>
<li><strong>Simple line: </strong>Let's start with a simple straight-line fit, which is the special case of degree = 1. We first split the data into training and test sets and then fit a linear regression:
<pre class="language-python"><code>
## Apply linear regression
from sklearn.linear_model import LinearRegression
regression1 = LinearRegression()
regression1.fit(X_train, y_train)
## plot Training data plot and best fit line
plt.scatter(X_train, y_train, color = 'b')
plt.plot(X_train, regression1.predict(X_train), color = 'r')
plt.xlabel("X_train")
plt.ylabel("y_pred")
plt.show()
from sklearn.metrics import r2_score
score = r2_score(y_test, regression1.predict(X_test))
print(f"The r-squared value for the model is= {score}")
</code></pre>
This gives <code>The r-squared value for the model is= 0.6405513731105184</code> and the plot:
<figure>
<img src="assets/img/machine-ln/degree-1.png" alt="" style="max-width: 60%; max-height: auto;">
<figcaption></figcaption>
</figure>
The coefficient in this case can be obtained using <code>regression1.coef_</code>, which gives <code>[[1.43280818]]</code>.
</li>
<li><strong>Quadratic equation:</strong>
Next, we fit the following quadratic equation:
$$y = \beta_0 +\beta_1 x +\beta_2 x^2$$
<pre class="language-python"><code>
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree = 2, include_bias = True)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
from sklearn.metrics import r2_score
regression = LinearRegression()
regression.fit(X_train_poly, y_train)
y_pred = regression.predict(X_test_poly)
score = r2_score(y_test, y_pred)
print(score)
</code></pre>
which gives an r-squared value of <code>0.8726125379887142</code>, a significant improvement.
<figure>
<img src="assets/img/machine-ln/degree-2.png" alt="" style="max-width: 60%; max-height: auto;">
<figcaption></figcaption>
</figure>
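<p>For reference, the degree-2 plot above can be reproduced with a sketch along these lines (the plotting code is not shown in the original; this reuses <code>poly</code>, <code>regression</code> and the training data from the block above, and evaluates the fit on a smooth grid so the curve is drawn cleanly):</p>
<pre class="language-python"><code>
# evaluate the fitted quadratic on a smooth grid (sketch; not from the original)
X_grid = np.linspace(-3, 3, 200).reshape(-1, 1)
y_grid = regression.predict(poly.transform(X_grid))

plt.scatter(X_train, y_train, color='b', label="Training data")
plt.plot(X_grid, y_grid, color='r', label="Degree-2 fit")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.show()
</code></pre>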
The coefficients in this case can be obtained using <code>regression.coef_</code>, which gives <code>[[0. 1.47171982 0.42463995]]</code>.
</li>
<li><strong>Cubic:</strong>
<pre class="language-python"><code>
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree = 3, include_bias = True)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
from sklearn.metrics import r2_score
regression = LinearRegression()
regression.fit(X_train_poly, y_train)
y_pred1 = regression.predict(X_test_poly)
score = r2_score(y_test, y_pred)
print(score)
from sklearn.metrics import r2_score
regression = LinearRegression()
regression.fit(X_train_poly, y_train)
y_pred1 = regression.predict(X_test_poly)
score = r2_score(y_test, y_pred)
print(score)
</code></pre>
This gives an r-squared value of <code>0.8620083765320085</code>, almost the same as in the degree-2 case.
<p><strong>Prediction:</strong></p> For a new dataset:
<pre class="language-python"><code>
## Prediction for new data
X_new = np.linspace(-3,3,200).reshape(200,1)
X_new_poly = poly.transform(X_new)
y_new = regression.predict(X_new_poly)
plt.plot(X_new, y_new, "r-", linewidth = 2, label = "New Prediction")
plt.plot(X_train, y_train, "y.", label = "Training points")
plt.plot(X_test, y_test, "b.", label ="Testing points")
plt.xlabel("X")
plt.plot("y")
plt.legend()
plt.show()
</code></pre>
we get the following plot:
<figure>
<img src="assets/img/machine-ln/degree-3-prediction.png" alt="" style="max-width: 60%; max-height: auto;">
<figcaption></figcaption>
</figure>
</li>
<li><strong>Creating a pipeline for any degree:</strong>
In this case, we first define a generic function that fits a polynomial of the specified degree and predicts on new data.
<pre class="language-python"><code>
from sklearn.pipeline import Pipeline

def poly_regression(degree, X_new):
    """Fit a polynomial of the specified degree and predict on X_new"""
    poly_features = PolynomialFeatures(degree=degree, include_bias=True)
    lin_reg = LinearRegression()
    poly_regression = Pipeline([
        ("poly_features", poly_features),
        ("lin_reg", lin_reg)
    ])
    poly_regression.fit(X_train, y_train)
    y_pred_new = poly_regression.predict(X_new)
    return y_pred_new
</code></pre>
Then we provide the dataset and plot all degrees.
<pre class="language-python"><code>
# Generate X_new once for all degrees
X_new = np.linspace(-3, 3, 200).reshape(200, 1)

# Plotting for degrees 0, 1, 2, 3, 4
for degree in range(5):
    y_pred = poly_regression(degree, X_new)
    plt.plot(X_new, y_pred, label="Degree " + str(degree), linewidth=2)

plt.legend(loc="upper left")
plt.plot(X_train, y_train, "b.", linewidth=3, label="Training Data")
plt.plot(X_test, y_test, "g.", linewidth=3, label="Test Data")
plt.xlabel("X")
plt.ylabel("y")
plt.axis([-4, 4, 0, 10])
plt.show()
</code></pre>
<figure>
<img src="assets/img/machine-ln/all-degree-pipeline.png" alt="" style="max-width: 60%; max-height: auto;">
<figcaption></figcaption>
</figure>
</li>
</ul>
</section>
<!-------Reference ------->
<section id="reference">
<h2>References</h2>
<ul>
<li>My GitHub repository: <a href="https://github.com/arunp77/Machine-Learning/" target="_blank">Machine Learning</a></li>
<li><a href="https://mlu-explain.github.io/linear-regression/" target="_blank">A Visual Introduction To Linear regression</a> (Best reference for theory and visualization).</li>
<li>Book on Regression model: <a href="https://avehtari.github.io/ROS-Examples/" target="_blank">Regression and Other Stories</a></li>
<li>Book on Statistics: <a href="https://hastie.su.domains/Papers/ESLII.pdf" target="_blank">The Elements of Statistical Learning</a></li>
</ul>
</section>
<hr>
<div style="background-color: #f0f0f0; padding: 15px; border-radius: 5px;">
<h3>Some other interesting things to know:</h3>
<ul style="list-style-type: disc; margin-left: 30px;">
<li>Visit my website on <a href="sql-project.html">For Data, Big Data, Data-modeling, Datawarehouse, SQL, cloud-compute.</a></li>
<li>Visit my website on <a href="Data-engineering.html">Data engineering</a></li>
</ul>
</div>
<p></p>
<div class="navigation">
<a href="index.html#portfolio" class="clickable-box">
<span class="arrow-left">Portfolio section</span>
</a>
<a href="machine-learning.html" class="clickable-box">
<span class="arrow-right">Content</span>
</a>
</div>
</div>
</div>
</section><!-- End Portfolio Details Section -->
</main><!-- End #main -->
<!-- ======= Footer ======= -->
<footer id="footer">
<div class="container">
<div class="copyright">
© Copyright <strong><span>Arun</span></strong>
</div>
</div>
</footer><!-- End Footer -->
<a href="#" class="back-to-top d-flex align-items-center justify-content-center"><i class="bi bi-arrow-up-short"></i></a>
<!-- Vendor JS Files -->
<script src="assets/vendor/purecounter/purecounter_vanilla.js"></script>
<script src="assets/vendor/aos/aos.js"></script>
<script src="assets/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
<script src="assets/vendor/glightbox/js/glightbox.min.js"></script>
<script src="assets/vendor/isotope-layout/isotope.pkgd.min.js"></script>
<script src="assets/vendor/swiper/swiper-bundle.min.js"></script>
<script src="assets/vendor/typed.js/typed.umd.js"></script>
<script src="assets/vendor/waypoints/noframework.waypoints.js"></script>
<script src="assets/vendor/php-email-form/validate.js"></script>
<!-- Template Main JS File -->
<script src="assets/js/main.js"></script>
<script>
document.addEventListener("DOMContentLoaded", function () {
  // highlight.js is not loaded on this page (Prism handles code highlighting), so guard the call
  if (window.hljs) { hljs.initHighlightingOnLoad(); }
});
</script>
</body>
</html>