-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Big-data.html
296 lines (245 loc) · 22.4 KB
/
Big-data.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta content="width=device-width, initial-scale=1.0" name="viewport">
<title>Big Data - Portfolio Details</title>
<meta content="" name="description">
<meta content="" name="keywords">
<!-- Favicons -->
<link href="assets/img/Favicon-1.png" rel="icon">
<link href="assets/img/Favicon-1.png" rel="apple-touch-icon">
<!-- Google Fonts -->
<link href="https://fonts.googleapis.com/css?family=Open+Sans:300,300i,400,400i,600,600i,700,700i|Raleway:300,300i,400,400i,500,500i,600,600i,700,700i|Poppins:300,300i,400,400i,500,500i,600,600i,700,700i" rel="stylesheet">
<!-- Vendor CSS Files -->
<link href="assets/vendor/aos/aos.css" rel="stylesheet">
<link href="assets/vendor/bootstrap/css/bootstrap.min.css" rel="stylesheet">
<link href="assets/vendor/bootstrap-icons/bootstrap-icons.css" rel="stylesheet">
<link href="assets/vendor/boxicons/css/boxicons.min.css" rel="stylesheet">
<link href="assets/vendor/glightbox/css/glightbox.min.css" rel="stylesheet">
<link href="assets/vendor/swiper/swiper-bundle.min.css" rel="stylesheet">
<!-- Creating a python code section-->
<link rel="stylesheet" href="assets/css/prism.css">
<script src="assets/js/prism.js"></script>
<!-- Template Main CSS File -->
<link href="assets/css/style.css" rel="stylesheet">
<!-- To set the icon, visit https://fontawesome.com/account-->
<script src="https://kit.fontawesome.com/5d25c1efd3.js" crossorigin="anonymous"></script>
<!-- end of icon-->
<!-- =======================================================
* Template Name: iPortfolio
* Updated: Sep 18 2023 with Bootstrap v5.3.2
* Template URL: https://bootstrapmade.com/iportfolio-bootstrap-portfolio-websites-template/
* Author: BootstrapMade.com
* License: https://bootstrapmade.com/license/
======================================================== -->
</head>
<body>
<!-- ======= Mobile nav toggle button ======= -->
<i class="bi bi-list mobile-nav-toggle d-xl-none"></i>
<!-- ======= Header ======= -->
<header id="header">
  <!-- Site sidebar: profile photo and name, social links, and the primary navigation back to index.html sections. -->
  <div class="d-flex flex-column">
    <div class="profile">
      <img src="assets/img/myphoto.jpeg" alt="" class="img-fluid rounded-circle">
      <h1 class="text-light"><a href="index.html">Arun</a></h1>
      <!-- Icon-only links: aria-label supplies the accessible name, the icon itself is hidden from assistive tech.
           rel="noopener noreferrer" keeps the opened tab from reaching back into this page via window.opener. -->
      <div class="social-links mt-3 text-center">
        <a href="https://www.linkedin.com/in/arunp77/" target="_blank" rel="noopener noreferrer" class="linkedin" aria-label="LinkedIn (opens in a new tab)"><i class="bx bxl-linkedin" aria-hidden="true"></i></a>
        <a href="https://github.com/arunp77" target="_blank" rel="noopener noreferrer" class="github" aria-label="GitHub (opens in a new tab)"><i class="bx bxl-github" aria-hidden="true"></i></a>
        <a href="https://twitter.com/arunp77_" target="_blank" rel="noopener noreferrer" class="twitter" aria-label="Twitter (opens in a new tab)"><i class="bx bxl-twitter" aria-hidden="true"></i></a>
        <a href="https://www.instagram.com/arunp77/" target="_blank" rel="noopener noreferrer" class="instagram" aria-label="Instagram (opens in a new tab)"><i class="bx bxl-instagram" aria-hidden="true"></i></a>
        <a href="https://arunp77.medium.com/" target="_blank" rel="noopener noreferrer" class="medium" aria-label="Medium (opens in a new tab)"><i class="bx bxl-medium" aria-hidden="true"></i></a>
      </div>
    </div>
    <!-- Primary navigation. Icons are decorative because each link already has a visible text label. -->
    <nav id="navbar" class="nav-menu navbar">
      <ul>
        <li><a href="index.html#hero" class="nav-link scrollto active"><i class="bx bx-home" aria-hidden="true"></i> <span>Home</span></a></li>
        <li><a href="index.html#about" class="nav-link scrollto"><i class="bx bx-user" aria-hidden="true"></i> <span>About</span></a></li>
        <li><a href="index.html#resume" class="nav-link scrollto"><i class="bx bx-file-blank" aria-hidden="true"></i> <span>Resume</span></a></li>
        <li><a href="index.html#portfolio" class="nav-link scrollto"><i class="bx bx-book-content" aria-hidden="true"></i> <span>Portfolio</span></a></li>
        <li><a href="index.html#skills-and-tools" class="nav-link scrollto"><i class="bx bx-wrench" aria-hidden="true"></i> <span>Skills and Tools</span></a></li>
        <li><a href="index.html#language" class="nav-link scrollto"><i class="bi bi-menu-up" aria-hidden="true"></i> <span>Languages</span></a></li>
        <li><a href="index.html#awards" class="nav-link scrollto"><i class="bi bi-award-fill" aria-hidden="true"></i> <span>Awards</span></a></li>
        <li><a href="index.html#professionalcourses" class="nav-link scrollto"><i class="bx bx-book-alt" aria-hidden="true"></i> <span>Professional Certification</span></a></li>
        <li><a href="index.html#publications" class="nav-link scrollto"><i class="bx bx-news" aria-hidden="true"></i> <span>Publications</span></a></li>
        <!-- <li><a href="index.html#extra-curricular" class="nav-link scrollto"><i class="bx bx-rocket"></i> <span>Extra-Curricular Activities</span></a></li> -->
        <!-- <li><a href="#contact" class="nav-link scrollto"><i class="bx bx-envelope"></i> <span>Contact</span></a></li> -->
      </ul>
    </nav><!-- .nav-menu -->
  </div>
</header><!-- End Header -->
<main id="main">
<!-- ======= Breadcrumbs ======= -->
<section id="breadcrumbs" class="breadcrumbs">
<div class="container">
<div class="d-flex justify-content-between align-items-center">
<h2></h2>
<ol>
<li><a href="content-page.html" class="clickable-box">&lt; Go to content</a></li>
<li><a href="index.html#portfolio" class="clickable-box">Go to portfolio</a></li>
</ol>
</div>
</div>
</section><!-- End Breadcrumbs -->
<!-- Right dropdown menu -->
<div class="right-side-list">
<div class="dropdown">
<button class="dropbtn"><strong>Shortcuts:</strong></button>
<div class="dropdown-content">
<ul>
<li><a href="cloud-compute.html"><i class="fas fa-cloud"></i> Cloud</a></li>
<li><a href="AWS-GCP.html"><i class="fas fa-cloud"></i> AWS-GCP</a></li>
<li><a href="amazon-s3.html"><i class="fas fa-cloud"></i> AWS S3</a></li>
<li><a href="ec2-confi.html"><i class="fas fa-server"></i> EC2</a></li>
<li><a href="Docker-Container.html"><i class="fab fa-docker" style="color: rgb(29, 27, 27);"></i> Docker</a></li>
<li><a href="Jupyter-nifi.html"><i class="fab fa-python" style="color: rgb(34, 32, 32);"></i> Jupyter-nifi</a></li>
<li><a href="snowflake-task-stream.html"><i class="fas fa-snowflake"></i> Snowflake</a></li>
<li><a href="data-model.html"><i class="fas fa-database"></i> Data modeling</a></li>
<li><a href="sql-basics.html"><i class="fas fa-table"></i> SQL basics</a></li>
<li><a href="sql-basic-details.html"><i class="fas fa-database"></i> SQL</a></li>
<li><a href="Bigquerry-sql.html"><i class="fas fa-database"></i> BigQuery</a></li>
<li><a href="scd.html"><i class="fas fa-archive"></i> SCD</a></li>
<li><a href="sql-project.html"><i class="fas fa-database"></i> SQL project</a></li>
<!-- Add more subsections as needed -->
</ul>
</div>
</div>
</div>
<!-- ======= Portfolio Details Section ======= -->
<section id="portfolio-details" class="portfolio-details">
<div class="container">
<div class="row gy-4">
<h1>Big Data</h1>
<div class="col-lg-8">
<div class="portfolio-details-slider swiper">
<div class="swiper-wrapper align-items-center">
<figure style="text-align: center;">
<img src="assets/img/portfolio/big-data.png" alt="" style="max-width: 50%; max-height: 50%;">
</figure>
</div>
<div class="swiper-pagination"></div>
</div>
</div>
<h2>Big data</h2>
<p>Big data refers to extremely large and complex datasets that cannot be easily managed, processed, or analyzed using traditional data processing techniques. It encompasses vast volumes of structured, semi-structured, and unstructured data that are generated at high velocity and variety.</p>
<h5>The "Five Vs" of Big Data</h5>
<p>Just because a data set is large does not necessarily mean it is big data. To qualify as big data, data must meet at least the following five characteristics:</p>
<ul style="list-style-type: disc; margin-left: 30px;">
<li><strong>Volume:</strong> While volume is far from the only reason big data is called "big," it is undoubtedly a key characteristic. To fully manage and leverage big data, advanced algorithms and AI-driven analytics are required. Before that can happen, however, there must be a secure and reliable means of storing, organizing, and retrieving the many terabytes of data that large enterprises accumulate.</li>
<li><strong>Velocity:</strong> In the past, all data generated had to be fed into a traditional database system, often manually, before it could be analyzed or retrieved. Big data technology today enables databases to process, analyze and configure data as it is being generated - sometimes in milliseconds. This enables companies to leverage real-time data to seize financial opportunities, respond to customer needs, prevent fraud and address any other activity where speed is critical.</li>
<li><strong>Variety:</strong> Datasets made up entirely of structured data aren't necessarily big data, no matter how large. Big data typically consists of combinations of structured, unstructured, and semi-structured data. Traditional databases and data management solutions lack the flexibility and functionality to manage the complex, disparate datasets that make up big data.</li>
<li><strong>Veracity:</strong> While modern database technology allows organizations to capture and make meaningful use of large amounts of data and diverse types of data, it is only valuable if it is accurate, relevant, and up-to-date. With traditional databases containing only structured data, syntactic errors and typos were the common culprits when it came to data accuracy. With unstructured data comes a whole new set of reliability challenges. Human bias, social noise, and questions about data lineage can impact data quality.</li>
<li><strong>Value:</strong> There is no question that the results of analyzing big data are often fascinating and unexpected. But for businesses, these analytics need to provide insights that help them become more competitive and resilient — and better serve their customers. Modern big data technologies enable data collection and retrieval that deliver measurable value for both business outcomes and operational resilience.</li>
</ul>
<img src="assets/img/portfolio/big-data-fiveVs.png" alt="The five Vs of big data: volume, velocity, variety, veracity, and value" style="max-width: 90%; max-height: 90%;">
<h5>How does big data work?</h5>
<p>Big data is useful when the analysis provides relevant and actionable insights that measurably advance the business. With regard to the transformation of big data, companies should ensure that their systems and processes are sufficiently prepared for the collection, storage and analysis of this large amount of data.</p>
<p>The three most important steps in using Big Data:</p>
<ul>
<li>Capturing big data</li>
<li>Storing big data</li>
<li>Analyzing big data</li>
</ul>
<img src="assets/img/portfolio/big-data-work.png" alt="Overview of how big data works" style="max-width: 90%; max-height: 90%;">
<h5>Big data analytics</h5>
<p>Big Data Analytics involves analyzing vast volumes of structured, semi-structured, and unstructured data to uncover patterns, trends, and insights that can drive business decisions and strategies. It leverages advanced analytics techniques and technologies to process and interpret data from diverse sources.</p>
<h5>Life cycle phases of big data analytics</h5>
<p>The following are the phases in the life cycle of big data analytics in brief:</p>
<ul style="list-style-type: disc; margin-left: 30px;">
<li><strong>Data Ingestion:</strong> This is the process of collecting, extracting, and loading data from various sources into a centralized data repository.</li>
<li><strong>Data Preparation:</strong> This is the cleaning, transforming, and preparing of data for analysis.</li>
<li><strong>Data Exploration and Modeling:</strong> This is the process of using various analytical techniques and tools to uncover patterns and insights in the data.</li>
<li><strong>Data Visualization and Reporting:</strong> This is the process of using visual aids to communicate the findings from the data analysis.</li>
<li><strong>Data Lifecycle Management:</strong> This is the process of managing the data throughout its lifecycle, from ingestion to visualization and reporting.</li>
</ul>
<img src="assets/img/portfolio/big-data-lifecycle.png" alt="Life cycle phases of big data analytics" style="max-width: 60%; max-height: 60%;">
<h5>Big Data Analytics stages</h5>
<p>Big Data Analytics involves several stages:</p>
<ul style="list-style-type: disc; margin-left: 30px;">
<li>Data Acquisition</li>
<li>Data Storage</li>
<li>Data processing</li>
<li>Data analysis</li>
<li>Visualization and reporting</li>
</ul>
<img src="assets/img/portfolio/big-data-pipeline.png" alt="Stages of the big data analytics pipeline" style="max-width: 100%; max-height: 100%;">
<h5>Benefits of Big Data Analytics:</h5>
<p>Big data analytics and tools are used to help businesses improve their operations, decision-making processes, and overall performance. Here are some ways in which big data analytics and tools can contribute to business improvement:</p>
<ul style="list-style-type: disc; margin-left: 30px;">
<li><strong>Improved Decision-making:</strong> Enables data-driven decision-making based on accurate insights and evidence.</li>
<li><strong>Enhanced Operational Efficiency:</strong> Identifies inefficiencies, optimizes processes, and reduces costs.</li>
<li><strong>Personalized Customer Experiences:</strong> Enables customization, targeted marketing, and better customer satisfaction.</li>
<li><strong>Predictive Capabilities:</strong> Provides insights for forecasting trends, identifying risks, and proactive decision-making.</li>
<li><strong>Innovation and Discovery:</strong> Uncovers new patterns, opportunities, and potential innovations.</li>
</ul>
<h5>Challenges of Big Data Analytics:</h5>
<ul style="list-style-type: disc; margin-left: 30px;">
<li><strong>Data Volume and Variety:</strong> Handling and processing large and diverse datasets require scalable infrastructure and appropriate tools.</li>
<li><strong>Data Quality and Validity:</strong> Ensuring data accuracy, consistency, and relevancy can be challenging, especially with unstructured or incomplete data.</li>
<li><strong>Data Privacy and Security:</strong> Protecting sensitive data and complying with regulations to maintain privacy and security.</li>
<li><strong>Skills and Expertise:</strong> Acquiring and retaining skilled data professionals with expertise in big data analytics can be a challenge.</li>
<li><strong>Cost and Infrastructure:</strong> Establishing and maintaining the necessary infrastructure, tools, and technologies can be expensive.</li>
</ul>
<h5>How to Improve the Accuracy of Big Data Analysis?</h5>
<p>Improving the accuracy of big data analysis is crucial for obtaining reliable and actionable insights. Here are some key steps you can take to enhance the accuracy of your big data analysis:</p>
<ul style="list-style-type: disc; margin-left: 30px;">
<li><strong>Data Quality Assurance:</strong> Ensure that the data being analyzed is of high quality. Perform data cleaning, preprocessing, and validation to address issues such as missing values, outliers, inconsistencies, and data integrity problems. Validate the accuracy and reliability of data sources, and establish data governance practices to maintain data quality throughout the analysis process.</li>
<li><strong>Feature Selection and Engineering:</strong> Identify the most relevant features (variables) that have a strong impact on the analysis and remove irrelevant or redundant features. Feature engineering involves transforming or creating new features that can improve the accuracy of the analysis. This process requires domain expertise and an understanding of the specific problem being addressed.</li>
<li><strong>Proper Data Sampling:</strong> Sampling techniques can be used to select representative subsets of data for analysis, especially when dealing with large datasets. Ensure that the selected samples accurately represent the entire dataset and maintain the integrity of the analysis results. The choice of sampling method should be aligned with the objectives and characteristics of the data.</li>
<li><strong>Model Selection and Validation:</strong> Choose appropriate modeling techniques and algorithms that are well-suited for the specific analysis task. Validate the selected models using proper evaluation methods such as cross-validation, holdout validation, or bootstrap methods. Regularly review and refine the models to improve accuracy and address any overfitting or underfitting issues.</li>
<li><strong>Ensemble Methods:</strong> Employ ensemble methods that combine multiple models to improve accuracy. Ensemble techniques, such as bagging, boosting, and stacking, can help in reducing bias, variance, and error rates in the analysis. By leveraging the strengths of different models, ensemble methods can produce more accurate predictions or classifications.</li>
<li><strong>Continuous Monitoring and Iterative Refinement:</strong> Implement a feedback loop to continuously monitor the accuracy and performance of the analysis. Regularly evaluate the results against ground truth or real-world outcomes to identify any discrepancies or areas for improvement. Use the feedback to refine the analysis process, update models, and incorporate new data or insights.</li>
<li><strong>Domain Expertise and Contextual Understanding:</strong> Apply domain expertise and contextual understanding to interpret and validate the analysis results. Subject matter experts can provide valuable insights, validate the accuracy of the analysis, and ensure that the findings align with the business objectives and requirements.</li>
<li><strong>Collaborative Approach:</strong> Foster collaboration and communication between data analysts, data scientists, domain experts, and stakeholders. Encourage discussions, feedback, and validation from multiple perspectives to improve the accuracy of the analysis. Collaboration helps in identifying potential biases, validating assumptions, and gaining a holistic understanding of the data and its implications.</li>
</ul>
<h3>Some other interesting things to know:</h3>
<ul style="list-style-type: disc; margin-left: 30px;">
<li>Visit the <a href="https://www.javatpoint.com/data-mining">Data mining tutorial</a></li>
<li>Visit my repository on <a href="https://github.com/arunp77/Database-datapipeline-ETL/tree/main/Database">GitHub for Big data, Databases, DBMS, Data modeling, Data mining.</a></li>
<li>Visit my website on <a href="sql-basic-details.html">SQL.</a></li>
<li>Visit my website on <a href="sql-postgresql.html">PostgreSQL.</a></li>
<li>Visit my website on <a href="scd.html">Slowly changing dimensions.</a></li>
<li>Visit my website on <a href="snowflake.html">Snowflake.</a></li>
<li>Visit my website on <a href="sql-project.html">SQL project in PostgreSQL.</a></li>
<li>Visit my website on <a href="snowflake-task-stream.html">Snowflake data streaming.</a></li>
</ul>
<div class="navigation">
<a href="content-page.html" class="clickable-box">
<span class="arrow-right">Go to content</span>
</a>
<a href="index.html" class="clickable-box">
<span class="arrow-left">Go home</span>
</a>
</div>
</div>
</div>
</section><!-- End Portfolio Details Section -->
</main><!-- End #main -->
<!-- ======= Footer ======= -->
<footer id="footer">
<div class="container">
<div class="copyright">
© Copyright <strong><span>Arun</span></strong>
</div>
</div>
</footer><!-- End Footer -->
<a href="#" class="back-to-top d-flex align-items-center justify-content-center" aria-label="Back to top"><i class="bi bi-arrow-up-short" aria-hidden="true"></i></a>
<!-- Vendor JS Files -->
<script src="assets/vendor/purecounter/purecounter_vanilla.js"></script>
<script src="assets/vendor/aos/aos.js"></script>
<script src="assets/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
<script src="assets/vendor/glightbox/js/glightbox.min.js"></script>
<script src="assets/vendor/isotope-layout/isotope.pkgd.min.js"></script>
<script src="assets/vendor/swiper/swiper-bundle.min.js"></script>
<script src="assets/vendor/typed.js/typed.umd.js"></script>
<script src="assets/vendor/waypoints/noframework.waypoints.js"></script>
<script src="assets/vendor/php-email-form/validate.js"></script>
<!-- Template Main JS File -->
<script src="assets/js/main.js"></script>
<script>
  // Optional highlight.js bootstrap.
  // No script tag on this page loads highlight.js (only Prism is included in
  // <head>), so the previous unguarded hljs call raised a ReferenceError on
  // every page load. The call is now a no-op unless an `hljs` global exists.
  document.addEventListener("DOMContentLoaded", function () {
    if (typeof hljs === "undefined") {
      return;
    }
    // Prefer highlightAll() where the loaded highlight.js build provides it;
    // otherwise fall back to the original initHighlightingOnLoad() call.
    if (typeof hljs.highlightAll === "function") {
      hljs.highlightAll();
    } else {
      hljs.initHighlightingOnLoad();
    }
  });
</script>
</body>
</html>