Skip to content

Commit

Permalink
[ADD] Hive job flow
Browse files Browse the repository at this point in the history
  • Loading branch information
MoustafaAMahmoud committed Sep 30, 2023
1 parent 45993dc commit 602420b
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 93 deletions.
99 changes: 59 additions & 40 deletions Ch03-HadoopMR/Ch03-HadoopMR.tex
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
% use no footline.
\begin{frame}[plain, noframenumbering]{Outline}
\tableofcontents
\end{frame}
% \begin{frame}[plain, noframenumbering]{Outline}
% \tableofcontents
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Hive}

Expand All @@ -18,6 +18,7 @@ \section{Hive}
\item<5-> Relational Data Analysis with Hive. \pause
\item<6-> Hive Data Management. \pause
\item<7-> Hive Optimization. \pause
\item<8-> Hive Demo. \pause
\end{itemize}

\end{frame}
Expand Down Expand Up @@ -54,6 +55,17 @@ \subsection{Introduction to hive}
\item Apache Hive bridges the gap between the world of big data and traditional relational databases, making it a valuable tool for data engineers, analysts, and data scientists.\pause
\end{itemize}
\end{frame}

% Slide: a minimal HiveQL SELECT shown as a captioned SQL listing.
% The frame is declared fragile because its body contains an lstlisting
% (verbatim-like) environment.
\begin{frame}[fragile]{Hive Query Example}

% The listing body is reproduced verbatim, so it stays flush-left.
\begin{lstlisting}[language=SQL, caption={Hive Query Example}]
SELECT *
FROM Customers
WHERE Country = 'USA';
\end{lstlisting}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


Expand All @@ -77,15 +89,17 @@ \subsection{Hive Architecture}
\begin{itemize}
\item Hive Clients.
\item Hive Services.
\item Hive Metadata (Metastore).
\item Hive Storage and Computing.
\end{itemize}
\end{frame}

\begin{frame}{Abstract Components of Apache Hive}
\includegraphics[width=\linewidth,height=.8\textheight]{./Figures/chapter-03/Hive_Architecture.jpg}
\end{frame}


\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Architecture.pdf}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Hive Clients}
Expand Down Expand Up @@ -313,44 +327,26 @@ \subsection{Hive Architecture}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Job Execution Flow in Hive}
% Slide: job execution flow of a Hive query, from receiving the SQL text
% to storing the results in HDFS.
\begin{frame}{Job Execution Flow in Hive}
\begin{itemize}
\item Receive SQL Query.
\begin{itemize}
\item Parse HiveQL.
\item Make optimization.
\item Plan execution.
\item Submit job(s) to the cluster.
\item Monitor the progress.
\item Process the data in MapReduce or Spark.
\item Store the data in HDFS.
\end{itemize}
\end{itemize}

% Flow diagram, bounded by the text width and text height; keepaspectratio
% makes the two limits act as a bounding box instead of distorting the figure.
\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Query_Flow.pdf}

% Text outline of the same steps, kept here commented out.
% \begin{itemize}
% \item Receive SQL Query.
% \begin{itemize}
% \item Parse HiveQL.
% \item Make optimization.
% \item Plan execution.
% \item Submit job(s) to the cluster.
% \item Monitor the progress.
% \item Process the data in MapReduce or Spark.
% \item Store the data in HDFS.
% \end{itemize}
% \end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Hive Schema and Data Storage}
\begin{frame}{Hive Schema and Data Storage}
\begin{itemize}
\item Hive queries operate on tables, similar to RDBMS.
\begin{itemize}
\item A table corresponds to a directory in storage (HDFS, S3, GCS, or Azure).
\item Each table comprises one or more files.
\item Every table is associated with a specific file format.
\item Hive stores table structure and location in the metadata store (RDBMS).
\item Hive supports various file formats, such as Parquet, ORC, and Text.
\end{itemize}
\end{itemize}
\end{frame}

\begin{frame}{Hive Schema and Data Storage (Continued)}
\begin{itemize}
\item Hive queries reference the metastore to access table location and structure.
\item While queries interact with the file system, metadata is stored in the RDBMS.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Performance Tuning}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Query Execution Plan}
\begin{frame}
Expand Down Expand Up @@ -395,10 +391,33 @@ \subsubsection{Query Execution Plan}

\end{frame}
\subsubsection{Cost-Based Optimization}
\subsection{Further Readings and Assignment}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Hive Schema and Data Storage}
% Slide: how Hive tables map onto storage (directory, files, file format)
% and where the table metadata is kept.
\begin{frame}
  \frametitle{Hive Schema and Data Storage}
  \begin{itemize}
    \item Hive queries operate on tables, similar to RDBMS.
    % Nested list: properties of a Hive table.
    \begin{itemize}
      \item A table corresponds to a directory in storage (HDFS, S3, GCS, or Azure).
      \item Each table comprises one or more files.
      \item Every table is associated with a specific file format.
      \item Hive stores table structure and location in the metadata store (RDBMS).
      \item Hive supports various file formats, such as Parquet, ORC, and Text.
    \end{itemize}
  \end{itemize}
\end{frame}

% Continuation slide: the split between data (file system) and
% metadata (metastore backed by an RDBMS).
\begin{frame}
  \frametitle{Hive Schema and Data Storage (Continued)}
  \begin{itemize}
    \item Hive queries reference the metastore to access table location and structure.
    \item While queries interact with the file system, metadata is stored in the RDBMS.
  \end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Further Readings and Assignment}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
Expand Down
Binary file modified Figures/chapter-03/Hive_Architecture.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Figures/chapter-03/Hive_Architecture.pdf
Binary file not shown.
Binary file added Figures/chapter-03/Hive_Query_Flow.pdf
Binary file not shown.
Binary file modified main.pdf
Binary file not shown.
23 changes: 21 additions & 2 deletions preamble/code_listing.tex
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@
\definecolor{mymauve}{rgb}{0.58,0,0.82}
\definecolor{dkgreen}{rgb}{0,0.6,0}
\definecolor{ltgray}{rgb}{0.5,0.5,0.5}
% Caption styling for code listings.
% NOTE(review): the \renewcommand lines below require the listings package to
% be loaded before this point (otherwise \lstlistingname is undefined) -- confirm.
\usepackage{caption} % Add the caption package

% Caption layout for listings: #1 = label, #2 = separator, #3 = caption text.
\DeclareCaptionFormat{mylst}{#1#2#3}
% Keep punctuation out of the label name. The label is typeset as
% "<name> <number>", so a colon inside the name ends up in front of the number
% ("Code Snippet: 1 ..."). The colon is supplied by labelsep=colon instead,
% giving "Code Snippet 1: <caption>".
\renewcommand\lstlistingname{Code Snippet}
% Heading printed by \lstlistoflistings.
\renewcommand\lstlistlistingname{Code Snippets}
%\DeclareCaptionStyle{listing} [justification=raggedright,indention=0pt, labelfont=bf]{}
%\captionsetup[lstlisting]{style=listing, labelsep=none}

\captionsetup[lstlisting]{format=mylst,labelfont=bf,labelsep=colon,justification=raggedright}

\lstset{%
frame=tb,
Expand Down Expand Up @@ -37,7 +47,7 @@

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\lstset{%
language=sql,
language=SQL,
backgroundcolor=\color{white},
basicstyle=\footnotesize,
breakatwhitespace=false,
Expand All @@ -64,8 +74,17 @@
showtabs=false,
stepnumber=1,
tabsize=4,
title=\lstname
caption=Example SQL Query
}
% \lstset{
% language=SQL,
% basicstyle=\ttfamily,
% keywordstyle=\color{blue},
% commentstyle=\color{green},
% stringstyle=\color{red},
% showstringspaces=false,
% tabsize=2
% }

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Expand Down
4 changes: 3 additions & 1 deletion preamble/preamble.tex
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
\usepackage{import}
\usepackage{marvosym}
\usepackage{ulem} %Strikethrough
\usepackage[inkscapelatex=false]{svg}


%\overfullrule=2cm
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Expand All @@ -46,7 +48,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\input{preamble/tikz_preamble.tex}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\input{preamble/code_listing.tex}
\input{preamble/code_listing.tex}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{tikz,lipsum,fontspec}
\usetikzlibrary{shapes.callouts,decorations.pathmorphing}
Expand Down
50 changes: 0 additions & 50 deletions tikz_extractor.tex

This file was deleted.

0 comments on commit 602420b

Please sign in to comment.