Skip to content

Commit

Permalink
[ADD] Hive comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
MoustafaAMahmoud committed Oct 3, 2023
1 parent 39aaa80 commit 13bb3cb
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 2 deletions.
57 changes: 56 additions & 1 deletion Ch03-HadoopMR/Ch03-HadoopMR.tex
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,61 @@ \subsubsection{Overview of Apache Hive}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsubsection{Comparing Hive to Traditional Databases}
% Part 1 of the Hive vs. traditional RDBMS comparison:
% purpose, query language, speed, data size, and ACID support.
\begin{frame}{Comparison between Apache Hive and Traditional RDBMS (Part 1)}
\begin{table}[h!]
\centering
% The three fixed-width columns total 17cm before padding and rules,
% so the whole tabular is scaled to exactly \textwidth.
% NOTE(review): the m{} column type and \rowcolor{Gray} rely on packages and a
% color that are presumably provided by the preamble -- confirm.
\resizebox{\textwidth}{!}{%
\begin{tabular}{|m{5cm}|m{6cm}|m{6cm}|}
\hline
\rowcolor{Gray}
\textbf{Feature} & \textbf{Apache Hive} & \textbf{Traditional RDBMS} \\
\hline
Purpose & Big data analytics. & Transactional systems and traditional data warehousing. \\
\hline
Query Language & HiveQL, similar to SQL. & SQL. \\
\hline
Speed & Slower, optimized for batch processing. & Faster, optimized for real-time transactional processing. \\
\hline
Data Size & Designed to handle petabytes of data. & Gigabytes to terabytes; some systems can handle petabytes at higher cost. \\
\hline
ACID Properties & Limited ACID support. & Full ACID support. \\
\hline
\end{tabular}% trailing percent keeps the end-of-line space out of the scaled box
}
\caption{Comparison between Apache Hive and Traditional RDBMS}
\end{table}
\end{frame}

% Part 2 of the Hive vs. traditional RDBMS comparison:
% storage, metadata, compute engine, execution location, file formats,
% and connection limits.
\begin{frame}{Comparison between Apache Hive and Traditional RDBMS (Part 2)}
\begin{table}[h!]
\centering
% The three fixed-width columns total 17cm before padding and rules,
% so the whole tabular is scaled to exactly \textwidth.
% NOTE(review): the m{} column type and \rowcolor{Gray} rely on packages and a
% color that are presumably provided by the preamble -- confirm.
\resizebox{\textwidth}{!}{%
\begin{tabular}{|m{5cm}|m{6cm}|m{6cm}|}
\hline
\rowcolor{Gray}
\textbf{Feature} & \textbf{Apache Hive} & \textbf{Traditional RDBMS} \\
\hline
Storage & Built on HDFS. & Uses internal storage mechanisms. \\
\hline
Metadata Storage & Stored in Hive Metastore. & Stored in system catalogs within the database. \\
\hline
Compute Engine & MapReduce, Tez, or Spark can be used. & Built-in engine, tightly integrated. \\
\hline
Query Execution Location & Executes on Hadoop cluster nodes. & Executes on the database server. \\
\hline
Data Storage Formats & ORC, Parquet, CSV, JSON, XML, Avro. & Proprietary binary format. \\
\hline
Max Simultaneous Connections & Governed by the Hadoop cluster. & Varies (hundreds to thousands). \\
\hline
\end{tabular}% trailing percent keeps the end-of-line space out of the scaled box
}
\caption{Comparison between Apache Hive and Traditional RDBMS}
\end{table}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Hive Architecture}

\begin{frame}{Abstract Components of Apache Hive}
Expand All @@ -91,7 +146,7 @@ \subsection{Hive Architecture}
\item Hive Services.
\item Hive Metadata (Metastore).
\item Storage.
\item Computing.
\item Compute.
\end{itemize}
\end{frame}

Expand Down
Binary file modified main.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions preamble/preamble.tex
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
\usepackage{marvosym}
\usepackage{ulem} %Strikethrough
\usepackage[inkscapelatex=false]{svg}
% Caption styling via the caption package.
\usepackage{caption}
% Custom label format: the label name (#1) immediately followed by the
% number (#2), with no space inserted between them.
\DeclareCaptionLabelFormat{nospace}{#1#2}
% Table captions: bold label colored harvardcrimson, label name "Table T-"
% joined directly to the number by the nospace format (e.g. "Table T-1"),
% followed by a period as the label separator.
% NOTE(review): harvardcrimson is assumed to be defined elsewhere in the
% preamble -- confirm.
\captionsetup[table]{labelfont={color=harvardcrimson,bf},name=Table T-,labelformat=nospace,labelsep=period}


%\overfullrule=2cm
Expand Down
2 changes: 1 addition & 1 deletion preamble/presentation-mode.tex
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
%update for publishing
\setbeamersize{text margin left=5pt,text margin right=3cm}
\setbeamersize{text margin left=5pt,text margin right=3.5cm}
%\setbeamersize{text margin left=5pt}

%%remove logo for publishing
Expand Down

0 comments on commit 13bb3cb

Please sign in to comment.