Skip to content

Commit

Permalink
Add drafted Spark version
Browse files Browse the repository at this point in the history
  • Loading branch information
MoustafaAMahmoud committed Mar 4, 2024
1 parent d8eea58 commit a946d4c
Show file tree
Hide file tree
Showing 13 changed files with 2,029 additions and 61 deletions.
117 changes: 58 additions & 59 deletions Ch03-HadoopMR/Ch03-HadoopMR.tex
Original file line number Diff line number Diff line change
Expand Up @@ -461,62 +461,62 @@ \subsubsection{Hive Table Format}
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Text File vs. Binary File in Hive | High Level}
\begin{table}[h!]
\centering
\resizebox{\textwidth}{!}{%
%\begin{tabular}{|m{5cm}|m{6cm}|m{6cm}|}
\begin{tabular}{|l|l|l|}
\hline
\rowcolor{Gray}
Criteria & Text File & Binary File (e.g., ORC, Parquet) \\
\hline
Readability & Human-readable & Not human-readable \\
\hline
Debugging & Easier & Harder \\
\hline
Storage Size & Larger & Smaller (compressed) \\
\hline
Speed & Slower & Faster \\
\hline
Delimiters & Basic (e.g., Control-A) & Complex types supported \\
\hline
Suitability & Small datasets & Large datasets \\
\hline
\end{tabular}
}
\caption{Comparison between Text and Binary File Formats in Hive}
\end{table}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Why Binary Format is Faster than Text Format}
\begin{table}[h!]
\centering
\resizebox{\textwidth}{!}{%
%\begin{tabular}{|m{5cm}|m{6cm}|m{6cm}|}
\begin{tabular}{|l|l|l|}
\hline
\rowcolor{Gray}
Factor & Text File & Binary File \\
\hline
Parsing & Needs conversion & Directly readable \\
\hline
Memory & Less efficient & Efficient \\
\hline
Storage & Larger files & Smaller due to compression \\
\hline
Compression & Basic & Advanced algorithms \\
\hline
IO Operations & More reads/writes & Fewer reads/writes \\
\hline
Schema Evolution & Harder & Easier \\
\hline
\end{tabular}
}
\caption{Summary: Binary formats are generally more efficient in reading, writing, and storing data, making them faster for large datasets.}
\end{table}
\end{frame}
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}{Text File vs. Binary File in Hive | High Level}
% \begin{table}[h!]
% \centering
% \resizebox{\textwidth}{!}{%
% %\begin{tabular}{|m{5cm}|m{6cm}|m{6cm}|}
% \begin{tabular}{|l|l|l|}
% \hline
% \rowcolor{Gray}
% Criteria & Text File & Binary File (e.g., ORC, Parquet) \\
% \hline
% Readability & Human-readable & Not human-readable \\
% \hline
% Debugging & Easier & Harder \\
% \hline
% Storage Size & Larger & Smaller (compressed) \\
% \hline
% Speed & Slower & Faster \\
% \hline
% Delimiters & Basic (e.g., Control-A) & Complex types supported \\
% \hline
% Suitability & Small datasets & Large datasets \\
% \hline
% \end{tabular}
% }
% \caption{Comparison between Text and Binary File Formats in Hive}
% \end{table}
% \end{frame}
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}{Why Binary Format is Faster than Text Format}
% \begin{table}[h!]
% \centering
% \resizebox{\textwidth}{!}{%
% %\begin{tabular}{|m{5cm}|m{6cm}|m{6cm}|}
% \begin{tabular}{|l|l|l|}
% \hline
% \rowcolor{Gray}
% Factor & Text File & Binary File \\
% \hline
% Parsing & Needs conversion & Directly readable \\
% \hline
% Memory & Less efficient & Efficient \\
% \hline
% Storage & Larger files & Smaller due to compression \\
% \hline
% Compression & Basic & Advanced algorithms \\
% \hline
% IO Operations & More reads/writes & Fewer reads/writes \\
% \hline
% Schema Evolution & Harder & Easier \\
% \hline
% \end{tabular}
% }
% \caption{Summary: Binary formats are generally more efficient in reading, writing, and storing data, making them faster for large datasets.}
% \end{table}
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Hive Data Management}
\subsubsection{Hive Database}
Expand Down Expand Up @@ -812,8 +812,7 @@ \subsubsection{Hive Tables}
CREATE TABLE sales (
product_id INT,
order_date STRING,
amount DOUBLE
)
amount DOUBLE)
PARTITIONED BY (year INT, region STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
Expand Down Expand Up @@ -999,7 +998,7 @@ \subsubsection{Hive Tables}
\frametitle{CREATE TABLE in Hive | continued}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 9: Clustering and Sorting | CLUSTERED BY]
\small
vspace{.2cm}
\vspace{.2cm}
\begin{table}[h!]
\centering
\resizebox{\textwidth}{!}{%
Expand Down
Loading

0 comments on commit a946d4c

Please sign in to comment.