add projA1

DS-100 · Oct 18, 2024 · 33d653b · 33d653b
1 parent c9409bb
commit 33d653b
Show file tree

Hide file tree

Showing 11 changed files with 754 additions and 2 deletions.
diff --git a/_quarto.yml b/_quarto.yml
@@ -20,7 +20,7 @@ book:
         - pandas/pandas.md
         - regex/regex.md
         - visualizations/visualizations.md
-        # - projA1/projA1.md
+        - projA1/projA1.md
         # - projA2/projA2.md
         # - sql/sql.md
 

diff --git a/docs/autograder_gradescope/autograder_gradescope.html b/docs/autograder_gradescope/autograder_gradescope.html
@@ -147,6 +147,11 @@
   <div class="sidebar-item-container"> 
   <a href="../visualizations/visualizations.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Visualizations</span></a>
   </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="../projA1/projA1.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Project A1 Common Questions</span></a>
+  </div>
 </li>
     </ul>
     </div>

diff --git a/docs/index.html b/docs/index.html
@@ -148,6 +148,11 @@
   <div class="sidebar-item-container"> 
   <a href="./visualizations/visualizations.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Visualizations</span></a>
   </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./projA1/projA1.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Project A1 Common Questions</span></a>
+  </div>
 </li>
     </ul>
     </div>

diff --git a/docs/jupyter101/jupyter101.html b/docs/jupyter101/jupyter101.html
@@ -147,6 +147,11 @@
   <div class="sidebar-item-container"> 
   <a href="../visualizations/visualizations.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Visualizations</span></a>
   </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="../projA1/projA1.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Project A1 Common Questions</span></a>
+  </div>
 </li>
     </ul>
     </div>

diff --git a/docs/jupyter_datahub/jupyter_datahub.html b/docs/jupyter_datahub/jupyter_datahub.html
@@ -147,6 +147,11 @@
   <div class="sidebar-item-container"> 
   <a href="../visualizations/visualizations.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Visualizations</span></a>
   </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="../projA1/projA1.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Project A1 Common Questions</span></a>
+  </div>
 </li>
     </ul>
     </div>

diff --git a/docs/pandas/pandas.html b/docs/pandas/pandas.html
@@ -147,6 +147,11 @@
   <div class="sidebar-item-container"> 
   <a href="../visualizations/visualizations.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Visualizations</span></a>
   </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="../projA1/projA1.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Project A1 Common Questions</span></a>
+  </div>
 </li>
     </ul>
     </div>

diff --git a/docs/projA1/projA1.html b/docs/projA1/projA1.html
diff --git a/docs/regex/regex.html b/docs/regex/regex.html
@@ -176,6 +176,11 @@
   <div class="sidebar-item-container"> 
   <a href="../visualizations/visualizations.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Visualizations</span></a>
   </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="../projA1/projA1.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Project A1 Common Questions</span></a>
+  </div>
 </li>
     </ul>
     </div>

diff --git a/docs/search.json b/docs/search.json
@@ -348,5 +348,35 @@
     "crumbs": [
       "<span class='chapter-number'>6</span>  <span class='chapter-title'>Visualizations</span>"
     ]
+  },
+  {
+    "objectID": "projA1/projA1.html",
+    "href": "projA1/projA1.html",
+    "title": "Project A1 Common Questions",
+    "section": "",
+    "text": "Question 6",
+    "crumbs": [
+      "<span class='chapter-number'>7</span>  <span class='chapter-title'>Project A1 Common Questions</span>"
+    ]
+  },
+  {
+    "objectID": "projA1/projA1.html#question-6",
+    "href": "projA1/projA1.html#question-6",
+    "title": "Project A1 Common Questions",
+    "section": "",
+    "text": "TypeError: could not convert string to float: 'SF'\nType errors like these usually stem from applying a numeric aggregation function to a non-numeric column as described in the pandas section of the debugging guide.\nAggregation functions like np.median and np.mean are only well-defined for columns with numeric types like int and float. Your code is likely trying to aggregate across all columns in training_data, including those of type str. Instead of aggregating across the entire DataFrame, try just selecting the relevant columns.\n\n\nTypeError: unhashable type: 'Series'\nThis error can occur if you try to use Python’s in to check whether values in a Series are contained in a list. If you’re trying to perform boolean filtering in this manner, you should look into the .isin (documentation) function as introduced in HW 2.",
+    "crumbs": [
+      "<span class='chapter-number'>7</span>  <span class='chapter-title'>Project A1 Common Questions</span>"
+    ]
+  },
+  {
+    "objectID": "projA1/projA1.html#question-7",
+    "href": "projA1/projA1.html#question-7",
+    "title": "Project A1 Common Questions",
+    "section": "Question 7",
+    "text": "Question 7\n\nI’m not sure how to use sklearn to do One Hot Encoding\nA good starting point is to revisit the One Hot Encoding question in Lab 7. It’s recommended you look through this portion of the walkthrough, so you have a good understanding of how to use the OneHotEncoder object. Pay attention to what each variable represents and the expected outputs of the functions used. Can you map the logic from the lab to this project? A nice way to start is to make a new cell and experiment with examples from the documentation.\n\n\nMy OHE columns contain a lot of NaN values\nThis may happen if you try to merge the OHE columns with the training_data table without making sure both DataFrames have the same index values. Look into the pd.merge documentation for ways to resolve this.",
+    "crumbs": [
+      "<span class='chapter-number'>7</span>  <span class='chapter-title'>Project A1 Common Questions</span>"
+    ]
   }
 ]
diff --git a/docs/visualizations/visualizations.html b/docs/visualizations/visualizations.html
@@ -30,6 +30,7 @@
 <script src="../site_libs/quarto-search/fuse.min.js"></script>
 <script src="../site_libs/quarto-search/quarto-search.js"></script>
 <meta name="quarto:offset" content="../">
+<link href="../projA1/projA1.html" rel="next">
 <link href="../regex/regex.html" rel="prev">
 <link href="../data100_logo.png" rel="icon" type="image/png">
 <script src="../site_libs/quarto-html/quarto.js"></script>
@@ -175,6 +176,11 @@
   <div class="sidebar-item-container"> 
   <a href="../visualizations/visualizations.html" class="sidebar-item-text sidebar-link active"><span class="chapter-title">Visualizations</span></a>
   </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="../projA1/projA1.html" class="sidebar-item-text sidebar-link"><span class="chapter-title">Project A1 Common Questions</span></a>
+  </div>
 </li>
     </ul>
     </div>
@@ -727,6 +733,9 @@ <h2 class="anchored" data-anchor-id="my-sns.lineplot-has-an-unwanted-shaded-regi
       </a>          
   </div>
   <div class="nav-page nav-page-next">
+      <a href="../projA1/projA1.html" class="pagination-link" aria-label="<span class='chapter-number'>7</span>&nbsp; <span class='chapter-title'>Project A1 Common Questions</span>">
+        <span class="nav-page-text"><span class="chapter-title">Project A1 Common Questions</span></span> <i class="bi bi-arrow-right-short"></i>
+      </a>
   </div>
 </nav>
 </div> <!-- /content -->

diff --git a/index.tex b/index.tex
@@ -191,7 +191,7 @@ \chapter*{About}\label{about}
 
 \chapter{Jupyter 101}\label{jupyter-101}
 
-\begin{tcolorbox}[enhanced jigsaw, toprule=.15mm, coltitle=black, left=2mm, opacitybacktitle=0.6, toptitle=1mm, leftrule=.75mm, breakable, titlerule=0mm, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, bottomrule=.15mm, arc=.35mm, colframe=quarto-callout-note-color-frame, rightrule=.15mm, colback=white, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Note}, opacityback=0]
+\begin{tcolorbox}[enhanced jigsaw, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Note}, bottomrule=.15mm, colback=white, colframe=quarto-callout-note-color-frame, toptitle=1mm, rightrule=.15mm, toprule=.15mm, breakable, leftrule=.75mm, titlerule=0mm, arc=.35mm, opacityback=0, coltitle=black, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, left=2mm, opacitybacktitle=0.6]
 
 If you're using a MacBook, replace \texttt{ctrl} with \texttt{cmd}.
 
@@ -1307,6 +1307,61 @@ \section{\texorpdfstring{My \texttt{sns.lineplot} has an unwanted shaded
 If you do not want the shaded region, aggregate the data such that there
 is only one y-value for a given x-value; then, make the plot.
 
+\bookmarksetup{startatroot}
+
+\chapter{Project A1 Common Questions}\label{project-a1-common-questions}
+
+\section{Question 6}\label{question-6}
+
+\subsection{\texorpdfstring{\texttt{TypeError:\ could\ not\ convert\ string\ to\ float:\ \textquotesingle{}SF\textquotesingle{}}}{TypeError: could not convert string to float: \textquotesingle SF\textquotesingle{}}}\label{typeerror-could-not-convert-string-to-float-sf}
+
+Type errors like these usually stem from applying a numeric aggregation
+function to a non-numeric column as described in the
+\href{https://ds100.org/debugging-guide/pandas/pandas.html\#typeerror-could-not-convert-string-to-numeric}{\texttt{pandas}
+section} of the debugging guide.
+
+Aggregation functions like \texttt{np.median} and \texttt{np.mean} are
+only well-defined for columns with numeric types like \texttt{int} and
+\texttt{float}. Your code is likely trying to aggregate across all
+columns in \texttt{training\_data}, including those of type
+\texttt{str}. Instead of aggregating across the entire
+\texttt{DataFrame}, try just selecting the relevant columns.
+
+\subsection{\texorpdfstring{\texttt{TypeError:\ unhashable\ type:\ \textquotesingle{}Series\textquotesingle{}}}{TypeError: unhashable type: \textquotesingle Series\textquotesingle{}}}\label{typeerror-unhashable-type-series}
+
+This error can occur if you try to use Python's \texttt{in} to check
+whether values in a \texttt{Series} are contained in a list. If you're
+trying to perform boolean filtering in this manner, you should look into
+the \texttt{.isin}
+(\href{https://pandas.pydata.org/docs/reference/api/pandas.Series.isin.html}{documentation})
+function as introduced in HW 2.
+
+\section{Question 7}\label{question-7}
+
+\subsection{\texorpdfstring{I'm not sure how to use \texttt{sklearn} to
+do One Hot
+Encoding}{I'm not sure how to use sklearn to do One Hot Encoding}}\label{im-not-sure-how-to-use-sklearn-to-do-one-hot-encoding}
+
+A good starting point is to revisit the One Hot Encoding question in Lab
+7. It's recommended you look through this portion of
+\href{https://youtu.be/LohVOmiulHQ?feature=shared&t=442}{the
+walkthrough}, so you have a good understanding of how to use the
+\texttt{OneHotEncoder} object. Pay attention to what each variable
+represents and the expected outputs of the functions used. Can you map
+the logic from the lab to this project? A nice way to start is to make a
+new cell and experiment with examples from
+\href{https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html}{the
+documentation}.
+
+\subsection{\texorpdfstring{My OHE columns contain a lot of \texttt{NaN}
+values}{My OHE columns contain a lot of NaN values}}\label{my-ohe-columns-contain-a-lot-of-nan-values}
+
+This may happen if you try to merge the OHE columns with the
+\texttt{training\_data} table without making sure both
+\texttt{DataFrame}s have the \emph{same index values}. Look into the
+\href{https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.merge.html}{\texttt{pd.merge}
+documentation} for ways to resolve this.
+
 
 
 \end{document}