diff --git a/.gitignore b/.gitignore index 3c1ee6a..6859a07 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ # Python env env/ .venv/ + +# Files created when running the code +*_new* diff --git a/01-files.jl b/01-files.jl new file mode 100644 index 0000000..00e46c9 --- /dev/null +++ b/01-files.jl @@ -0,0 +1,99 @@ +# Reading and writing external files + +## CSV: probably the most common type of data file you will find +using CSV +using DataFrames + +# Note: go to the workshop directory before reading the CSV file +# by right-clicking on the desired directory and selecting +# `Julia: Change to this directory +df = CSV.read("demographics.csv", DataFrame) # read(, ) + +# Writing files +## As an example, let's change some column names and then save it +renamed_df = rename( + df, + Dict("AGE" => "AGE (years)", "WEIGHT" => "WEIGHT (kg)") +) + +## Tip: you can rename columns programmatically by passing a function +lowercase_df = rename(lowercase, df) # Make all columns be lowercase + +# Now we are ready to save the new file +CSV.write("demographics_new.csv", renamed_df) # write(, ) +# CSV.write("demographics.csv", renamed_df) # Watch out: This would overwrite our original dataset + +# Check our new files using VS Code + +## Tip: you can read/save data to a folder +CSV.write("data/demographics_new.csv", renamed_df) +CSV.read("data/demographics_new.csv", DataFrame) + +## Custom specifications (keyword arguments): +readlines("demographics_eu.csv")[1:3] +readlines("demographics.csv")[1:3] # Standard format + +# - delim: CSV files are separated by commas most of the time, but sometimes other +# characters like ';' or '\t' are used. +CSV.read("demographics_eu.csv", DataFrame; delim = ';') # Works, but the numbers were parsed as strings + +# - decimal: if the file contains Floats and they are separated by something different than +# '.' (e.g 3.14), you must specify which character is used. 
If you ever need to use this, +# it will probably be because decimals are separated by commas (e.g 3,14) +CSV.read("demographics_eu.csv", DataFrame; delim = ';', decimal = ',') + +# You can also use these keyword arguments to write files +CSV.write("demographics_eu_new.csv", renamed_df; delim = ';', decimal = ',') +readlines("demographics_eu_new.csv")[1:3] + +# There are many more options: https://csv.juliadata.org/stable/reading.html#CSV.read + +## Excel (.xlsx) +using XLSX + +# Reading files +excel_file = XLSX.readtable("demographics.xlsx", "Sheet1") # readtable(, ) +df_excel = DataFrame(excel_file) # You will most definitely want to convert it to a DataFrame + +## Tip: get all sheets from an Excel file +file = XLSX.readxlsx("demographics.xlsx") # You can see Sheet1 here +XLSX.sheetnames(file) # You can get a vector of sheet names too + +## Tip: you can also use index numbers to refer to sheets +DataFrame(XLSX.readtable("demographics.xlsx", 1)) # We get the first sheet + +# You can also read XLSX files from a folder +DataFrame(XLSX.readtable("data/demographics.xlsx", "Sheet1")) + +# Allow XLSX to infer types (columns will be Any by default) +DataFrame(XLSX.readtable("demographics.xlsx", "Sheet1"; infer_eltypes=true)) # You will most definitely want to infer the columns types + +# Writing files +XLSX.writetable("demographics_new.xlsx", renamed_df) # Same syntax as CSV.write (, ) +XLSX.writetable("data/demographics_new.xlsx", renamed_df) # Save to a folder + +## Watch out: if you try to write a file that already exists, you will get an error +XLSX.writetable("demographics_new.xlsx", lowercase_df) # Won't overwrite, like CSV would + +## SAS files +using ReadStatTables + +# Reading files +## .sas7bdat +DataFrame(readstat("iv_bolus_sd.sas7bdat")) +## .xpt +DataFrame(readstat("iv_bolus_sd.xpt")) + +## Note: ReadStatTables supports other file formats: +## https://junyuan-chen.github.io/ReadStatTables.jl/stable/#Supported-File-Formats + +# Writing files +## Currently, 
ReadStatTables only supports reading files (writing is experimental only) + +############################################################################################## +# Optional: run this to delete all the files created in the examples +begin + root_files = filter(contains("new"), readdir()) + data_files = joinpath.("data", filter(contains("new"), readdir("data"))) + foreach(rm, vcat(root_files, data_files)) +end diff --git a/02-select_subset.jl b/02-select_subset.jl new file mode 100644 index 0000000..7cca938 --- /dev/null +++ b/02-select_subset.jl @@ -0,0 +1,64 @@ +# We often want to retrieve only certain parts of a DataFrame +df = CSV.read("demographics.csv", DataFrame) # Load the demographics dataset from before + +# Columns +names(df) # Get all column names + +## Get a single column as a vector +df.AGE # DataFrame.column_name +df.WEIGHT + +df[!, "AGE"] # Indexing, as if it was a matrix +df[!, "WEIGHT"] + +## Tip: get a copy of the column (instead of the actual column) +df[:, "AGE"] # If you modify this, you won't be modifying the original DataFrame + +## Get multiple columns +df[!, ["AGE", "WEIGHT"]] # This gets messy quickly + +### @select macro +using DataFramesMeta # You don't need to import DataFrames if you import DataFramesMeta + +@select df :AGE :WEIGHT # We use Symbols instead of Strings +@select(df, :AGE, :WEIGHT) # We can also call it in a similar way to functions + +@select df begin # block syntax, probably the best alternative for multiple columns + :ID + :AGE + :WEIGHT +end + +## Tip: select columns the other way around +@select df $(Not([:AGE, :WEIGHT])) # Get all columns, except the ones we specify + +# Rows +## Indexing +df[1:10, ["AGE", "WEIGHT"]] # Get the first 10 rows +df[4:16, All()] # Get rows 4 to 16 for all columns + +## The @subset macro +## Allows selecting rows based on conditional statements +@subset df :AGE .> 60 # Get all subjects that are more than 60 years old + +# You can also have multiple conditions +@subset df begin + 
:AGE .> 60 + :ISMALE .== 1 # Get males only + :WEIGHT .< 50 # Get subjects that weigh less than 50 kg +end + +## Tip: use @rsubset instead of broadcasting everything (.>, .==, etc.) +@rsubset df begin + :AGE > 60 + :ISMALE == 1 + :WEIGHT < 50 +end + +## You don't always want to use @rsubset +@rsubset df :WEIGHT > mean(:WEIGHT) +@subset df :WEIGHT .> mean(:WEIGHT) + +## Common use case: remove rows that have missing values in one column +df_iv = DataFrame(readstat("iv_bolus_sd.xpt")) +@rsubset df_iv !ismissing(:conc) \ No newline at end of file diff --git a/03-transform.jl b/03-transform.jl new file mode 100644 index 0000000..b4e607d --- /dev/null +++ b/03-transform.jl @@ -0,0 +1,40 @@ +# Apply some transformation to one or more columns in our data +include("02-select_subset.jl") + +# Change the sex encoding (ISMALE) +df +@transform df :SEX = [i == 0 ? "Female" : "Male" for i in :ISMALE] # Create a new column +@transform df :ISMALE = [i == 0 ? "Female" : "Male" for i in :ISMALE] # Modify an existing column + +## Tip: use @rtransform to avoid specifying the entire column at once +@rtransform df :SEX = :ISMALE == 0 ? "Female" : "Male" +@rtransform df :ISMALE = :ISMALE == 0 ? "Female" : "Male" + +# You can also apply multiple transformations at once +@rtransform df begin + :ISMALE = :ISMALE == 0 ? "Female" : "Male" + :AGE = Int(round(:AGE, digits=0)) # Round age to an integer + :AGE_months = :AGE * 12 # Calculate age in months +end + +# Notice that our age in months was not computed from the rounded version of the AGE column +## We have to use @astable to be able to use intermediate results +@rtransform df @astable begin + :AGE = Int(round(:AGE, digits=0)) + :AGE_months = :AGE * 12 +end + +# Modify the original DataFrame +@rtransform df :SEX = :ISMALE == 0 ? "Female" : "Male" # Creates a new DataFrame +df # Our original DataFrame remains unchanged + +@rtransform! df :SEX = :ISMALE == 0 ? "Female" : "Male" # Use ! 
at the end to modify the source +df # Watch out: we lost the original DataFrame (we would have to reread our source file) + +## Tip: this works for all of DataFramesMeta.jl's macros +@rsubset! df :SEX == "Female" +df # Now we only have female subjects + +@select! df :AGE :WEIGHT :SEX +df # Now we lost the rest of the columns + diff --git a/04-grouping.jl b/04-grouping.jl new file mode 100644 index 0000000..c6c64b4 --- /dev/null +++ b/04-grouping.jl @@ -0,0 +1,39 @@ +# Sometimes we want to group our data and apply operations according to that grouping +df = CSV.read("demographics.csv", DataFrame) # Load a fresh copy of our dataset + +# The groupby function +groupby(df, :ISMALE) # Group subjects according to sex + +## More complicated example: @transform + groupby +@rtransform! df :WEIGHT_cat = :WEIGHT > 70 ? "Over 70 kg" : "Under 70 kg" +groupby(df, :WEIGHT_cat) + +## Tip: groupby can take multiple columns as grouping keys +groupby(df, [:ISMALE, :WEIGHT_cat]) # Now we get 4 groups + +# Summarizing (@combine) +## A common thing to do after grouping data is to combine it back with some operation. 
+ +# Example: mean age for each sex group +grouped_df = groupby(df, :ISMALE) +@combine grouped_df :AGE = mean(:AGE) +mean((@rsubset df :ISMALE == 0).AGE) # Check the results + +# You can also use DataFrames that have been grouped with multiple columns +combined_df = @combine groupby(df, [:WEIGHT_cat, :ISMALE]) :AGE = mean(:AGE) +@orderby combined_df :ISMALE # Fix awkward ordering with @orderby +@orderby combined_df :ISMALE :WEIGHT_cat # Use multiple columns in @orderby + +## Tip: you can include multiple calculations inside of @combine +@combine grouped_df begin + :AGE = mean(:AGE) + :WEIGHT = mean(:WEIGHT) + :n = length(:AGE) # Calculate the number of subjects for each group +end + +# the @by macro: groupby + @combine in one call +@by df :ISMALE begin + :AGE = mean(:AGE) + :WEIGHT = mean(:WEIGHT) + :n = length(:AGE) +end diff --git a/05-chaining.jl b/05-chaining.jl new file mode 100644 index 0000000..34f80a6 --- /dev/null +++ b/05-chaining.jl @@ -0,0 +1,37 @@ +# Perform all your data wrangling operations in one block with @chain +df = CSV.read("demographics.csv", DataFrame) + +# Get ages for all female subjects +@chain df begin + @rsubset :ISMALE == 0 + @select :ID :AGE # We didn't have to pass df as an argument +end + +# More complicated example +@chain df begin + + @rtransform begin + :SEX = :ISMALE == 0 ? "Female" : "Male" # Create the new sex column + :WEIGHT_cat = :WEIGHT > 70 ? 
"Over 70 kg" : "Under 70 kg" # Create weight categories + end + + @by [:SEX, :WEIGHT_cat] begin # Calculate mean values for each column + :AGE = mean(:AGE) + :SCR = mean(:SCR) + :eGFR = mean(:eGFR) + :n = length(:AGE) + end + + @orderby :SEX :WEIGHT_cat # Fix ordering + + # Make column names more readable + rename( + Dict( + :SEX => :Sex, + :WEIGHT_cat => :Weight, + :AGE => :Age + ) + ) + +end + diff --git a/README.md b/README.md index 950f9a3..bb1682a 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,7 @@ -# Pumas-AI Workshop Templates +# Pumas-AI Data Wrangling Workshop [![CC BY-SA 4.0](https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by-sa/4.0/) -## How to use this template - -1. Click on the green button `Use this template` -1. Edit all the `PLACEHOLDER` in `mkdocs.yml` with respect to `site_name`, `repo_name` and `repo_url`. -1. Edit all the `PLACEHOLDER` in `docs/index.md`, `docs/reference.md` and `docs/instructor.md`. -1. Add appropriate content to `docs/index.md`, `docs/reference.md` and `docs/instructor.md`. 
- ## How to contribute We use [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) @@ -22,9 +15,7 @@ We use [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) ## Authors -- Author 1 - -- Author 2 - -- Author 3 - +- Juan José González Oneto - ## License diff --git a/data/demographics.csv b/data/demographics.csv new file mode 100644 index 0000000..5f23f09 --- /dev/null +++ b/data/demographics.csv @@ -0,0 +1,101 @@ +ID,AGE,WEIGHT,SCR,ISMALE,eGFR +1,34.823,38.212,1.1129,0,42.635 +2,32.765,74.838,0.8846,1,126.0 +3,35.974,37.303,1.1004,1,48.981 +4,38.206,32.969,1.1972,1,38.934 +5,33.559,47.139,1.5924,0,37.198 +6,53.758,50.819,1.6769,0,30.855 +7,25.306,59.304,0.97666,0,82.217 +8,39.897,26.452,1.0817,0,28.899 +9,54.975,26.931,0.90926,0,29.731 +10,40.732,50.878,0.87513,1,80.156 +11,38.603,76.539,0.9541,0,96.028 +12,48.539,113.91,1.0889,0,112.95 +13,28.818,65.829,1.3689,0,63.118 +14,60.933,63.769,0.94223,1,74.322 +15,57.703,37.54,1.3098,1,32.758 +16,32.41,70.069,0.84661,0,105.12 +17,33.799,49.636,1.0998,1,66.572 +18,48.453,59.107,1.1961,0,53.408 +19,40.826,74.732,1.1407,0,76.702 +20,71.319,56.43,0.8041,0,56.9 +21,42.783,92.953,1.3023,1,96.374 +22,33.136,52.328,1.4894,1,52.148 +23,48.292,57.359,1.1164,0,55.624 +24,33.338,58.429,1.2144,0,60.586 +25,29.682,43.66,1.1875,0,47.883 +26,31.032,27.411,0.95849,1,43.282 +27,51.144,72.894,1.3975,0,54.714 +28,35.21,48.149,0.96081,0,61.995 +29,79.292,39.792,1.1153,1,30.084 +30,36.843,45.248,0.93339,0,59.037 +31,19.187,47.021,0.77409,1,101.93 +32,55.554,60.889,0.98239,1,72.695 +33,41.275,98.419,1.2289,1,109.82 +34,49.346,33.442,0.89148,0,40.147 +35,75.654,60.327,0.99383,1,54.249 +36,49.241,53.166,1.1897,1,56.333 +37,32.226,27.841,1.1696,0,30.288 +38,47.13,65.791,0.91439,1,92.807 +39,44.668,65.805,0.76055,0,97.378 +40,32.888,63.813,1.4133,0,57.096 +41,37.504,61.231,1.0209,1,85.382 +42,42.549,45.632,1.3726,0,38.247 +43,56.144,67.582,0.79364,1,99.178 +44,34.77,57.067,0.98974,0,71.629 
+45,31.832,38.174,1.4367,0,33.93 +46,30.435,48.515,1.563,1,47.233 +47,41.047,93.403,1.1259,0,96.911 +48,53.297,37.35,1.22,0,31.337 +49,35.503,43.038,0.97497,1,64.067 +50,41.404,47.253,1.3553,1,47.742 +51,37.669,41.304,1.0758,0,46.381 +52,41.054,45.695,1.1881,1,52.855 +53,43.664,24.765,0.88557,0,31.805 +54,43.823,47.549,1.0754,1,59.065 +55,58.715,20.427,1.2121,1,19.026 +56,47.693,44.151,0.99899,0,48.161 +57,44.787,37.563,1.3257,1,37.469 +58,60.371,31.4,1.1461,1,30.3 +59,25.971,64.709,1.2816,1,79.966 +60,27.241,42.836,0.97056,0,58.752 +61,46.255,40.175,1.0276,1,50.904 +62,26.975,56.213,1.2438,1,70.949 +63,41.258,38.281,1.1347,0,39.329 +64,74.954,52.581,1.1033,1,43.053 +65,28.897,55.729,1.3016,0,56.16 +66,35.299,49.359,1.1729,0,52.014 +67,46.802,63.445,1.3773,0,50.682 +68,43.82,63.29,1.1943,1,70.787 +69,35.988,52.836,1.185,0,54.75 +70,21.981,56.093,0.97028,0,80.548 +71,32.63,71.382,1.0415,0,86.872 +72,50.332,43.704,1.0128,0,45.678 +73,51.046,40.91,0.97496,0,44.066 +74,42.821,35.829,1.2623,0,32.564 +75,50.054,98.706,1.2275,0,85.385 +76,40.431,42.513,0.90545,0,55.191 +77,71.791,42.516,1.61,1,25.018 +78,39.383,58.177,1.4767,1,55.054 +79,52.219,26.453,1.102,1,29.265 +80,45.822,98.223,0.8463,0,129.04 +81,66.551,38.448,0.93366,1,42.009 +82,44.64,35.093,1.4036,1,33.114 +83,46.244,39.316,1.0477,0,41.534 +84,27.477,58.911,1.1565,1,79.608 +85,34.575,54.387,1.0842,1,73.449 +86,43.471,55.549,1.4283,1,52.141 +87,62.199,49.899,1.612,1,33.448 +88,41.567,42.826,1.3073,0,38.069 +89,41.272,54.868,1.3139,0,48.674 +90,38.507,49.696,0.99456,0,59.871 +91,30.317,44.18,1.06,1,63.496 +92,44.131,36.558,1.395,1,34.894 +93,20.009,54.23,0.96673,0,79.463 +94,69.412,52.667,1.0861,0,40.409 +95,43.751,48.553,1.2586,1,51.569 +96,31.245,58.631,1.0564,1,83.832 +97,45.48,53.108,1.7202,1,40.53 +98,61.124,27.529,1.0137,0,25.286 +99,33.803,22.206,0.87682,1,37.354 +100,40.145,58.733,1.2478,0,55.487 diff --git a/data/demographics.xlsx b/data/demographics.xlsx new file mode 100644 index 0000000..2396c0f Binary 
files /dev/null and b/data/demographics.xlsx differ diff --git a/demographics.csv b/demographics.csv new file mode 100644 index 0000000..5f23f09 --- /dev/null +++ b/demographics.csv @@ -0,0 +1,101 @@ +ID,AGE,WEIGHT,SCR,ISMALE,eGFR +1,34.823,38.212,1.1129,0,42.635 +2,32.765,74.838,0.8846,1,126.0 +3,35.974,37.303,1.1004,1,48.981 +4,38.206,32.969,1.1972,1,38.934 +5,33.559,47.139,1.5924,0,37.198 +6,53.758,50.819,1.6769,0,30.855 +7,25.306,59.304,0.97666,0,82.217 +8,39.897,26.452,1.0817,0,28.899 +9,54.975,26.931,0.90926,0,29.731 +10,40.732,50.878,0.87513,1,80.156 +11,38.603,76.539,0.9541,0,96.028 +12,48.539,113.91,1.0889,0,112.95 +13,28.818,65.829,1.3689,0,63.118 +14,60.933,63.769,0.94223,1,74.322 +15,57.703,37.54,1.3098,1,32.758 +16,32.41,70.069,0.84661,0,105.12 +17,33.799,49.636,1.0998,1,66.572 +18,48.453,59.107,1.1961,0,53.408 +19,40.826,74.732,1.1407,0,76.702 +20,71.319,56.43,0.8041,0,56.9 +21,42.783,92.953,1.3023,1,96.374 +22,33.136,52.328,1.4894,1,52.148 +23,48.292,57.359,1.1164,0,55.624 +24,33.338,58.429,1.2144,0,60.586 +25,29.682,43.66,1.1875,0,47.883 +26,31.032,27.411,0.95849,1,43.282 +27,51.144,72.894,1.3975,0,54.714 +28,35.21,48.149,0.96081,0,61.995 +29,79.292,39.792,1.1153,1,30.084 +30,36.843,45.248,0.93339,0,59.037 +31,19.187,47.021,0.77409,1,101.93 +32,55.554,60.889,0.98239,1,72.695 +33,41.275,98.419,1.2289,1,109.82 +34,49.346,33.442,0.89148,0,40.147 +35,75.654,60.327,0.99383,1,54.249 +36,49.241,53.166,1.1897,1,56.333 +37,32.226,27.841,1.1696,0,30.288 +38,47.13,65.791,0.91439,1,92.807 +39,44.668,65.805,0.76055,0,97.378 +40,32.888,63.813,1.4133,0,57.096 +41,37.504,61.231,1.0209,1,85.382 +42,42.549,45.632,1.3726,0,38.247 +43,56.144,67.582,0.79364,1,99.178 +44,34.77,57.067,0.98974,0,71.629 +45,31.832,38.174,1.4367,0,33.93 +46,30.435,48.515,1.563,1,47.233 +47,41.047,93.403,1.1259,0,96.911 +48,53.297,37.35,1.22,0,31.337 +49,35.503,43.038,0.97497,1,64.067 +50,41.404,47.253,1.3553,1,47.742 +51,37.669,41.304,1.0758,0,46.381 +52,41.054,45.695,1.1881,1,52.855 
+53,43.664,24.765,0.88557,0,31.805 +54,43.823,47.549,1.0754,1,59.065 +55,58.715,20.427,1.2121,1,19.026 +56,47.693,44.151,0.99899,0,48.161 +57,44.787,37.563,1.3257,1,37.469 +58,60.371,31.4,1.1461,1,30.3 +59,25.971,64.709,1.2816,1,79.966 +60,27.241,42.836,0.97056,0,58.752 +61,46.255,40.175,1.0276,1,50.904 +62,26.975,56.213,1.2438,1,70.949 +63,41.258,38.281,1.1347,0,39.329 +64,74.954,52.581,1.1033,1,43.053 +65,28.897,55.729,1.3016,0,56.16 +66,35.299,49.359,1.1729,0,52.014 +67,46.802,63.445,1.3773,0,50.682 +68,43.82,63.29,1.1943,1,70.787 +69,35.988,52.836,1.185,0,54.75 +70,21.981,56.093,0.97028,0,80.548 +71,32.63,71.382,1.0415,0,86.872 +72,50.332,43.704,1.0128,0,45.678 +73,51.046,40.91,0.97496,0,44.066 +74,42.821,35.829,1.2623,0,32.564 +75,50.054,98.706,1.2275,0,85.385 +76,40.431,42.513,0.90545,0,55.191 +77,71.791,42.516,1.61,1,25.018 +78,39.383,58.177,1.4767,1,55.054 +79,52.219,26.453,1.102,1,29.265 +80,45.822,98.223,0.8463,0,129.04 +81,66.551,38.448,0.93366,1,42.009 +82,44.64,35.093,1.4036,1,33.114 +83,46.244,39.316,1.0477,0,41.534 +84,27.477,58.911,1.1565,1,79.608 +85,34.575,54.387,1.0842,1,73.449 +86,43.471,55.549,1.4283,1,52.141 +87,62.199,49.899,1.612,1,33.448 +88,41.567,42.826,1.3073,0,38.069 +89,41.272,54.868,1.3139,0,48.674 +90,38.507,49.696,0.99456,0,59.871 +91,30.317,44.18,1.06,1,63.496 +92,44.131,36.558,1.395,1,34.894 +93,20.009,54.23,0.96673,0,79.463 +94,69.412,52.667,1.0861,0,40.409 +95,43.751,48.553,1.2586,1,51.569 +96,31.245,58.631,1.0564,1,83.832 +97,45.48,53.108,1.7202,1,40.53 +98,61.124,27.529,1.0137,0,25.286 +99,33.803,22.206,0.87682,1,37.354 +100,40.145,58.733,1.2478,0,55.487 diff --git a/demographics.xlsx b/demographics.xlsx new file mode 100644 index 0000000..2396c0f Binary files /dev/null and b/demographics.xlsx differ diff --git a/demographics_eu.csv b/demographics_eu.csv new file mode 100644 index 0000000..050e338 --- /dev/null +++ b/demographics_eu.csv @@ -0,0 +1,101 @@ +ID;AGE;WEIGHT;SCR;ISMALE;eGFR +1;34,823;38,212;1,1129;0;42,635 
+2;32,765;74,838;0,8846;1;126,0 +3;35,974;37,303;1,1004;1;48,981 +4;38,206;32,969;1,1972;1;38,934 +5;33,559;47,139;1,5924;0;37,198 +6;53,758;50,819;1,6769;0;30,855 +7;25,306;59,304;0,97666;0;82,217 +8;39,897;26,452;1,0817;0;28,899 +9;54,975;26,931;0,90926;0;29,731 +10;40,732;50,878;0,87513;1;80,156 +11;38,603;76,539;0,9541;0;96,028 +12;48,539;113,91;1,0889;0;112,95 +13;28,818;65,829;1,3689;0;63,118 +14;60,933;63,769;0,94223;1;74,322 +15;57,703;37,54;1,3098;1;32,758 +16;32,41;70,069;0,84661;0;105,12 +17;33,799;49,636;1,0998;1;66,572 +18;48,453;59,107;1,1961;0;53,408 +19;40,826;74,732;1,1407;0;76,702 +20;71,319;56,43;0,8041;0;56,9 +21;42,783;92,953;1,3023;1;96,374 +22;33,136;52,328;1,4894;1;52,148 +23;48,292;57,359;1,1164;0;55,624 +24;33,338;58,429;1,2144;0;60,586 +25;29,682;43,66;1,1875;0;47,883 +26;31,032;27,411;0,95849;1;43,282 +27;51,144;72,894;1,3975;0;54,714 +28;35,21;48,149;0,96081;0;61,995 +29;79,292;39,792;1,1153;1;30,084 +30;36,843;45,248;0,93339;0;59,037 +31;19,187;47,021;0,77409;1;101,93 +32;55,554;60,889;0,98239;1;72,695 +33;41,275;98,419;1,2289;1;109,82 +34;49,346;33,442;0,89148;0;40,147 +35;75,654;60,327;0,99383;1;54,249 +36;49,241;53,166;1,1897;1;56,333 +37;32,226;27,841;1,1696;0;30,288 +38;47,13;65,791;0,91439;1;92,807 +39;44,668;65,805;0,76055;0;97,378 +40;32,888;63,813;1,4133;0;57,096 +41;37,504;61,231;1,0209;1;85,382 +42;42,549;45,632;1,3726;0;38,247 +43;56,144;67,582;0,79364;1;99,178 +44;34,77;57,067;0,98974;0;71,629 +45;31,832;38,174;1,4367;0;33,93 +46;30,435;48,515;1,563;1;47,233 +47;41,047;93,403;1,1259;0;96,911 +48;53,297;37,35;1,22;0;31,337 +49;35,503;43,038;0,97497;1;64,067 +50;41,404;47,253;1,3553;1;47,742 +51;37,669;41,304;1,0758;0;46,381 +52;41,054;45,695;1,1881;1;52,855 +53;43,664;24,765;0,88557;0;31,805 +54;43,823;47,549;1,0754;1;59,065 +55;58,715;20,427;1,2121;1;19,026 +56;47,693;44,151;0,99899;0;48,161 +57;44,787;37,563;1,3257;1;37,469 +58;60,371;31,4;1,1461;1;30,3 +59;25,971;64,709;1,2816;1;79,966 +60;27,241;42,836;0,97056;0;58,752 
+61;46,255;40,175;1,0276;1;50,904 +62;26,975;56,213;1,2438;1;70,949 +63;41,258;38,281;1,1347;0;39,329 +64;74,954;52,581;1,1033;1;43,053 +65;28,897;55,729;1,3016;0;56,16 +66;35,299;49,359;1,1729;0;52,014 +67;46,802;63,445;1,3773;0;50,682 +68;43,82;63,29;1,1943;1;70,787 +69;35,988;52,836;1,185;0;54,75 +70;21,981;56,093;0,97028;0;80,548 +71;32,63;71,382;1,0415;0;86,872 +72;50,332;43,704;1,0128;0;45,678 +73;51,046;40,91;0,97496;0;44,066 +74;42,821;35,829;1,2623;0;32,564 +75;50,054;98,706;1,2275;0;85,385 +76;40,431;42,513;0,90545;0;55,191 +77;71,791;42,516;1,61;1;25,018 +78;39,383;58,177;1,4767;1;55,054 +79;52,219;26,453;1,102;1;29,265 +80;45,822;98,223;0,8463;0;129,04 +81;66,551;38,448;0,93366;1;42,009 +82;44,64;35,093;1,4036;1;33,114 +83;46,244;39,316;1,0477;0;41,534 +84;27,477;58,911;1,1565;1;79,608 +85;34,575;54,387;1,0842;1;73,449 +86;43,471;55,549;1,4283;1;52,141 +87;62,199;49,899;1,612;1;33,448 +88;41,567;42,826;1,3073;0;38,069 +89;41,272;54,868;1,3139;0;48,674 +90;38,507;49,696;0,99456;0;59,871 +91;30,317;44,18;1,06;1;63,496 +92;44,131;36,558;1,395;1;34,894 +93;20,009;54,23;0,96673;0;79,463 +94;69,412;52,667;1,0861;0;40,409 +95;43,751;48,553;1,2586;1;51,569 +96;31,245;58,631;1,0564;1;83,832 +97;45,48;53,108;1,7202;1;40,53 +98;61,124;27,529;1,0137;0;25,286 +99;33,803;22,206;0,87682;1;37,354 +100;40,145;58,733;1,2478;0;55,487 diff --git a/docs/code_of_conduct.md b/docs/code_of_conduct.md index 29de375..cac745e 100644 --- a/docs/code_of_conduct.md +++ b/docs/code_of_conduct.md @@ -1,5 +1,5 @@ --- -title: Code of Conduct for Pumas-AI Workshop PLACEHOLDER +title: Code of Conduct for Pumas-AI Data Wrangling Workshop description: Participants and Instructors must follow this at all times. 
--- diff --git a/docs/contribute.md b/docs/contribute.md index 29f23fc..57807ab 100644 --- a/docs/contribute.md +++ b/docs/contribute.md @@ -5,7 +5,7 @@ title: How to Contribute [![CC BY-SA 4.0](https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by-sa/4.0/) If you want to contribute to this workshop, -please open a pull request at [`PumasAI-Labs/PLACEHOLDER`](https://github.com/PumasAI-Labs/PLACEHOLDER). +please open a pull request at [`PumasAI-Labs/Data-Wrangling`](https://github.com/PumasAI-Labs/Data-Wrangling). By submitting a pull request, you are in accordance that your contribution will be licensed under [Creative Commons Attribution-ShareAlike 4.0 International](http://creativecommons.org/licenses/by-sa/4.0/). diff --git a/docs/index.md b/docs/index.md index 8d4caa1..f8bf0b6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,24 +1,48 @@ --- -title: Pumas-AI Workshop PLACEHOLDER -description: CHANGE ME. +title: Pumas-AI Data Wrangling Workshop +description: Template for data wrangling workshop covering data I/O and the use of DataFramesMeta. --- [![CC BY-SA 4.0](https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by-sa/4.0/) -Short summary about the workshop. +This workshop is an **introduction to data wrangling in Julia** with a focus on data I/O and `DataFramesMeta.jl`. 
We will cover the following topics: + +- **Reading and writing data**: + - CSV files + - Excel (`.xlsx`) files + - SAS (`.sas7bdat` and `.xpt`) + +- **Select**: + - Selecting specific columns and rows using `@select` and `@subset` macros + +- **Transform**: + - Applying transformations to one or more columns using the `@transform` macro + +- **Grouping and combining**: + - Grouping data using the `groupby` function + - Combining groups and summarizing data using the `@combine` and `@by` macros + +- **Chaining**: + - Perform all data wrangling operations in a single block using the `@chain` macro !!! success "Prerequisites" - This workshop does PLACEHOLDER and PLACEHOLDER. - We recommend users being familiar with PLACEHOLDER, especially PLACEHOLDER. + We recommend users being familiar with Julia syntax, especially variables and types. - The formal requirements are the PLACEHOLDER WORKSHOP WITH LINK. + The formal requirements are the [Julia Syntax Workshop](https://pumasai-labs.github.io/Julia-Workshop/) + and its pre-requisites. ## Schedule -| Time (HH:MM) | Activity | Description | -| ------------ | -------- | ---------------------------------------- | -| 00:00 | Setup | Download files required for the workshop | +| Time (HH:MM) | Activity | Description | +|--------------|--------------------------|---------------------------------------------| +| 00:00 | Setup | Download files required for the workshop | +| 00:25 | Reading and writing data | Showcase `01-files.jl` | +| 00:40 | Select | Showcase `02-select_subset.jl` | +| 00:50 | Transform | Showcase `03-transform.jl` | +| 01:00 | Grouping and combining | Showcase `04-grouping.jl` | +| 01:05 | Chaining | Showcase `05-chaining.jl` | +| 01:15 | Closing remarks | See if there are any questions and feedback | ## Get in touch @@ -27,9 +51,7 @@ please send an email to . 
## Authors -- Author 1 - -- Author 2 - -- Author 3 - +- Juan José González Oneto - ## License diff --git a/docs/instructors.md b/docs/instructors.md index d39958f..7958994 100644 --- a/docs/instructors.md +++ b/docs/instructors.md @@ -1,10 +1,74 @@ --- -title: Instructor's Notes for Pumas-AI Workshop PLACEHOLDER +title: Instructor's Notes for Pumas-AI Data Wrangling Workshop --- [![CC BY-SA 4.0](https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by-sa/4.0/) -PLACEHOLDER. +Start with `01-files.jl`, which covers file handling in Julia. Begin by emphasizing the significance of working in the correct +directory before reading or writing data and how omitting this consideration could lead to errors. Show how to use the `pwd` function to verify the present +working directory and how to use `cd` to navigate to another directory if needed. Some users might find it more convenient to right click on the file and use +the `Julia: Change to This Directory` option, which will automatically move the Julia REPL to the directory containing the selected file. If there are +participants who know how to use shell commands, you can mention how to enter the `shell>` mode in the REPL by typing `;`. Next, focus on the CSV format. Make +sure to highlight the importance of this format and provide an in-depth explanation of how to read and write CSV files to the present working directory and +to a different data folder. One of the examples provided involves using the `rename` function, so make sure to go over how it can be used to change column names +in a `DataFrame`. + +Next, go over the use of the `XLSX.jl` package to read Excel files. Start by explaining how to read an Excel file using `XLSX.readtable`, emphasizing that it is +required to provide the sheet name as an argument and that most of the time, you will want to convert the output from `XLSX.readtable` to a `DataFrame`. 
+There may be questions about what to do if the user doesn't know the sheet names, which you can address by showing how to use `XLSX.readxlsx` and +`XLSX.sheetnames` to obtain a list of sheet names in an Excel file. You might also find it useful to demonstrate how to open an Excel file inside of +VS Code (using the Office Viewer extension, which is installed by default in JuliaHub). Once you have covered how to read files, show how to write files. Make +sure to mention that `XLSX.jl` will not overwrite an existing file like `CSV.jl` would. Instead, you will get an error if you try to create a file that +already exists. + +The last topic for `01-files.jl` is SAS files (`.sas7bdat` and `.xpt`), which can be read using the `readstat` function from the `ReadStatTables.jl` package. +However, note that the current version of `ReadStatTables.jl` only supports reading files, and write support is still experimental. + +Next, go over the contents of `02-select_subset.jl`. First, discuss the `names` function, which allows us to obtain a `Vector` containing all the column +names of a `DataFrame`, which could be useful when working with `DataFrames` that have a large number of columns. After that, show the different alternatives +that there are to retrieve the contents of a single column (dot syntax such as `DataFrame.column_name` and indexing). Participants might be curious about the +difference between these two methods. If that is the case, you can explain that the dot syntax is simpler and more convenient to type, but that indexing is more +flexible and powerful. Additionally, some users could find the indexing syntax more intuitive, even if it is more verbose. When +going over indexing, make sure to explain the difference between using `!` and using `:` to retrieve all rows from a column (`!` +returns the column, while `:` returns a copy of it). 
+ +Afterward, showcase how to select specific columns from a `DataFrame` using the `@select` macro provided by `DataFramesMeta.jl`. This will be the first +time in the workshop in which attendees will use `DataFramesMeta.jl`, so you can take this opportunity to provide a brief overview of the package and its +importance. Make sure to mention that `DataFramesMeta.jl` imports the contents of `DataFrames.jl`, so it's not necessary to import `DataFrames.jl` if `DataFramesMeta.jl` +has already been imported. Lastly, demonstrate the use of the `Not` operator as a means to specify the columns that we **don't** want to select, which might +be useful in cases where there is a large number of columns and we want to select most of them. + +Finally, cover the `@[r]subset` macro, which enables us to filter rows in a `DataFrame` based on specific conditions. Go over the differences between `@subset` +and `@rsubset` in detail, as this concept will be used in the scripts that follow. Finish this part of the lesson by going over the common use case of removing +rows with `missing` observations in a specific column. + +The next script in the workshop is `03-transform.jl`, which focuses on using the `@[r]transform` macro to create a new column in a `DataFrame` or modify an +existing one. Once again, it is important to explain the difference between the column and row versions of the macro (`@transform` and `@rtransform`, +respectively) and demonstrate how the latter provides a more convenient way of specifying column transformations whenever possible. + +After that, introduce the `@astable` macro, which enables accessing intermediate calculations within a `DataFramesMeta.jl` macro call. This macro allows performing +operations on multiple columns simultaneously, making it easier to apply complex transformations and computations that would otherwise be challenging to write +and comprehend. 
+ +Lastly, cover the mutating version of the macros, which allow direct modification of the original `DataFrame`. Make sure to explain that these macros can be +accessed by appending an exclamation mark (`!`) at the end of the macro call, such as `@[r]transform!` or `select!`. This feature is particularly handy when +there is a need to update or transform data in-place, eliminating the requirement for creating additional copies of the `DataFrame`. + +Move on to the `04-grouping.jl` script. Begin by showing the `groupby` function, which allows grouping data based on specific columns. If users are curious +about the return values of `groupby`, you can mention that it returns a `GroupedDataFrame`, which can be inspected through indexing and manipulated with +`transform` and `select` (you can find more details about it in [`groupby`'s documentation](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.groupby)). Next, +show the common pattern of using `groupby` with `@combine` to apply operations on grouped data and generate aggregated results. Make sure to go over +the examples and cover the cases where one or more columns are used to group data. One of the examples includes the use of the `@orderby` macro, so take this +opportunity to provide a detailed explanation of how it works. + +Once participants are comfortable with using `groupby` and `@combine`, you can introduce the `@by` macro, which provides a concise alternative to using +`groupby` and `@combine` by streamlining the process of grouping data and applying operations in a single call. Use the example provided in the script to show a +direct comparison between the methods and mention how using `@by` simplifies the code and enhances readability. + +The last script of the workshop is `05-chaining.jl`. This script provides two examples of how to use the `@chain` macro to perform all data wrangling operations +in a single block. 
Go over the examples and highlight how it can be more convenient than applying all the data wrangling operations separately. Some important +points to mention here are that it is not necessary to pass the `DataFrame` as an argument inside the `@chain` block, and that it is not restricted to including +`DataFramesMeta.jl` macros (it can also include functions from `DataFrames.jl` such as `rename`). ## Get in touch diff --git a/docs/reference.md b/docs/reference.md index ec839ee..88bea32 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -1,28 +1,115 @@ --- -title: Reference Sheets for Pumas-AI Workshop PLACEHOLDER +title: Reference Sheets for Pumas-AI Data Wrangling Workshop --- [![CC BY-SA 4.0](https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by-sa/4.0/) ## Key Points -This can be either a markdown table or a list. +- Before reading or writing data, make sure that you are in the correct directory. You can check the present working directory with the `pwd` function, +and you can navigate to another directory using `cd`. +- You can enter the `shell>` mode in the REPL by typing `;`, which enables you to execute system shell commands (e.g., `pwd`, `cd`, `mkdir`, etc.). +- There are various data formats, but CSV is one of the most commonly used formats. +- To read a CSV file, you can use the `CSV.read` function, and to create or write a CSV file, you can use the `CSV.write` function. +- The `CSV.read` function takes two arguments: a file path and a sink. In most cases, you will use `DataFrame` as the sink. +- The `rename` function allows you to change the column names of a `DataFrame`. +- In some cases, CSV files may not use commas for separation. If that is the case, you can use the `delim` keyword argument to specify +the character used in your file. +- In some regions, commas are used to separate decimals instead of dots (e.g., `3,14` instead of `3.14`). 
In such cases, columns containing
+`Float`s will be interpreted as `String`s. To avoid this, you can use the `decimal` keyword argument.
+- The `XLSX.jl` package enables reading and writing of Excel files (`.xlsx`). To read a file, you can use the `XLSX.readtable` function, and to write a file,
+you can use `XLSX.writetable`.
+- When using `XLSX.readtable`, you need to specify the sheet you want to read from since Excel files can have multiple sheets. If you are unsure
+about the sheets in the Excel file, you can use `XLSX.readxlsx` and `XLSX.sheetnames` to obtain a `Vector` containing all the sheet names.
+- SAS files (`.sas7bdat` and `.xpt`) can be read using the `readstat` function provided by the `ReadStatTables.jl` package.
+- Currently, `ReadStatTables.jl` only supports reading files. Write support is experimental and not fully developed.
+- You can read and write files from different locations by providing the full or relative path instead of just the file name. For more information on
+specifying robust and complex file paths, refer to the [Filesystem](https://docs.julialang.org/en/v1/base/file/#Filesystem) section in the Julia documentation.
+- To obtain a `Vector` with all the column names of a `DataFrame`, you can use the `names` function. This is particularly useful when examining
+`DataFrame`s with a large number of columns.
+- `DataFramesMeta.jl` imports `DataFrames.jl`, allowing you to import only `DataFramesMeta.jl` and still have access to the functions from `DataFrames.jl`.
+- You can select a group of columns from a `DataFrame` using the `@select` macro provided by `DataFramesMeta.jl`.
+- Instead of specifying which columns you want to select, you can specify the columns that you **don't** want to select using the `Not` operator,
+which needs to be called with `$()` (e.g. `@select $(Not(column_name))`).
+- You can select the rows in a `DataFrame` that satisfy a condition using the `@[r]subset` macro. 
+- The row version of a `DataFramesMeta.jl` macro can be accessed by adding an `r` before the macro name (e.g., `@rsubset`, `@rtransform`, etc.).
+These versions are useful as they eliminate the need to broadcast all operations inside the call, but there are cases where it is not possible to do so.
+- To remove rows that have `missing` values in a column, you can use `@rsubset !ismissing(:column_name)`.
+- The `@[r]transform` macro allows you to create a new column or modify an existing one.
+- The `@astable` macro enables access to intermediate calculations within a `DataFramesMeta.jl` macro call and allows operations on multiple columns simultaneously.
+- By appending `!` at the end of a macro call (e.g., `@[r]transform!` or `select!`), you can modify the original `DataFrame` instead of creating a new one.
+- The `groupby` function is used to group data in a `DataFrame` based on specific columns. When used together with `@combine`, it enables
+applying operations on grouped data and generating new aggregated results.
+- The `@by` macro provides a concise alternative to using `groupby` and `@combine`. It allows grouping data and applying operations in a single call.
+- Including `length(:column)` in a `@combine` or `@by` call will return the number of rows in each grouped `DataFrame` as part of the aggregated results.
+The column name used does not affect the results.
+- You can perform all your data wrangling operations in a single block using `@chain`. This block can include both `DataFramesMeta.jl` macros and functions
+such as `rename`. Additionally, `@chain` passes the `DataFrame` as an argument to every function and macro call. For example, inside a `@chain` block,
+you can write `@groupby <columns>` instead of `@groupby <DataFrame> <columns>`. 
## Summary of Basic Commands
 
-| Action | Command | Observations |
-| ----------- | ------------- | --------------------- |
-| placeholder | `placeholder` | this is a placeholder |
+| Action | Command | Observations |
+|-------------------------------------------|--------------------------------------|-------------------------------------------------|
+| Get the current working directory | `pwd()` | Equivalent to running `pwd` in the shell |
+| Change the current working directory | `cd(<path>)` | Equivalent to running `cd <path>` in the shell |
+| Enter the `shell>` mode in the Julia REPL | Type `;` in the REPL | |
+| Read a CSV file | `CSV.read(<file name>, <sink>)` | The sink argument will be a `DataFrame` most of the time |
+| Write a CSV file | `CSV.write(<file name>, <DataFrame>)` | |
+| Change the column names | `rename(<DataFrame>, <Dict of renames>)` or `rename(<function>, <DataFrame>)` | Using the function version can be useful to apply the same type of change to all the columns in the `DataFrame` |
+| Read an Excel file | `DataFrame(XLSX.readtable(<file name>, <sheet name>))` | |
+| Write an Excel file | `XLSX.writetable(<file name>, <DataFrame>)` | |
+| Inspect the sheet names of an Excel file | `XLSX.readxlsx(<file name>)` and `XLSX.sheetnames(<file>)` (optional) | The result of `XLSX.readxlsx` will print a table containing the sheet names. You can optionally then run `XLSX.sheetnames` on the result of `readxlsx` to get a `Vector` with all the sheet names |
+| Read a SAS file (.sas7bdat and .xpt) | `DataFrame(readstat(<file name>))` | |
+| Get the column names of a `DataFrame` | `names(<DataFrame>)` | |
+| Get the values from a `DataFrame`'s column | `DataFrame.column_name`, `DataFrame[!, column_name]` or `DataFrame[:, column_name]` | The dot syntax is more readable and easier to type, but the indexing syntax could be more intuitive for some users. 
Using `:` when indexing returns a copy of the column, while using `!` returns the original column from the `DataFrame` (you could use the result of indexing with `!` to modify the source `DataFrame`) |
+| Select one or more columns from a `DataFrame` | `@select column1 column2 ...` | Can also be done through indexing, but the `@select` macro is more convenient and expressive |
+| Use the row version of a `DataFramesMeta.jl` macro | `@r<macro name>` (e.g `@rsubset`, `@rtransform`, etc.) | |
+| Filter rows in a `DataFrame` using a boolean expression | `@[r]subset <condition>` | |
+| Determine whether a variable is of `Type` `Missing` | `ismissing(<variable>)` | Can be used with `@[r]subset` to remove missing values from a `DataFrame` |
+| Create or modify a column | `@[r]transform <expression>` | The expression is written in the assignment form (e.g. `:column_name = <expression>`). If you want to create a new column, then the assignment should be for a column name that doesn't exist in the `DataFrame`. If you use an existing column name, `@[r]transform` will modify that column. |
+| Access intermediate calculations and manipulate multiple columns at the same time | Include `@astable` inside a macro call | Should be included before the expressions corresponding to the macro call (e.g. 
`@[r]transform @astable <expressions>`) |
+| Use the in-place (mutating) version of a macro | Add `!` at the end (e.g `@[r]transform!`) | This will apply the changes to the original `DataFrame`, instead of creating a new one |
+| Group data in a `DataFrame` according to one or more columns | `groupby(<DataFrame>, <columns>)` | If you want to use more than one column, `<columns>` should be a `Vector` of column names |
+| Apply operations on a grouped `DataFrame` to create aggregated results | `@combine <grouped DataFrame> <expressions>` | |
+| Group a `DataFrame` and apply operations to create aggregated results | `@by <DataFrame> <columns> <expressions>` | It is equivalent to `groupby(<DataFrame>, <columns>)` and then `@combine <expressions>` |
+| Perform all data wrangling operations in a single block | `@chain <DataFrame> <operations>` | It is not necessary to pass the `DataFrame` as an argument to the macros and functions used inside of the `@chain` block |
 
 ## Glossary
 
-`term1`
+CSV files
 
-: Definition of the term one above.
+: CSV stands for **C**omma-**S**eparated **V**alues. It is a popular file format that uses lines to represent rows (observations)
+and commas (`,`) to separate values (although other characters such as `;` can also be used).
 
-`term2`
+Sink (from `CSV.read`)
 
-: Definition of the term two above.
+: It is the second positional argument from `CSV.read` and is used to specify where to store or materialize the parsed data from the CSV file.
+Most of the time you will want to use a `DataFrame` (`CSV.read(<file name>, DataFrame)`)
+
+Excel
+
+: Excel is a widely used spreadsheet program developed by Microsoft. Excel files typically have the `.xls` and `.xlsx` extensions, but the `.xlsx` extension
+should be preferred.
+
+SAS data files
+
+: Data format used and created by the SAS statistical software. They come in two common extensions: `.sas7bdat` and `.xpt`. These files can be read in Julia
+using the `ReadStatTables.jl` package.
+
+`DataFrame`
+
+: `DataFrame`s are a versatile and widely used data structure that represents tabular data. You can use them in Julia through the `DataFrames.jl` package. 
+ +`DataFrames.jl` + +: Julia package that allows working with `DataFrames` in Julia. It has a similar design and functionality to other well-known packages such as +[`pandas`](https://pandas.pydata.org/) from Python or [`dplyr`](https://dplyr.tidyverse.org/) from R. + +`DataFramesMeta.jl` + +: A powerful package in Julia that extends the functionality of `DataFrames.jl`, enabling advanced data manipulation and transformation. +It provides a concise and expressive syntax for defining data transformations through the use of macros. ## Get in touch diff --git a/docs/waiver.md b/docs/waiver.md index 017ff3c..bf94153 100644 --- a/docs/waiver.md +++ b/docs/waiver.md @@ -1,5 +1,5 @@ --- -title: Waiver of Liability for Pumas-AI Workshop PLACEHOLDER +title: Waiver of Liability for Pumas-AI Data Wrangling Workshop --- [![CC BY-SA 4.0](https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by-sa/4.0/) diff --git a/iv_bolus_sd.sas7bdat b/iv_bolus_sd.sas7bdat new file mode 100644 index 0000000..6734842 Binary files /dev/null and b/iv_bolus_sd.sas7bdat differ diff --git a/iv_bolus_sd.xpt b/iv_bolus_sd.xpt new file mode 100644 index 0000000..e2a40e7 Binary files /dev/null and b/iv_bolus_sd.xpt differ diff --git a/mkdocs.yml b/mkdocs.yml index dab0a34..e85e76d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,7 +1,7 @@ # yaml-language-server: $schema=https://squidfunk.github.io/mkdocs-material/schema.json -site_name: Pumas-AI Workshop PLACEHOLDER -repo_name: PumasAI-Labs/Workshop-PLACEHOLDER -repo_url: https://github.com/PumasAI-Labs/Workshop-PLACEHOLDER +site_name: Pumas-AI Data Wrangling Workshop +repo_name: PumasAI-Labs/Data-Wrangling +repo_url: https://github.com/PumasAI-Labs/Data-Wrangling copyright: Copyright © 2023 Pumas-AI, Inc. plugins: - search