Add formatting configuration and apply it (#3)

* Added configuration file Remove markdown formatting * Applied formatting to Julia files
PumasAI-Labs · Aug 31, 2023 · c6fd366 · c6fd366
1 parent 18739c4
commit c6fd366
Show file tree

Hide file tree

Showing 6 changed files with 44 additions and 53 deletions.
diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
@@ -0,0 +1,2 @@
+indent = 2
+format_docstrings = true
diff --git a/01-files.jl b/01-files.jl
@@ -11,10 +11,7 @@ df = CSV.read("demographics.csv", DataFrame) # read(<filepath>, <sink>)
 
 # Writing files
 ## As an example, let's change some column names and then save it
-renamed_df = rename(
-    df,
-    Dict("AGE" => "AGE (years)", "WEIGHT" => "WEIGHT (kg)")
-)
+renamed_df = rename(df, Dict("AGE" => "AGE (years)", "WEIGHT" => "WEIGHT (kg)"))
 
 ## Tip: you can rename columns programmatically by passing a function
 lowercase_df = rename(lowercase, df) # Make all columns be lowercase
@@ -66,7 +63,7 @@ DataFrame(XLSX.readtable("demographics.xlsx", 1)) # We get the first sheet
 DataFrame(XLSX.readtable("data/demographics.xlsx", "Sheet1"))
 
 # Allow XLSX to infer types (columns will be Any by default)
-DataFrame(XLSX.readtable("demographics.xlsx", "Sheet1"; infer_eltypes=true)) # You will most definitely want to infer the columns types
+DataFrame(XLSX.readtable("demographics.xlsx", "Sheet1"; infer_eltypes = true)) # You will most definitely want to infer the columns types
 
 # Writing files
 XLSX.writetable("demographics_new.xlsx", renamed_df) # Same syntax as CSV.write (<filepath>, <DataFrame>)
@@ -93,7 +90,7 @@ DataFrame(readstat("iv_bolus_sd.xpt"))
 ##############################################################################################
 # Optional: run this to delete all the files created in the examples
 begin
-    root_files = filter(contains("new"), readdir())
-    data_files = joinpath.("data", filter(contains("new"), readdir("data")))
-    foreach(rm, vcat(root_files, data_files))
+  root_files = filter(contains("new"), readdir())
+  data_files = joinpath.("data", filter(contains("new"), readdir("data")))
+  foreach(rm, vcat(root_files, data_files))
 end
diff --git a/02-select_subset.jl b/02-select_subset.jl
@@ -6,7 +6,7 @@ names(df) # Get all column names
 
 ## Get a single column as a vector
 df.AGE # DataFrame.column_name
-df.WEIGHT 
+df.WEIGHT
 
 df[!, "AGE"] # Indexing, as if it was a matrix
 df[!, "WEIGHT"]
@@ -24,9 +24,9 @@ using DataFramesMeta # You don't need to import DataFrames if you import DataFra
 @select(df, :AGE, :WEIGHT) # We can also call it in a similar way to functions
 
 @select df begin # block syntax, probably the best alternative for multiple columns
-    :ID
-    :AGE
-    :WEIGHT    
+  :ID
+  :AGE
+  :WEIGHT
 end
 
 ## Tip: select columns the other way around
@@ -43,16 +43,16 @@ df[4:16, All()] # Get rows 4 to 16 for all columns
 
 # You can also have multiple conditions
 @subset df begin
-    :AGE .> 60
-    :ISMALE .== 1 # Get males only
-    :WEIGHT .< 50 # Get subjects that weigh less than 50 kg
+  :AGE .> 60
+  :ISMALE .== 1 # Get males only
+  :WEIGHT .< 50 # Get subjects that weigh less than 50 kg
 end
 
 ## Tip: use @rsubset instead of broadcasting everything (.>, .==, etc.)
 @rsubset df begin
-    :AGE > 60
-    :ISMALE == 1
-    :WEIGHT < 50
+  :AGE > 60
+  :ISMALE == 1
+  :WEIGHT < 50
 end
 
 ## You don't always want to use @rsubset
@@ -61,4 +61,4 @@ end
 
 ## Common use case: remove rows that have missing values in one column
 df_iv = DataFrame(readstat("iv_bolus_sd.xpt"))
-@rsubset df_iv !ismissing(:conc)
+@rsubset df_iv !ismissing(:conc)
diff --git a/03-transform.jl b/03-transform.jl
@@ -12,16 +12,16 @@ df
 
 # You can also apply multiple transformations at once
 @rtransform df begin
-    :ISMALE = :ISMALE == 0 ? " Female" : "Male"
-    :AGE = Int(round(:AGE, digits=0)) # Round age to an integer
-    :AGE_months = :AGE * 12 # Calculate age in months
+  :ISMALE = :ISMALE == 0 ? " Female" : "Male"
+  :AGE = Int(round(:AGE, digits = 0)) # Round age to an integer
+  :AGE_months = :AGE * 12 # Calculate age in months
 end
 
 # Notice that our age in months was not computed from the rounded version of the AGE column
 ## We have to use @astable to be able to use intermediate results
 @rtransform df @astable begin
-    :AGE = Int(round(:AGE, digits=0))
-    :AGE_months = :AGE * 12
+  :AGE = Int(round(:AGE, digits = 0))
+  :AGE_months = :AGE * 12
 end
 
 # Modify the original DataFrame
@@ -37,4 +37,3 @@ df # Now we only have female subjects
 
 @select! df :AGE :WEIGHT :SEX
 df # Now we lost the rest of the columns
-
diff --git a/04-grouping.jl b/04-grouping.jl
@@ -26,14 +26,14 @@ combined_df = @combine groupby(df, [:WEIGHT_cat, :ISMALE]) :AGE = mean(:AGE)
 
 ## Tip: you can include multiple calculations inside of @combine
 @combine grouped_df begin
-    :AGE = mean(:AGE)
-    :WEIGHT = mean(:WEIGHT)
-    :n = length(:AGE) # Calculate the number of subjects for each group
+  :AGE = mean(:AGE)
+  :WEIGHT = mean(:WEIGHT)
+  :n = length(:AGE) # Calculate the number of subjects for each group
 end
 
 # the @by macro: groupby + @combine in one call
 @by df :ISMALE begin
-    :AGE = mean(:AGE)
-    :WEIGHT = mean(:WEIGHT)
-    :n = length(:AGE)
+  :AGE = mean(:AGE)
+  :WEIGHT = mean(:WEIGHT)
+  :n = length(:AGE)
 end
diff --git a/05-chaining.jl b/05-chaining.jl
@@ -3,35 +3,28 @@ df = CSV.read("demographics.csv", DataFrame)
 
 # Get ages for all female subjects
 @chain df begin
-    @rsubset :ISMALE == 0
-    @select :ID :AGE # We didn't have to pass df as an argument
+  @rsubset :ISMALE == 0
+  @select :ID :AGE # We didn't have to pass df as an argument
 end
 
 # More complicated example
 @chain df begin
 
-    @rtransform begin
-        :SEX = :ISMALE == 0 ? "Female" : "Male" # Create the new sex column
-        :WEIGHT_cat = :WEIGHT > 70 ? "Over 70 kg" : "Under 70 kg" # Create weight categories
-    end
+  @rtransform begin
+    :SEX = :ISMALE == 0 ? "Female" : "Male" # Create the new sex column
+    :WEIGHT_cat = :WEIGHT > 70 ? "Over 70 kg" : "Under 70 kg" # Create weight categories
+  end
 
-    @by [:SEX, :WEIGHT_cat] begin # Calculate mean values for each column
-        :AGE = mean(:AGE)
-        :SCR = mean(:SCR)
-        :eGFR = mean(:eGFR)
-        :n = length(:AGE)
-    end
+  @by [:SEX, :WEIGHT_cat] begin # Calculate mean values for each column
+    :AGE = mean(:AGE)
+    :SCR = mean(:SCR)
+    :eGFR = mean(:eGFR)
+    :n = length(:AGE)
+  end
 
-    @orderby :SEX :WEIGHT_cat # Fix ordering
+  @orderby :SEX :WEIGHT_cat # Fix ordering
 
-    # Make column names more readable
-    rename(
-        Dict(
-            :SEX => :Sex,
-            :WEIGHT_cat => :Weight,
-            :AGE => :Age
-        )
-    )
+  # Make column names more readable
+  rename(Dict(:SEX => :Sex, :WEIGHT_cat => :Weight, :AGE => :Age))
 
 end
-