Skip to content

Commit

Permalink
Add formatting configuration and apply it (#3)
Browse files Browse the repository at this point in the history
* Added configuration file

Remove markdown formatting

* Applied formatting to Julia files
  • Loading branch information
jotas6 authored Aug 31, 2023
1 parent 18739c4 commit c6fd366
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 53 deletions.
2 changes: 2 additions & 0 deletions .JuliaFormatter.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
indent = 2
format_docstrings = true
13 changes: 5 additions & 8 deletions 01-files.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@ df = CSV.read("demographics.csv", DataFrame) # read(<filepath>, <sink>)

# Writing files
## As an example, let's change some column names and then save it
renamed_df = rename(
df,
Dict("AGE" => "AGE (years)", "WEIGHT" => "WEIGHT (kg)")
)
renamed_df = rename(df, Dict("AGE" => "AGE (years)", "WEIGHT" => "WEIGHT (kg)"))

## Tip: you can rename columns programmatically by passing a function
lowercase_df = rename(lowercase, df) # Make all columns be lowercase
Expand Down Expand Up @@ -66,7 +63,7 @@ DataFrame(XLSX.readtable("demographics.xlsx", 1)) # We get the first sheet
DataFrame(XLSX.readtable("data/demographics.xlsx", "Sheet1"))

# Allow XLSX to infer types (columns will be Any by default)
DataFrame(XLSX.readtable("demographics.xlsx", "Sheet1"; infer_eltypes=true)) # You will most definitely want to infer the column types
DataFrame(XLSX.readtable("demographics.xlsx", "Sheet1"; infer_eltypes = true)) # You will most definitely want to infer the column types

# Writing files
XLSX.writetable("demographics_new.xlsx", renamed_df) # Same syntax as CSV.write (<filepath>, <DataFrame>)
Expand All @@ -93,7 +90,7 @@ DataFrame(readstat("iv_bolus_sd.xpt"))
##############################################################################################
# Optional: run this to delete all the files created in the examples
begin
root_files = filter(contains("new"), readdir())
data_files = joinpath.("data", filter(contains("new"), readdir("data")))
foreach(rm, vcat(root_files, data_files))
root_files = filter(contains("new"), readdir())
data_files = joinpath.("data", filter(contains("new"), readdir("data")))
foreach(rm, vcat(root_files, data_files))
end
22 changes: 11 additions & 11 deletions 02-select_subset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ names(df) # Get all column names

## Get a single column as a vector
df.AGE # DataFrame.column_name
df.WEIGHT
df.WEIGHT

df[!, "AGE"] # Indexing, as if it was a matrix
df[!, "WEIGHT"]
Expand All @@ -24,9 +24,9 @@ using DataFramesMeta # You don't need to import DataFrames if you import DataFra
@select(df, :AGE, :WEIGHT) # We can also call it in a similar way to functions

@select df begin # block syntax, probably the best alternative for multiple columns
:ID
:AGE
:WEIGHT
:ID
:AGE
:WEIGHT
end

## Tip: select columns the other way around
Expand All @@ -43,16 +43,16 @@ df[4:16, All()] # Get rows 4 to 16 for all columns

# You can also have multiple conditions
@subset df begin
:AGE .> 60
:ISMALE .== 1 # Get males only
:WEIGHT .< 50 # Get subjects that weigh less than 50 kg
:AGE .> 60
:ISMALE .== 1 # Get males only
:WEIGHT .< 50 # Get subjects that weigh less than 50 kg
end

## Tip: use @rsubset instead of broadcasting everything (.>, .==, etc.)
@rsubset df begin
:AGE > 60
:ISMALE == 1
:WEIGHT < 50
:AGE > 60
:ISMALE == 1
:WEIGHT < 50
end

## You don't always want to use @rsubset
Expand All @@ -61,4 +61,4 @@ end

## Common use case: remove rows that have missing values in one column
df_iv = DataFrame(readstat("iv_bolus_sd.xpt"))
@rsubset df_iv !ismissing(:conc)
@rsubset df_iv !ismissing(:conc)
11 changes: 5 additions & 6 deletions 03-transform.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ df

# You can also apply multiple transformations at once
@rtransform df begin
:ISMALE = :ISMALE == 0 ? " Female" : "Male"
:AGE = Int(round(:AGE, digits=0)) # Round age to an integer
:AGE_months = :AGE * 12 # Calculate age in months
:ISMALE = :ISMALE == 0 ? " Female" : "Male"
:AGE = Int(round(:AGE, digits = 0)) # Round age to an integer
:AGE_months = :AGE * 12 # Calculate age in months
end

# Notice that our age in months was not computed from the rounded version of the AGE column
## We have to use @astable to be able to use intermediate results
@rtransform df @astable begin
:AGE = Int(round(:AGE, digits=0))
:AGE_months = :AGE * 12
:AGE = Int(round(:AGE, digits = 0))
:AGE_months = :AGE * 12
end

# Modify the original DataFrame
Expand All @@ -37,4 +37,3 @@ df # Now we only have female subjects

@select! df :AGE :WEIGHT :SEX
df # Now we lost the rest of the columns

12 changes: 6 additions & 6 deletions 04-grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ combined_df = @combine groupby(df, [:WEIGHT_cat, :ISMALE]) :AGE = mean(:AGE)

## Tip: you can include multiple calculations inside of @combine
@combine grouped_df begin
:AGE = mean(:AGE)
:WEIGHT = mean(:WEIGHT)
:n = length(:AGE) # Calculate the number of subjects for each group
:AGE = mean(:AGE)
:WEIGHT = mean(:WEIGHT)
:n = length(:AGE) # Calculate the number of subjects for each group
end

# the @by macro: groupby + @combine in one call
@by df :ISMALE begin
:AGE = mean(:AGE)
:WEIGHT = mean(:WEIGHT)
:n = length(:AGE)
:AGE = mean(:AGE)
:WEIGHT = mean(:WEIGHT)
:n = length(:AGE)
end
37 changes: 15 additions & 22 deletions 05-chaining.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,28 @@ df = CSV.read("demographics.csv", DataFrame)

# Get ages for all female subjects
@chain df begin
@rsubset :ISMALE == 0
@select :ID :AGE # We didn't have to pass df as an argument
@rsubset :ISMALE == 0
@select :ID :AGE # We didn't have to pass df as an argument
end

# More complicated example
@chain df begin

@rtransform begin
:SEX = :ISMALE == 0 ? "Female" : "Male" # Create the new sex column
:WEIGHT_cat = :WEIGHT > 70 ? "Over 70 kg" : "Under 70 kg" # Create weight categories
end
@rtransform begin
:SEX = :ISMALE == 0 ? "Female" : "Male" # Create the new sex column
:WEIGHT_cat = :WEIGHT > 70 ? "Over 70 kg" : "Under 70 kg" # Create weight categories
end

@by [:SEX, :WEIGHT_cat] begin # Calculate mean values for each column
:AGE = mean(:AGE)
:SCR = mean(:SCR)
:eGFR = mean(:eGFR)
:n = length(:AGE)
end
@by [:SEX, :WEIGHT_cat] begin # Calculate mean values for each column
:AGE = mean(:AGE)
:SCR = mean(:SCR)
:eGFR = mean(:eGFR)
:n = length(:AGE)
end

@orderby :SEX :WEIGHT_cat # Fix ordering
@orderby :SEX :WEIGHT_cat # Fix ordering

# Make column names more readable
rename(
Dict(
:SEX => :Sex,
:WEIGHT_cat => :Weight,
:AGE => :Age
)
)
# Make column names more readable
rename(Dict(:SEX => :Sex, :WEIGHT_cat => :Weight, :AGE => :Age))

end

0 comments on commit c6fd366

Please sign in to comment.