Skip to content

Commit

Permalink
Release V0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
AdrianAntico committed Sep 4, 2021
1 parent f90f066 commit 5ef6c1c
Show file tree
Hide file tree
Showing 9 changed files with 203 additions and 13 deletions.
102 changes: 97 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
![Version: 0.0.9](https://img.shields.io/static/v1?label=Version&message=0.0.9&color=blue&?style=plastic)
![Version: 0.1.0](https://img.shields.io/static/v1?label=Version&message=0.1.0&color=blue&?style=plastic)
![Python](https://img.shields.io/badge/Python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)
![Build: Passing](https://img.shields.io/static/v1?label=Build&message=passing&color=brightgreen)
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity)
Expand All @@ -16,7 +16,7 @@ This package is currently in its beginning stages. I'll be working off a bluepri
pip install git+https://github.com/AdrianAntico/RetroFit.git#egg=retrofit
# From pypi
pip install retrofit==0.0.9
pip install retrofit==0.1.0
# Check out R package RemixAutoML
https://github.com/AdrianAntico/RemixAutoML
Expand Down Expand Up @@ -314,8 +314,6 @@ print(data.names)





#### **FE0_AutoDiff()**
<p>

Expand Down Expand Up @@ -681,7 +679,7 @@ ArgsList = DataSets['ArgsList']
<details><summary>Function Description</summary>
<p>

<code>ML0_GetModelData()</code> Automatically create data sets chosen ML algorithm
<code>ML0_GetModelData()</code> Automatically create data sets for the chosen ML algorithm. Currently supports catboost, xgboost, and lightgbm.

</p>
</details>
Expand All @@ -697,6 +695,10 @@ import retrofit
from retrofit import FeatureEngineering as fe
from retrofit import MachineLearning as ml
############################################################################################
# CatBoost
############################################################################################
# Load some data
data = dt.fread("C:/Users/Bizon/Documents/GitHub/BenchmarkData.csv")
Expand Down Expand Up @@ -737,6 +739,96 @@ DataSets = ml.ML0_GetModelData(
catboost_train = DataSets['train_data']
catboost_validation = DataSets['validation_data']
catboost_test = DataSets['test_data']
############################################################################################
# XGBoost
############################################################################################
# Load some data
data = dt.fread("C:/Users/Bizon/Documents/GitHub/BenchmarkData.csv")
# Create partitioned data sets
DataSets = fe.FE2_AutoDataParition(
data=data,
ArgsList=None,
DateColumnName='CalendarDateColumn',
PartitionType='random',
Ratios=[0.70,0.20,0.10],
ByVariables=None,
Processing='datatable',
InputFrame='datatable',
OutputFrame='datatable')
# Collect partitioned data
TrainData = DataSets['TrainData']
ValidationData = DataSets['ValidationData']
TestData = DataSets['TestData']
del DataSets
# Create xgboost data sets
DataSets = ml.ML0_GetModelData(
TrainData=TrainData,
ValidationData=ValidationData,
TestData=TestData,
ArgsList=None,
TargetColumnName='Leads',
NumericColumnNames=['XREGS1', 'XREGS2', 'XREGS3'],
CategoricalColumnNames=['MarketingSegments','MarketingSegments2','MarketingSegments3','Label'],
TextColumnNames=None,
WeightColumnName=None,
Threads=-1,
Processing='xgboost',
InputFrame='datatable')
# Collect xgboost training data
xgboost_train = DataSets['train_data']
xgboost_validation = DataSets['validation_data']
xgboost_test = DataSets['test_data']
############################################################################################
# LightGBM
############################################################################################
# Load some data
data = dt.fread("C:/Users/Bizon/Documents/GitHub/BenchmarkData.csv")
# Create partitioned data sets
DataSets = fe.FE2_AutoDataParition(
data=data,
ArgsList=None,
DateColumnName='CalendarDateColumn',
PartitionType='random',
Ratios=[0.70,0.20,0.10],
ByVariables=None,
Processing='datatable',
InputFrame='datatable',
OutputFrame='datatable')
# Collect partitioned data
TrainData = DataSets['TrainData']
ValidationData = DataSets['ValidationData']
TestData = DataSets['TestData']
del DataSets
# Create lightgbm data sets
DataSets = ml.ML0_GetModelData(
TrainData=TrainData,
ValidationData=ValidationData,
TestData=TestData,
ArgsList=None,
TargetColumnName='Leads',
NumericColumnNames=['XREGS1', 'XREGS2', 'XREGS3'],
CategoricalColumnNames=['MarketingSegments','MarketingSegments2','MarketingSegments3','Label'],
TextColumnNames=None,
WeightColumnName=None,
Threads=-1,
Processing='lightgbm',
InputFrame='datatable')
# Collect lightgbm training data
lightgbm_train = DataSets['train_data']
lightgbm_validation = DataSets['validation_data']
lightgbm_test = DataSets['test_data']
```

</p>
Expand Down
Binary file removed dist/retrofit-0.0.9-py3-none-any.whl
Binary file not shown.
Binary file removed dist/retrofit-0.0.9.tar.gz
Binary file not shown.
Binary file modified dist/retrofit-0.1.0-py3-none-any.whl
Binary file not shown.
Binary file modified dist/retrofit-0.1.0.tar.gz
Binary file not shown.
102 changes: 97 additions & 5 deletions retrofit.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3
Description-Content-Type: text/markdown
License-File: LICENSE

![Version: 0.0.9](https://img.shields.io/static/v1?label=Version&message=0.0.9&color=blue&?style=plastic)
![Version: 0.1.0](https://img.shields.io/static/v1?label=Version&message=0.1.0&color=blue&?style=plastic)
![Python](https://img.shields.io/badge/Python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)
![Build: Passing](https://img.shields.io/static/v1?label=Build&message=passing&color=brightgreen)
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity)
Expand All @@ -31,7 +31,7 @@ This package is currently in its beginning stages. I'll be working off a bluepri
pip install git+https://github.com/AdrianAntico/RetroFit.git#egg=retrofit

# From pypi
pip install retrofit==0.0.9
pip install retrofit==0.1.0

# Check out R package RemixAutoML
https://github.com/AdrianAntico/RemixAutoML
Expand Down Expand Up @@ -329,8 +329,6 @@ print(data.names)





#### **FE0_AutoDiff()**
<p>

Expand Down Expand Up @@ -696,7 +694,7 @@ ArgsList = DataSets['ArgsList']
<details><summary>Function Description</summary>
<p>

<code>ML0_GetModelData()</code> Automatically create data sets chosen ML algorithm
<code>ML0_GetModelData()</code> Automatically create data sets for the chosen ML algorithm. Currently supports catboost, xgboost, and lightgbm.

</p>
</details>
Expand All @@ -712,6 +710,10 @@ import retrofit
from retrofit import FeatureEngineering as fe
from retrofit import MachineLearning as ml

############################################################################################
# CatBoost
############################################################################################

# Load some data
data = dt.fread("C:/Users/Bizon/Documents/GitHub/BenchmarkData.csv")

Expand Down Expand Up @@ -752,6 +754,96 @@ DataSets = ml.ML0_GetModelData(
catboost_train = DataSets['train_data']
catboost_validation = DataSets['validation_data']
catboost_test = DataSets['test_data']

############################################################################################
# XGBoost
############################################################################################

# Load some data
data = dt.fread("C:/Users/Bizon/Documents/GitHub/BenchmarkData.csv")

# Create partitioned data sets
DataSets = fe.FE2_AutoDataParition(
data=data,
ArgsList=None,
DateColumnName='CalendarDateColumn',
PartitionType='random',
Ratios=[0.70,0.20,0.10],
ByVariables=None,
Processing='datatable',
InputFrame='datatable',
OutputFrame='datatable')

# Collect partitioned data
TrainData = DataSets['TrainData']
ValidationData = DataSets['ValidationData']
TestData = DataSets['TestData']
del DataSets

# Create xgboost data sets
DataSets = ml.ML0_GetModelData(
TrainData=TrainData,
ValidationData=ValidationData,
TestData=TestData,
ArgsList=None,
TargetColumnName='Leads',
NumericColumnNames=['XREGS1', 'XREGS2', 'XREGS3'],
CategoricalColumnNames=['MarketingSegments','MarketingSegments2','MarketingSegments3','Label'],
TextColumnNames=None,
WeightColumnName=None,
Threads=-1,
Processing='xgboost',
InputFrame='datatable')

# Collect xgboost training data
xgboost_train = DataSets['train_data']
xgboost_validation = DataSets['validation_data']
xgboost_test = DataSets['test_data']

############################################################################################
# LightGBM
############################################################################################

# Load some data
data = dt.fread("C:/Users/Bizon/Documents/GitHub/BenchmarkData.csv")

# Create partitioned data sets
DataSets = fe.FE2_AutoDataParition(
data=data,
ArgsList=None,
DateColumnName='CalendarDateColumn',
PartitionType='random',
Ratios=[0.70,0.20,0.10],
ByVariables=None,
Processing='datatable',
InputFrame='datatable',
OutputFrame='datatable')

# Collect partitioned data
TrainData = DataSets['TrainData']
ValidationData = DataSets['ValidationData']
TestData = DataSets['TestData']
del DataSets

# Create lightgbm data sets
DataSets = ml.ML0_GetModelData(
TrainData=TrainData,
ValidationData=ValidationData,
TestData=TestData,
ArgsList=None,
TargetColumnName='Leads',
NumericColumnNames=['XREGS1', 'XREGS2', 'XREGS3'],
CategoricalColumnNames=['MarketingSegments','MarketingSegments2','MarketingSegments3','Label'],
TextColumnNames=None,
WeightColumnName=None,
Threads=-1,
Processing='lightgbm',
InputFrame='datatable')

# Collect lightgbm training data
lightgbm_train = DataSets['train_data']
lightgbm_validation = DataSets['validation_data']
lightgbm_test = DataSets['test_data']
```

</p>
Expand Down
2 changes: 1 addition & 1 deletion retrofit/FeatureEngineering.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def FE0_AutoLags(data = None, ArgsList=None, LagColumnNames = None, DateColumnNa
return dict(data = data, ArgsList = ArgsList)


def FE0_AutoRollStats(data = None, ArgsList=None, RollColumnNames = None, DateColumnName = None, ByVariables = None, MovingAvg_Periods = 2, MovingSD_Periods = None, MovingMin_Periods = None, MovingMax_Periods = None, ImputeValue = -1, Sort = True, Processing='datatable', InputFrame='datatable', OutputFrame='datatable'):
def FE0_AutoRollStats(data = None, ArgsList=None, RollColumnNames = None, DateColumnName = None, ByVariables = None, MovingAvg_Periods = None, MovingSD_Periods = None, MovingMin_Periods = None, MovingMax_Periods = None, ImputeValue = -1, Sort = True, Processing='datatable', InputFrame='datatable', OutputFrame='datatable'):
"""
# Goal:
Automatically generate rolling averages, standard deviations, mins and maxes for multiple periods for multiple variables and by variables
Expand Down
4 changes: 2 additions & 2 deletions retrofit/MachineLearning.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def ML0_GetModelData(TrainData=None, ValidationData=None, TestData=None, ArgsLis
Create modeling objects for specific algorithms. E.g. create train, valid, and test objects for catboost
# Output
Return frames for catboost, xgboost, lightgbm, etc.
Return frames for catboost, xgboost, and lightgbm, currently.
# Parameters
TrainData: Source data. Either a datatable frame, polars frame, or pandas frame. The function will run either datatable code or polars code. If your input frame is pandas
Expand All @@ -23,7 +23,7 @@ def ML0_GetModelData(TrainData=None, ValidationData=None, TestData=None, ArgsLis
TextColumnNames: List of integers for the lookback lengths
WeightColumnName: Value to fill the NA's for beginning of series
Threads: Number of threads to utilize if available for the algorithm
Processing: 'datatable' or 'polars'. Choose the package you want to do your processing
Processing: 'catboost', 'xgboost', or 'lightgbm'
InputFrame: 'datatable', 'polars', or 'pandas'. If your input frame is 'pandas', it will be converted to a datatable Frame for generating the new columns
# ML0_GetModelData Example:
Expand Down
6 changes: 6 additions & 0 deletions retrofit/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Module: utils
# Author: Adrian Antico <adrianantico@gmail.com>
# License: MIT
# Release: retrofit 0.1.0
# Last modified : 2021-09-03

def cumsum(x):
"""
Create a list of summed up values from another list
Expand Down

0 comments on commit 5ef6c1c

Please sign in to comment.