Skip to content

Commit

Permalink
Release V0.1.5
Browse files (browse the repository at this point in the history)
Feature Engineering Class
  • Loading branch information
AdrianAntico committed Sep 20, 2021
1 parent 3b175b6 commit 823bbff
Show file tree
Hide file tree
Showing 14 changed files with 243 additions and 91 deletions.
132 changes: 100 additions & 32 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
![Version: 0.1.4](https://img.shields.io/static/v1?label=Version&message=0.1.4&color=blue&?style=plastic)
![Version: 0.1.5](https://img.shields.io/static/v1?label=Version&message=0.1.5&color=blue&style=plastic)
![Python](https://img.shields.io/badge/Python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)
![Build: Passing](https://img.shields.io/static/v1?label=Build&message=passing&color=brightgreen)
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/AdrianAntico/RetroFit/graphs/commit-activity)
Expand All @@ -16,7 +16,7 @@ This package is currently in its beginning stages. I'll be working off a bluepri
pip install git+https://github.com/AdrianAntico/RetroFit.git#egg=retrofit
# From pypi
pip install retrofit==0.1.4
pip install retrofit==0.1.5
# Check out R package RemixAutoML
https://github.com/AdrianAntico/RemixAutoML
Expand Down Expand Up @@ -2538,38 +2538,91 @@ x.FitListNames
import pkg_resources
import timeit
import datatable as dt
from datatable import sort, f, by
import retrofit
from retrofit import FeatureEngineering_old as fe
from retrofit import DatatableFE as dtfe
from retrofit import MachineLearning as ml
# Load some data
FilePath = pkg_resources.resource_filename('retrofit', 'datasets/ClassificationData.csv')
data = dt.fread(FilePath)
# Instantiate Feature Engineering Class
FE = dtfe.FE()
# Create some lags
data = FE.FE0_AutoLags(
data,
LagColumnNames=['Independent_Variable1', 'Independent_Variable2'],
DateColumnName='DateTime',
ByVariables='Factor_1',
LagPeriods=[1,2],
ImputeValue=-1,
Sort=True,
use_saved_args=False)
# Create some rolling stats
data = FE.FE0_AutoRollStats(
data,
RollColumnNames=['Independent_Variable1','Independent_Variable2'],
DateColumnName='DateTime',
ByVariables='Factor_1',
MovingAvg_Periods=[1,2],
MovingSD_Periods=[2,3],
MovingMin_Periods=[1,2],
MovingMax_Periods=[1,2],
ImputeValue=-1,
Sort=True,
use_saved_args=False)
# Create some diffs
data = FE.FE0_AutoDiff(
data,
DateColumnName='DateTime',
ByVariables=['Factor_1','Factor_2','Factor_3'],
DiffNumericVariables='Independent_Variable1',
DiffDateVariables=None,
DiffGroupVariables=None,
NLag1=0,
NLag2=1,
Sort=True,
use_saved_args=False)
# Dummify
Output = fe.FE1_DummyVariables(
data = FE.FE1_DummyVariables(
data = data,
ArgsList = None,
CategoricalColumnNames = ['Factor_1','Factor_2','Factor_3'],
Processing = 'datatable',
InputFrame = 'datatable',
OutputFrame = 'datatable')
data = Output['data']
use_saved_args=False)
data = data[:, [name not in ['Factor_1','Factor_2','Factor_3'] for name in data.names]]
# Create Calendar Vars
data = FE.FE1_AutoCalendarVariables(
data,
DateColumnNames='DateTime',
CalendarVariables=['wday','month','quarter'],
use_saved_args=False)
# Type conversions for modeling
data = FE.FE1_ColTypeConversions(
data,
Int2Float=True,
Bool2Float=True,
RemoveDateCols=True,
RemoveStrCols=False,
SkipCols=None,
use_saved_args=False)
# Drop Text Cols (no word2vec yet)
data = data[:, [z for z in data.names if z not in ['Comment']]]
# Create partitioned data sets
DataFrames = fe.FE2_AutoDataParition(
data = data,
ArgsList = None,
DataFrames = FE.FE2_AutoDataPartition(
data,
DateColumnName = None,
PartitionType = 'random',
Ratios = [0.7,0.2,0.1],
ByVariables = None,
Sort = False,
Processing = 'datatable',
InputFrame = 'datatable',
OutputFrame = 'datatable')
Sort = False,
use_saved_args = False)
# Features
Features = [z for z in list(data.names) if not z in ['Adrian','DateTime','Comment','Weights']]
Expand Down Expand Up @@ -2642,38 +2695,53 @@ x.FitListNames
import pkg_resources
import timeit
import datatable as dt
from datatable import sort, f, by
import retrofit
from retrofit import FeatureEngineering_old as fe
from retrofit import DatatableFE as dtfe
from retrofit import MachineLearning as ml
# Load some data
FilePath = pkg_resources.resource_filename('retrofit', 'datasets/MultiClassData.csv')
data = dt.fread(FilePath)
# Instantiate Feature Engineering Class
FE = dtfe.FE()
# Dummify
Output = fe.FE1_DummyVariables(
data = FE.FE1_DummyVariables(
data = data,
ArgsList = None,
CategoricalColumnNames = ['Factor_2','Factor_3'],
Processing = 'datatable',
InputFrame = 'datatable',
OutputFrame = 'datatable')
data = Output['data']
use_saved_args=False)
data = data[:, [name not in ['Factor_2','Factor_3'] for name in data.names]]
# Create Calendar Vars
data = FE.FE1_AutoCalendarVariables(
data,
DateColumnNames='DateTime',
CalendarVariables=['wday','month','quarter'],
use_saved_args=False)
# Type conversions for modeling
data = FE.FE1_ColTypeConversions(
data,
Int2Float=True,
Bool2Float=True,
RemoveDateCols=True,
RemoveStrCols=False,
SkipCols=None,
use_saved_args=False)
# Drop Text Cols (no word2vec yet)
data = data[:, [z for z in data.names if z not in ['Comment']]]
# Create partitioned data sets
DataFrames = fe.FE2_AutoDataParition(
data = data,
ArgsList = None,
DataFrames = FE.FE2_AutoDataPartition(
data,
DateColumnName = None,
PartitionType = 'random',
Ratios = [0.7,0.2,0.1],
ByVariables = None,
Sort = False,
Processing = 'datatable',
InputFrame = 'datatable',
OutputFrame = 'datatable')
Sort = False,
use_saved_args = False)
# Features
Features = [z for z in list(data.names) if not z in ['Adrian','DateTime','Comment','Weights']]
Expand Down
Binary file removed dist/retrofit-0.1.4-py3-none-any.whl
Binary file not shown.
Binary file removed dist/retrofit-0.1.4.tar.gz
Binary file not shown.
Binary file added dist/retrofit-0.1.5-py3-none-any.whl
Binary file not shown.
Binary file added dist/retrofit-0.1.5.tar.gz
Binary file not shown.
134 changes: 101 additions & 33 deletions retrofit.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: retrofit
Version: 0.1.4
Version: 0.1.5
Summary: AutoML, Forecasting, NLP, Image Classification, Feature Engineering, Model Evaluation, Model Interpretation, Fast Processing.
Home-page: https://github.com/AdrianAntico/retrofit
Author-email: adrianantico@gmail.com
Expand All @@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3
Description-Content-Type: text/markdown
License-File: LICENSE

![Version: 0.1.4](https://img.shields.io/static/v1?label=Version&message=0.1.4&color=blue&?style=plastic)
![Version: 0.1.5](https://img.shields.io/static/v1?label=Version&message=0.1.5&color=blue&style=plastic)
![Python](https://img.shields.io/badge/Python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)
![Build: Passing](https://img.shields.io/static/v1?label=Build&message=passing&color=brightgreen)
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/AdrianAntico/RetroFit/graphs/commit-activity)
Expand All @@ -30,7 +30,7 @@ This package is currently in its beginning stages. I'll be working off a bluepri
pip install git+https://github.com/AdrianAntico/RetroFit.git#egg=retrofit

# From pypi
pip install retrofit==0.1.4
pip install retrofit==0.1.5

# Check out R package RemixAutoML
https://github.com/AdrianAntico/RemixAutoML
Expand Down Expand Up @@ -2552,38 +2552,91 @@ x.FitListNames
import pkg_resources
import timeit
import datatable as dt
from datatable import sort, f, by
import retrofit
from retrofit import FeatureEngineering_old as fe
from retrofit import DatatableFE as dtfe
from retrofit import MachineLearning as ml

# Load some data
FilePath = pkg_resources.resource_filename('retrofit', 'datasets/ClassificationData.csv')
data = dt.fread(FilePath)

# Instantiate Feature Engineering Class
FE = dtfe.FE()

# Create some lags
data = FE.FE0_AutoLags(
data,
LagColumnNames=['Independent_Variable1', 'Independent_Variable2'],
DateColumnName='DateTime',
ByVariables='Factor_1',
LagPeriods=[1,2],
ImputeValue=-1,
Sort=True,
use_saved_args=False)

# Create some rolling stats
data = FE.FE0_AutoRollStats(
data,
RollColumnNames=['Independent_Variable1','Independent_Variable2'],
DateColumnName='DateTime',
ByVariables='Factor_1',
MovingAvg_Periods=[1,2],
MovingSD_Periods=[2,3],
MovingMin_Periods=[1,2],
MovingMax_Periods=[1,2],
ImputeValue=-1,
Sort=True,
use_saved_args=False)

# Create some diffs
data = FE.FE0_AutoDiff(
data,
DateColumnName='DateTime',
ByVariables=['Factor_1','Factor_2','Factor_3'],
DiffNumericVariables='Independent_Variable1',
DiffDateVariables=None,
DiffGroupVariables=None,
NLag1=0,
NLag2=1,
Sort=True,
use_saved_args=False)

# Dummify
Output = fe.FE1_DummyVariables(
data = FE.FE1_DummyVariables(
data = data,
ArgsList = None,
CategoricalColumnNames = ['Factor_1','Factor_2','Factor_3'],
Processing = 'datatable',
InputFrame = 'datatable',
OutputFrame = 'datatable')
data = Output['data']
use_saved_args=False)
data = data[:, [name not in ['Factor_1','Factor_2','Factor_3'] for name in data.names]]

# Create Calendar Vars
data = FE.FE1_AutoCalendarVariables(
data,
DateColumnNames='DateTime',
CalendarVariables=['wday','month','quarter'],
use_saved_args=False)

# Type conversions for modeling
data = FE.FE1_ColTypeConversions(
data,
Int2Float=True,
Bool2Float=True,
RemoveDateCols=True,
RemoveStrCols=False,
SkipCols=None,
use_saved_args=False)

# Drop Text Cols (no word2vec yet)
data = data[:, [z for z in data.names if z not in ['Comment']]]

# Create partitioned data sets
DataFrames = fe.FE2_AutoDataParition(
data = data,
ArgsList = None,
DataFrames = FE.FE2_AutoDataPartition(
data,
DateColumnName = None,
PartitionType = 'random',
Ratios = [0.7,0.2,0.1],
ByVariables = None,
Sort = False,
Processing = 'datatable',
InputFrame = 'datatable',
OutputFrame = 'datatable')
Sort = False,
use_saved_args = False)

# Features
Features = [z for z in list(data.names) if not z in ['Adrian','DateTime','Comment','Weights']]
Expand Down Expand Up @@ -2656,38 +2709,53 @@ x.FitListNames
import pkg_resources
import timeit
import datatable as dt
from datatable import sort, f, by
import retrofit
from retrofit import FeatureEngineering_old as fe
from retrofit import DatatableFE as dtfe
from retrofit import MachineLearning as ml

# Load some data
FilePath = pkg_resources.resource_filename('retrofit', 'datasets/MultiClassData.csv')
data = dt.fread(FilePath)

# Instantiate Feature Engineering Class
FE = dtfe.FE()

# Dummify
Output = fe.FE1_DummyVariables(
data = FE.FE1_DummyVariables(
data = data,
ArgsList = None,
CategoricalColumnNames = ['Factor_2','Factor_3'],
Processing = 'datatable',
InputFrame = 'datatable',
OutputFrame = 'datatable')
data = Output['data']
use_saved_args=False)
data = data[:, [name not in ['Factor_2','Factor_3'] for name in data.names]]

# Create Calendar Vars
data = FE.FE1_AutoCalendarVariables(
data,
DateColumnNames='DateTime',
CalendarVariables=['wday','month','quarter'],
use_saved_args=False)

# Type conversions for modeling
data = FE.FE1_ColTypeConversions(
data,
Int2Float=True,
Bool2Float=True,
RemoveDateCols=True,
RemoveStrCols=False,
SkipCols=None,
use_saved_args=False)

# Drop Text Cols (no word2vec yet)
data = data[:, [z for z in data.names if z not in ['Comment']]]

# Create partitioned data sets
DataFrames = fe.FE2_AutoDataParition(
data = data,
ArgsList = None,
DataFrames = FE.FE2_AutoDataPartition(
data,
DateColumnName = None,
PartitionType = 'random',
Ratios = [0.7,0.2,0.1],
ByVariables = None,
Sort = False,
Processing = 'datatable',
InputFrame = 'datatable',
OutputFrame = 'datatable')
Sort = False,
use_saved_args = False)

# Features
Features = [z for z in list(data.names) if not z in ['Adrian','DateTime','Comment','Weights']]
Expand Down
Loading

0 comments on commit 823bbff

Please sign in to comment.