From a8360b02e26202dcc50a42c090137bdfe458d1cf Mon Sep 17 00:00:00 2001 From: Martin-Molinero Date: Wed, 7 Feb 2024 15:35:20 -0300 Subject: [PATCH] Initial commit --- .github/workflows/build.yml | 50 ++++ .gitignore | 279 ++++++++++++++++++++ DataProcessing/CLRImports.py | 12 + DataProcessing/DataProcessing.csproj | 30 +++ DataProcessing/MyCustomDataDownloader.cs | 236 +++++++++++++++++ DataProcessing/Program.cs | 81 ++++++ DataProcessing/config.json | 5 + DataProcessing/process.sample.ipynb | 78 ++++++ DataProcessing/process.sample.py | 32 +++ DataProcessing/process.sample.sh | 0 Demonstration.cs | 77 ++++++ Demonstration.py | 47 ++++ DemonstrationUniverse.cs | 66 +++++ DemonstrationUniverse.py | 50 ++++ DropboxDownloader.py | 119 +++++++++ MyCustomDataType.cs | 156 +++++++++++ MyCustomDataUniverseType.cs | 141 ++++++++++ QuantConnect.DataSource.csproj | 27 ++ README.md | 47 ++++ examples.md | 1 + listing-about.md | 45 ++++ listing-documentation.md | 39 +++ output/alternative/mycustomdatatype/spy.csv | 6 + renameDataset.sh | 57 ++++ tests/MyCustomDataTypeTests.cs | 99 +++++++ tests/Tests.csproj | 23 ++ 26 files changed, 1803 insertions(+) create mode 100644 .github/workflows/build.yml create mode 100644 .gitignore create mode 100644 DataProcessing/CLRImports.py create mode 100644 DataProcessing/DataProcessing.csproj create mode 100644 DataProcessing/MyCustomDataDownloader.cs create mode 100644 DataProcessing/Program.cs create mode 100644 DataProcessing/config.json create mode 100644 DataProcessing/process.sample.ipynb create mode 100644 DataProcessing/process.sample.py create mode 100644 DataProcessing/process.sample.sh create mode 100644 Demonstration.cs create mode 100644 Demonstration.py create mode 100644 DemonstrationUniverse.cs create mode 100644 DemonstrationUniverse.py create mode 100644 DropboxDownloader.py create mode 100644 MyCustomDataType.cs create mode 100644 MyCustomDataUniverseType.cs create mode 100644 QuantConnect.DataSource.csproj create mode 100644 README.md create mode 100644 examples.md create mode 100644 listing-about.md create mode 100644 listing-documentation.md create mode 100644 output/alternative/mycustomdatatype/spy.csv create mode 100644 renameDataset.sh create mode 100644 tests/MyCustomDataTypeTests.cs create mode 100644 tests/Tests.csproj diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..09449fc --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,50 @@ +name: Build & Test + +on: + push: + branches: ['*'] + pull_request: + branches: [master] + +jobs: + build: + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v2 + + - name: Free space + run: df -h && rm -rf /opt/hostedtoolcache* && df -h + + - name: Pull Foundation Image + uses: addnab/docker-run-action@v3 + with: + image: quantconnect/lean:foundation + + - name: Checkout Lean Same Branch + id: lean-same-branch + uses: actions/checkout@v2 + continue-on-error: true + with: + ref: ${{ github.ref }} + repository: QuantConnect/Lean + path: Lean + + - name: Checkout Lean Master + if: steps.lean-same-branch.outcome != 'success' + uses: actions/checkout@v2 + with: + repository: QuantConnect/Lean + path: Lean + + - name: Move Lean + run: mv Lean ../Lean + + - name: BuildDataSource + run: dotnet build ./QuantConnect.DataSource.csproj /p:Configuration=Release /v:quiet /p:WarningLevel=1 + + - name: BuildTests + run: dotnet build ./tests/Tests.csproj /p:Configuration=Release /v:quiet /p:WarningLevel=1 + + - name: Run Tests + run: dotnet test 
./tests/bin/Release/net6.0/Tests.dll diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..866dd07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,279 @@ +# Object files +*.o +*.ko +*.obj +*.elf +*.pyc + +# Visual Studio Project Items: +*.suo + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +#*.dll +*.so +*.so.* +*.dylib + +# Executables +*/bin/*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# QC Cloud Setup Bash Files +*.sh +# Include docker launch scripts for Mac/Linux +!run_docker.sh +!research/run_docker_notebook.sh + +# QC Config Files: +# config.json + +# QC-C-Specific +*Engine/bin/Debug/cache/data/*.zip +*/obj/* +*/bin/* +*Data/* +*Docker/* +*/Docker/* +*Algorithm.Python/Lib/* +*/[Ee]xtensions/* +!**/Libraries/* + +# C Debug Binaries +*.pdb + +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +.vs/ +build/ +bld/ +[Bb]in/ +[Oo]bj/ + +# Roslyn cache directories +*.ide/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +#NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings +*.DotSettings.user + +# JustCode is a .NET coding addin-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# JetBrains Rider +.idea/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# TODO: Comment the next line if you want to checkin your web deploy settings +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# NuGet Packages +*.nupkg +!LocalPackages/* +# The packages folder can be ignored because of Package Restore +**/packages/* +# except build/, which is used as an MSBuild target. 
+!**/packages/build/ +# If using the old MSBuild-Integrated Package Restore, uncomment this: +#!**/packages/repositories.config +# ignore sln level nuget +.nuget/ +!.nuget/NuGet.config + +# Windows Azure Build Output +csx/ +*.build.csdef + +# Windows Store app package directory +AppPackages/ + +# Others +*.Cache +ClientBin/ +[Ss]tyle[Cc]op.* +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.pfx +*.publishsettings +node_modules/ +bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# Test Runner +testrunner/ + +# Meld original diff files +*.orig + +# Output chart data +Charts/ + +# NCrunch files +*.ncrunchsolution +*.ncrunchproject + +# QuantConnect plugin files +QuantConnectProjects.xml +Launcher/Plugins/* +/ApiPython/dist +/ApiPython/quantconnect.egg-info +/ApiPython/quantconnect.egg-info/* + +QuantConnect.Lean.sln.DotSettings* + +#User notebook files +Research/Notebooks + +#Docker result files +Results/ \ No newline at end of file diff --git a/DataProcessing/CLRImports.py b/DataProcessing/CLRImports.py new file mode 100644 index 0000000..fca9342 --- /dev/null +++ b/DataProcessing/CLRImports.py @@ -0,0 +1,12 @@ +# This file is used to import the environment and classes/methods of LEAN. +# so that any python file could be using LEAN's classes/methods. +from clr_loader import get_coreclr +from pythonnet import set_runtime + +# process.runtimeconfig.json is created when we build the DataProcessing Project: +# dotnet build .\DataProcessing\DataProcessing.csproj +set_runtime(get_coreclr('process.runtimeconfig.json')) + +from AlgorithmImports import * +from QuantConnect.Lean.Engine.DataFeeds import * +AddReference("Fasterflect") \ No newline at end of file diff --git a/DataProcessing/DataProcessing.csproj b/DataProcessing/DataProcessing.csproj new file mode 100644 index 0000000..26b5466 --- /dev/null +++ b/DataProcessing/DataProcessing.csproj @@ -0,0 +1,30 @@ + + + Exe + net6.0 + process + true + + + + + + + + + + + + + + + PreserveNewest + + + + + + PreserveNewest + + + \ No newline at end of file diff --git a/DataProcessing/MyCustomDataDownloader.cs b/DataProcessing/MyCustomDataDownloader.cs new file mode 100644 index 0000000..1bfef32 --- /dev/null +++ b/DataProcessing/MyCustomDataDownloader.cs @@ -0,0 +1,236 @@ +/* + * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. + * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Threading; +using System.Threading.Tasks; +using Newtonsoft.Json; +using QuantConnect.Configuration; +using QuantConnect.Data.Auxiliary; +using QuantConnect.DataSource; +using QuantConnect.Lean.Engine.DataFeeds; +using QuantConnect.Logging; +using QuantConnect.Util; + +namespace QuantConnect.DataProcessing +{ + /// + /// MyCustomDataDownloader implementation. + /// + public class MyCustomDataDownloader : IDisposable + { + public const string VendorName = "VendorName"; + public const string VendorDataName = "VendorDataName"; + + private readonly string _destinationFolder; + private readonly string _universeFolder; + private readonly string _clientKey; + private readonly string _dataFolder = Globals.DataFolder; + private readonly bool _canCreateUniverseFiles; + private readonly int _maxRetries = 5; + private static readonly List _defunctDelimiters = new() + { + '-', + '_' + }; + private ConcurrentDictionary> _tempData = new(); + + private readonly JsonSerializerSettings _jsonSerializerSettings = new() + { + DateTimeZoneHandling = DateTimeZoneHandling.Utc + }; + + /// + /// Control the rate of download per unit of time. + /// + private readonly RateGate _indexGate; + + /// + /// Creates a new instance of + /// + /// The folder where the data will be saved + /// The Vendor API key + public MyCustomDataDownloader(string destinationFolder, string apiKey = null) + { + _destinationFolder = Path.Combine(destinationFolder, VendorDataName); + _universeFolder = Path.Combine(_destinationFolder, "universe"); + _clientKey = apiKey ?? Config.Get("vendor-auth-token"); + _canCreateUniverseFiles = Directory.Exists(Path.Combine(_dataFolder, "equity", "usa", "map_files")); + + // Represents rate limits of 10 requests per 1.1 second + _indexGate = new RateGate(10, TimeSpan.FromSeconds(1.1)); + + Directory.CreateDirectory(_destinationFolder); + Directory.CreateDirectory(_universeFolder); + } + + /// + /// Runs the instance of the object. 
+ /// + /// True if process all downloads successfully + public bool Run() + { + var stopwatch = Stopwatch.StartNew(); + var today = DateTime.UtcNow.Date; + + throw new NotImplementedException(); + + Log.Trace($"MyCustomDataDownloader.Run(): Finished in {stopwatch.Elapsed.ToStringInvariant(null)}"); + return true; + } + + /// + /// Sends a GET request for the provided URL + /// + /// URL to send GET request for + /// Content as string + /// Failed to get data after exceeding retries + private async Task HttpRequester(string url) + { + for (var retries = 1; retries <= _maxRetries; retries++) + { + try + { + using (var client = new HttpClient()) + { + client.BaseAddress = new Uri(""); + client.DefaultRequestHeaders.Clear(); + + // You must supply your API key in the HTTP header, + // otherwise you will receive a 403 Forbidden response + client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Token", _clientKey); + + // Responses are in JSON: you need to specify the HTTP header Accept: application/json + client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + + // Makes sure we don't overrun Quiver rate limits accidentally + _indexGate.WaitToProceed(); + + var response = await client.GetAsync(Uri.EscapeUriString(url)); + if (response.StatusCode == HttpStatusCode.NotFound) + { + Log.Error($"MyCustomDataDownloader.HttpRequester(): Files not found at url: {Uri.EscapeUriString(url)}"); + response.DisposeSafely(); + return string.Empty; + } + + if (response.StatusCode == HttpStatusCode.Unauthorized) + { + var finalRequestUri = response.RequestMessage.RequestUri; // contains the final location after following the redirect. + response = client.GetAsync(finalRequestUri).Result; // Reissue the request. The DefaultRequestHeaders configured on the client will be used, so we don't have to set them again. + } + + response.EnsureSuccessStatusCode(); + + var result = await response.Content.ReadAsStringAsync(); + response.DisposeSafely(); + + return result; + } + } + catch (Exception e) + { + Log.Error(e, $"MyCustomDataDownloader.HttpRequester(): Error at HttpRequester. (retry {retries}/{_maxRetries})"); + Thread.Sleep(1000); + } + } + + throw new Exception($"Request failed with no more retries remaining (retry {_maxRetries}/{_maxRetries})"); + } + + /// + /// Saves contents to disk, deleting existing zip files + /// + /// Final destination of the data + /// file name + /// Contents to write + private void SaveContentToFile(string destinationFolder, string name, IEnumerable contents) + { + name = name.ToLowerInvariant(); + var finalPath = Path.Combine(destinationFolder, $"{name}.csv"); + var finalFileExists = File.Exists(finalPath); + + var lines = new HashSet(contents); + if (finalFileExists) + { + foreach (var line in File.ReadAllLines(finalPath)) + { + lines.Add(line); + } + } + + var finalLines = destinationFolder.Contains("universe") ? + lines.OrderBy(x => x.Split(',').First()).ToList() : + lines + .OrderBy(x => DateTime.ParseExact(x.Split(',').First(), "yyyyMMdd", CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal)) + .ToList(); + + var tempPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.tmp"); + File.WriteAllLines(tempPath, finalLines); + var tempFilePath = new FileInfo(tempPath); + tempFilePath.MoveTo(finalPath, true); + } + + /// + /// Tries to normalize a potentially defunct ticker into a normal ticker. 
+ /// + /// Ticker as received from Estimize + /// Set as the non-defunct ticker + /// true for success, false for failure + private static bool TryNormalizeDefunctTicker(string ticker, out string nonDefunctTicker) + { + // The "defunct" indicator can be in any capitalization/case + if (ticker.IndexOf("defunct", StringComparison.OrdinalIgnoreCase) > 0) + { + foreach (var delimChar in _defunctDelimiters) + { + var length = ticker.IndexOf(delimChar); + + // Continue until we exhaust all delimiters + if (length == -1) + { + continue; + } + + nonDefunctTicker = ticker[..length].Trim(); + return true; + } + + nonDefunctTicker = string.Empty; + return false; + } + + nonDefunctTicker = ticker; + return true; + } + + /// + /// Disposes of unmanaged resources + /// + public void Dispose() + { + _indexGate?.Dispose(); + } + } +} \ No newline at end of file diff --git a/DataProcessing/Program.cs b/DataProcessing/Program.cs new file mode 100644 index 0000000..b203eb3 --- /dev/null +++ b/DataProcessing/Program.cs @@ -0,0 +1,81 @@ +/* + * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. + * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +using System; +using System.IO; +using QuantConnect.Configuration; +using QuantConnect.Logging; +using QuantConnect.Util; + +namespace QuantConnect.DataProcessing +{ + /// + /// Entrypoint for the data downloader/converter + /// + public class Program + { + /// + /// Entrypoint of the program + /// + /// Exit code. 0 equals successful, and any other value indicates the downloader/converter failed. + public static void Main() + { + // Get the config values first before running. These values are set for us + // automatically to the value set on the website when defining this data type + var destinationDirectory = Path.Combine( + Config.Get("temp-output-directory", "/temp-output-directory"), + "alternative", + "vendorname"); + + MyCustomDataDownloader instance = null; + try + { + // Pass in the values we got from the configuration into the downloader/converter. + instance = new MyCustomDataDownloader(destinationDirectory); + } + catch (Exception err) + { + Log.Error(err, $"QuantConnect.DataProcessing.Program.Main(): The downloader/converter for {MyCustomDataDownloader.VendorDataName} {MyCustomDataDownloader.VendorDataName} data failed to be constructed"); + Environment.Exit(1); + } + + // No need to edit anything below here for most use cases. + // The downloader/converter is ran and cleaned up for you safely here. + try + { + // Run the data downloader/converter. 
+ var success = instance.Run(); + if (!success) + { + Log.Error($"QuantConnect.DataProcessing.Program.Main(): Failed to download/process {MyCustomDataDownloader.VendorName} {MyCustomDataDownloader.VendorDataName} data"); + Environment.Exit(1); + } + } + catch (Exception err) + { + Log.Error(err, $"QuantConnect.DataProcessing.Program.Main(): The downloader/converter for {MyCustomDataDownloader.VendorDataName} {MyCustomDataDownloader.VendorDataName} data exited unexpectedly"); + Environment.Exit(1); + } + finally + { + // Run cleanup of the downloader/converter once it has finished or crashed. + instance.DisposeSafely(); + } + + // The downloader/converter was successful + Environment.Exit(0); + } + } +} \ No newline at end of file diff --git a/DataProcessing/config.json b/DataProcessing/config.json new file mode 100644 index 0000000..1d1e2f2 --- /dev/null +++ b/DataProcessing/config.json @@ -0,0 +1,5 @@ +{ + "data-folder": "../../../Data/", + + "vendor-auth-token": "" +} \ No newline at end of file diff --git a/DataProcessing/process.sample.ipynb b/DataProcessing/process.sample.ipynb new file mode 100644 index 0000000..2493379 --- /dev/null +++ b/DataProcessing/process.sample.ipynb @@ -0,0 +1,78 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9b8eae46", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# CLRImports is required to handle Lean C# objects for Mapped Datasets (Single asset and Universe Selection)\n", + "# Requirements:\n", + "# python -m pip install clr-loader==0.1.7\n", + "# python -m pip install pythonnet==3.0.0a2\n", + "# This script must be executed in ./bin/Debug/net6.0 after the follwing command is executed\n", + "# dotnet build .\\DataProcessing\\\n", + "import os\n", + "from CLRImports import *\n", + "\n", + "# To use QuantBook, we need to set its internal handlers\n", + "# We download LEAN confif with the default settings \n", + "with open(\"quantbook.json\", 'w') as fp:\n", + " from requests import get\n", + " response = get(\"https://raw.githubusercontent.com/QuantConnect/Lean/master/Launcher/config.json\")\n", + " fp.write(response.text)\n", + "\n", + "Config.SetConfigurationFile(\"quantbook.json\")\n", + "Config.Set(\"composer-dll-directory\", os.path.abspath(''))\n", + "\n", + "# Set the data folder\n", + "Config.Set(\"data-folder\", '')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ddc2ed2-5690-422c-8c91-6e6f64dd45cb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# To generate the Security Identifier, we need to create and initialize the Map File Provider\n", + "# and call the SecurityIdentifier.GenerateEquity method\n", + "mapFileProvider = LocalZipMapFileProvider()\n", + "mapFileProvider.Initialize(DefaultDataProvider())\n", + "sid = SecurityIdentifier.GenerateEquity(\"SPY\", Market.USA, True, mapFileProvider, datetime(2022, 3, 1))\n", + "\n", + "qb = QuantBook()\n", + "symbol = Symbol(sid, \"SPY\")\n", + "history = qb.History(symbol, 3600, Resolution.Daily)\n", + "print(history)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/DataProcessing/process.sample.py 
b/DataProcessing/process.sample.py new file mode 100644 index 0000000..5b8285d --- /dev/null +++ b/DataProcessing/process.sample.py @@ -0,0 +1,32 @@ +# CLRImports is required to handle Lean C# objects for Mapped Datasets (Single asset and Universe Selection) +# Requirements: +# python -m pip install clr-loader==0.1.7 +# python -m pip install pythonnet==3.0.0a2 +# This script must be executed in ./bin/Debug/net6.0 after the following command is executed +# dotnet build .\DataProcessing\ +import os +from CLRImports import * + +# To use QuantBook, we need to set its internal handlers +# We download the LEAN config with the default settings +with open("quantbook.json", 'w') as fp: + from requests import get + response = get("https://raw.githubusercontent.com/QuantConnect/Lean/master/Launcher/config.json") + fp.write(response.text) + +Config.SetConfigurationFile("quantbook.json") +Config.Set("composer-dll-directory", os.path.dirname(os.path.realpath(__file__))) + +# Set the data folder +Config.Set("data-folder", '') + +# To generate the Security Identifier, we need to create and initialize the Map File Provider +# and call the SecurityIdentifier.GenerateEquity method +mapFileProvider = LocalZipMapFileProvider() +mapFileProvider.Initialize(DefaultDataProvider()) +sid = SecurityIdentifier.GenerateEquity("SPY", Market.USA, True, mapFileProvider, datetime(2022, 3, 1)) + +qb = QuantBook() +symbol = Symbol(sid, "SPY") +history = qb.History(symbol, 3600, Resolution.Daily) +print(history) \ No newline at end of file diff --git a/DataProcessing/process.sample.sh b/DataProcessing/process.sample.sh new file mode 100644 index 0000000..e69de29 diff --git a/Demonstration.cs b/Demonstration.cs new file mode 100644 index 0000000..fc0eb1f --- /dev/null +++ b/Demonstration.cs @@ -0,0 +1,77 @@ +/* + * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. + * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * +*/ + +using QuantConnect.Data; +using QuantConnect.Util; +using QuantConnect.Orders; +using QuantConnect.Algorithm; +using QuantConnect.DataSource; + +namespace QuantConnect.DataLibrary.Tests +{ + /// + /// Example algorithm using the custom data type as a source of alpha + /// + public class CustomDataAlgorithm : QCAlgorithm + { + private Symbol _customDataSymbol; + private Symbol _equitySymbol; + + /// + /// Initialise the data and resolution required, as well as the cash and start-end dates for your algorithm. All algorithms must be initialized. + /// + public override void Initialize() + { + SetStartDate(2013, 10, 07); //Set Start Date + SetEndDate(2013, 10, 11); //Set End Date + _equitySymbol = AddEquity("SPY").Symbol; + _customDataSymbol = AddData(_equitySymbol).Symbol; + } + + /// + /// OnData event is the primary entry point for your algorithm. Each new data point will be pumped in here.
+ /// + /// Slice object keyed by symbol containing the stock data + public override void OnData(Slice slice) + { + var data = slice.Get(); + if (!data.IsNullOrEmpty()) + { + // based on the custom data property we will buy or short the underlying equity + if (data[_customDataSymbol].SomeCustomProperty == "buy") + { + SetHoldings(_equitySymbol, 1); + } + else if (data[_customDataSymbol].SomeCustomProperty == "sell") + { + SetHoldings(_equitySymbol, -1); + } + } + } + + /// + /// Order fill event handler. On an order fill update the resulting information is passed to this method. + /// + /// Order event details containing details of the events + public override void OnOrderEvent(OrderEvent orderEvent) + { + if (orderEvent.Status.IsFill()) + { + Debug($"Purchased Stock: {orderEvent.Symbol}"); + } + } + } +} diff --git a/Demonstration.py b/Demonstration.py new file mode 100644 index 0000000..2f55e0d --- /dev/null +++ b/Demonstration.py @@ -0,0 +1,47 @@ +# QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. +# Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from AlgorithmImports import * + +### +### Example algorithm using the custom data type as a source of alpha +### +class CustomDataAlgorithm(QCAlgorithm): + def Initialize(self): + ''' Initialise the data and resolution required, as well as the cash and start-end dates for your algorithm. All algorithms must be initialized.''' + + self.SetStartDate(2020, 10, 7) #Set Start Date + self.SetEndDate(2020, 10, 11) #Set End Date + self.equity_symbol = self.AddEquity("SPY", Resolution.Daily).Symbol + self.custom_data_symbol = self.AddData(MyCustomDataType, self.equity_symbol).Symbol + + def OnData(self, slice): + ''' OnData event is the primary entry point for your algorithm. Each new data point will be pumped in here. + + :param Slice slice: Slice object keyed by symbol containing the stock data + ''' + data = slice.Get(MyCustomDataType) + if data: + custom_data = data[self.custom_data_symbol] + if custom_data.SomeCustomProperty == "buy": + self.SetHoldings(self.equity_symbol, 1) + elif custom_data.SomeCustomProperty == "sell": + self.SetHoldings(self.equity_symbol, -1) + + def OnOrderEvent(self, orderEvent): + ''' Order fill event handler. On an order fill update the resulting information is passed to this method. + + :param OrderEvent orderEvent: Order event details containing details of the events + ''' + if orderEvent.Status == OrderStatus.Fill: + self.Debug(f'Purchased Stock: {orderEvent.Symbol}') \ No newline at end of file diff --git a/DemonstrationUniverse.cs b/DemonstrationUniverse.cs new file mode 100644 index 0000000..d8b962c --- /dev/null +++ b/DemonstrationUniverse.cs @@ -0,0 +1,66 @@ +/* + * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. + * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * +*/ + +using System; +using System.Linq; +using QuantConnect.Data; +using QuantConnect.Data.UniverseSelection; +using QuantConnect.DataSource; + +namespace QuantConnect.Algorithm.CSharp +{ + /// + /// Example algorithm using the custom data type as a source of alpha + /// + public class CustomDataUniverse : QCAlgorithm + { + /// + /// Initialise the data and resolution required, as well as the cash and start-end dates for your algorithm. All algorithms must initialized. + /// + public override void Initialize() + { + // Data ADDED via universe selection is added with Daily resolution. + UniverseSettings.Resolution = Resolution.Daily; + + SetStartDate(2022, 2, 14); + SetEndDate(2022, 2, 18); + SetCash(100000); + + // add a custom universe data source (defaults to usa-equity) + AddUniverse("MyCustomDataUniverseType", Resolution.Daily, data => + { + foreach (var datum in data) + { + Log($"{datum.Symbol},{datum.SomeCustomProperty},{datum.SomeNumericProperty}"); + } + + // define our selection criteria + return from d in data + where d.SomeCustomProperty == "buy" + select d.Symbol; + }); + } + + /// + /// Event fired each time that we add/remove securities from the data feed + /// + /// Security additions/removals for this time step + public override void OnSecuritiesChanged(SecurityChanges changes) + { + Log(changes.ToString()); + } + } +} \ No newline at end of file diff --git a/DemonstrationUniverse.py b/DemonstrationUniverse.py new file mode 100644 index 0000000..80b3657 --- /dev/null +++ b/DemonstrationUniverse.py @@ -0,0 +1,50 @@ +# QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. +# Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from AlgorithmImports import * + +### +### Example algorithm using the custom data type as a source of alpha +### +class CustomDataUniverse(QCAlgorithm): + def Initialize(self): + ''' Initialise the data and resolution required, as well as the cash and start-end dates for your algorithm. All algorithms must initialized. ''' + + # Data ADDED via universe selection is added with Daily resolution. 
+ self.UniverseSettings.Resolution = Resolution.Daily + + self.SetStartDate(2022, 2, 14) + self.SetEndDate(2022, 2, 18) + self.SetCash(100000) + + # add a custom universe data source (defaults to usa-equity) + self.AddUniverse(MyCustomDataUniverseType, "MyCustomDataUniverseType", Resolution.Daily, self.UniverseSelection) + + def UniverseSelection(self, data): + ''' Selects the securities + + :param List of MyCustomDataUniverseType data: List of MyCustomDataUniverseType + :return: List of Symbol objects ''' + + for datum in data: + self.Log(f"{datum.Symbol},{datum.SomeCustomProperty},{datum.SomeNumericProperty}") + + # define our selection criteria + return [d.Symbol for d in data if d.SomeCustomProperty == 'buy'] + + def OnSecuritiesChanged(self, changes): + ''' Event fired each time that we add/remove securities from the data feed + + :param SecurityChanges changes: Security additions/removals for this time step + ''' + self.Log(changes.ToString()) \ No newline at end of file diff --git a/DropboxDownloader.py b/DropboxDownloader.py new file mode 100644 index 0000000..54b7e55 --- /dev/null +++ b/DropboxDownloader.py @@ -0,0 +1,119 @@ +# QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. +# Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script is used to download files from a given dropbox directory. +# Files to be downloaded are filtered based on the given date present in the file name. + +# ARGUMENTS +# DROPBOX_API_KEY: Dropbox API KEY with read access. +# DROPBOX_SOURCE_DIRECTORY: path of the dropbox directory to search files within. +# DROPBOX_OUTPUT_DIRECTORY(optional): base path of the output directory to store the downloaded files.
+# cmdline args expected in order: DROPBOX_API_KEY, DROPBOX_SOURCE_DIRECTORY, QC_DATAFLEET_DEPLOYMENT_DATE, DROPBOX_OUTPUT_DIRECTORY + +import requests +import json +import sys +import time +import os +from pathlib import Path + +DROPBOX_API_KEY = os.environ.get("DROPBOX_API_KEY") +DROPBOX_SOURCE_DIRECTORY = os.environ.get("DROPBOX_SOURCE_DIRECTORY") +QC_DATAFLEET_DEPLOYMENT_DATE = os.environ.get("QC_DATAFLEET_DEPLOYMENT_DATE") +DROPBOX_OUTPUT_DIRECTORY = os.environ.get("DROPBOX_OUTPUT_DIRECTORY", "/raw") + +def DownloadZipFile(filePath): + + print(f"Starting downloading file at: {filePath}") + + # defining the api-endpoint + API_ENDPOINT_DOWNLOAD = "https://content.dropboxapi.com/2/files/download" + + # data to be sent to api + data = {"path": filePath} + + headers = {"Authorization": f"Bearer {DROPBOX_API_KEY}", + "Dropbox-API-Arg": json.dumps(data)} + + # sending post request and saving response as response object + response = requests.post(url = API_ENDPOINT_DOWNLOAD, headers=headers) + + response.raise_for_status() # ensure we notice bad responses + + fileName = filePath.split("/")[-1] + outputPath = os.path.join(DROPBOX_OUTPUT_DIRECTORY, fileName) + + with open(outputPath, "wb") as f: + f.write(response.content) + print(f"Succesfully saved file at: {outputPath}") + +def GetFilePathsFromDate(targetLocation, dateString): + # defining the api-endpoint + API_ENDPOINT_FILEPATH = "https://api.dropboxapi.com/2/files/list_folder" + + headers = {"Content-Type": "application/json", + "Authorization": f"Bearer {DROPBOX_API_KEY}"} + + # data to be sent to api + data = {"path": targetLocation, + "recursive": False, + "include_media_info": False, + "include_deleted": False, + "include_has_explicit_shared_members": False, + "include_mounted_folders": True, + "include_non_downloadable_files": True} + + # sending post request and saving response as response object + response = requests.post(url = API_ENDPOINT_FILEPATH, headers=headers, data = json.dumps(data)) + + response.raise_for_status() # ensure we notice bad responses + + target_paths = [entry["path_display"] for entry in response.json()["entries"] if dateString in entry["path_display"]] + return target_paths + +def main(): + global DROPBOX_API_KEY, DROPBOX_SOURCE_DIRECTORY, QC_DATAFLEET_DEPLOYMENT_DATE, DROPBOX_OUTPUT_DIRECTORY + inputCount = len(sys.argv) + if inputCount > 1: + DROPBOX_API_KEY = sys.argv[1] + if inputCount > 2: + DROPBOX_SOURCE_DIRECTORY = sys.argv[2] + if inputCount > 3: + QC_DATAFLEET_DEPLOYMENT_DATE = sys.argv[3] + if inputCount > 4: + DROPBOX_OUTPUT_DIRECTORY = sys.argv[4] + + # make output path if doesn't exists + Path(DROPBOX_OUTPUT_DIRECTORY).mkdir(parents=True, exist_ok=True) + + target_paths = GetFilePathsFromDate(DROPBOX_SOURCE_DIRECTORY, QC_DATAFLEET_DEPLOYMENT_DATE) + print(f"Found {len(target_paths)} files with following paths {target_paths}") + + #download files + for path in target_paths: + count = 0 + maxTries = 3 + while True: + try: + DownloadZipFile(path) + break + except Exception as e: + count +=1 + if count > maxTries: + print(f"Error for file with path {path} --error message: {e}") + break + print(f"Error, sleep for 5 sec and retry download file with --path: {path}") + time.sleep(5) + +if __name__== "__main__": + main() diff --git a/MyCustomDataType.cs b/MyCustomDataType.cs new file mode 100644 index 0000000..8c8a025 --- /dev/null +++ b/MyCustomDataType.cs @@ -0,0 +1,156 @@ +/* + * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. + * Lean Algorithmic Trading Engine v2.0. 
Copyright 2014 QuantConnect Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * +*/ + +using System; +using NodaTime; +using ProtoBuf; +using System.IO; +using QuantConnect.Data; +using System.Collections.Generic; + +namespace QuantConnect.DataSource +{ + /// + /// Example custom data type + /// + [ProtoContract(SkipConstructor = true)] + public class MyCustomDataType : BaseData + { + /// + /// Some custom data property + /// + [ProtoMember(2000)] + public string SomeCustomProperty { get; set; } + + /// + /// Time passed between the date of the data and the time the data became available to us + /// + public TimeSpan Period { get; set; } = TimeSpan.FromDays(1); + + /// + /// Time the data became available + /// + public override DateTime EndTime => Time + Period; + + /// + /// Return the URL string source of the file. This will be converted to a stream + /// + /// Configuration object + /// Date of this source file + /// true if we're in live mode, false for backtesting mode + /// String URL of source file. + public override SubscriptionDataSource GetSource(SubscriptionDataConfig config, DateTime date, bool isLiveMode) + { + return new SubscriptionDataSource( + Path.Combine( + Globals.DataFolder, + "alternative", + "mycustomdatatype", + $"{config.Symbol.Value.ToLowerInvariant()}.csv" + ), + SubscriptionTransportMedium.LocalFile + ); + } + + /// + /// Parses the data from the line provided and loads it into LEAN + /// + /// Subscription configuration + /// Line of data + /// Date + /// Is live mode + /// New instance + public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode) + { + var csv = line.Split(','); + + var parsedDate = Parse.DateTimeExact(csv[0], "yyyyMMdd"); + return new MyCustomDataType + { + Symbol = config.Symbol, + SomeCustomProperty = csv[1], + Time = parsedDate - Period, + }; + } + + /// + /// Clones the data + /// + /// A clone of the object + public override BaseData Clone() + { + return new MyCustomDataType + { + Symbol = Symbol, + Time = Time, + EndTime = EndTime, + SomeCustomProperty = SomeCustomProperty, + }; + } + + /// + /// Indicates whether the data source is tied to an underlying symbol and requires that corporate events be applied to it as well, such as renames and delistings + /// + /// false + public override bool RequiresMapping() + { + return true; + } + + /// + /// Indicates whether the data is sparse. 
+ /// If true, we disable logging for missing files + /// + /// true + public override bool IsSparseData() + { + return true; + } + + /// + /// Converts the instance to string + /// + public override string ToString() + { + return $"{Symbol} - {SomeCustomProperty}"; + } + + /// + /// Gets the default resolution for this data and security type + /// + public override Resolution DefaultResolution() + { + return Resolution.Daily; + } + + /// + /// Gets the supported resolution for this data and security type + /// + public override List SupportedResolutions() + { + return DailyResolution; + } + + /// + /// Specifies the data time zone for this data type. This is useful for custom data types + /// + /// The of this data type + public override DateTimeZone DataTimeZone() + { + return DateTimeZone.Utc; + } + } +} diff --git a/MyCustomDataUniverseType.cs b/MyCustomDataUniverseType.cs new file mode 100644 index 0000000..5db2f4b --- /dev/null +++ b/MyCustomDataUniverseType.cs @@ -0,0 +1,141 @@ +/* + * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. + * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * +*/ + +using System; +using NodaTime; +using ProtoBuf; +using System.IO; +using QuantConnect.Data; +using System.Collections.Generic; +using System.Globalization; + +namespace QuantConnect.DataSource +{ + /// + /// Example custom data type + /// + [ProtoContract(SkipConstructor = true)] + public class MyCustomDataUniverseType : BaseData + { + /// + /// Some custom data property + /// + public string SomeCustomProperty { get; set; } + + /// + /// Some custom data property + /// + public decimal SomeNumericProperty { get; set; } + + /// + /// Time passed between the date of the data and the time the data became available to us + /// + public TimeSpan Period { get; set; } = TimeSpan.FromDays(1); + + /// + /// Time the data became available + /// + public override DateTime EndTime => Time + Period; + + /// + /// Return the URL string source of the file. This will be converted to a stream + /// + /// Configuration object + /// Date of this source file + /// true if we're in live mode, false for backtesting mode + /// String URL of source file. 
+ public override SubscriptionDataSource GetSource(SubscriptionDataConfig config, DateTime date, bool isLiveMode) + { + return new SubscriptionDataSource( + Path.Combine( + Globals.DataFolder, + "alternative", + "mycustomdatatype", + "universe", + $"{date.ToStringInvariant(DateFormat.EightCharacter)}.csv" + ), + SubscriptionTransportMedium.LocalFile + ); + } + + /// + /// Parses the data from the line provided and loads it into LEAN + /// + /// Subscription configuration + /// Line of data + /// Date + /// Is live mode + /// New instance + public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode) + { + var csv = line.Split(','); + + var someNumericProperty = decimal.Parse(csv[2], NumberStyles.Any, CultureInfo.InvariantCulture); + + return new MyCustomDataUniverseType + { + Symbol = new Symbol(SecurityIdentifier.Parse(csv[0]), csv[1]), + SomeNumericProperty = someNumericProperty, + SomeCustomProperty = csv[3], + Time = date - Period, + Value = someNumericProperty + }; + } + + /// + /// Indicates whether the data is sparse. + /// If true, we disable logging for missing files + /// + /// true + public override bool IsSparseData() + { + return true; + } + + /// + /// Converts the instance to string + /// + public override string ToString() + { + return $"{Symbol} - {Value}"; + } + + /// + /// Gets the default resolution for this data and security type + /// + public override Resolution DefaultResolution() + { + return Resolution.Daily; + } + + /// + /// Gets the supported resolution for this data and security type + /// + public override List SupportedResolutions() + { + return DailyResolution; + } + + /// + /// Specifies the data time zone for this data type. This is useful for custom data types + /// + /// The of this data type + public override DateTimeZone DataTimeZone() + { + return DateTimeZone.Utc; + } + } +} \ No newline at end of file diff --git a/QuantConnect.DataSource.csproj b/QuantConnect.DataSource.csproj new file mode 100644 index 0000000..f379576 --- /dev/null +++ b/QuantConnect.DataSource.csproj @@ -0,0 +1,27 @@ + + + net6.0 + QuantConnect.DataSource + QuantConnect.DataSource.MyCustomDataType + bin\$(Configuration) + $(OutputPath)\QuantConnect.DataSource.MyCustomDataType.xml + + + + + + + + + + + + + + + + + + + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..642fc49 --- /dev/null +++ b/README.md @@ -0,0 +1,47 @@ +![LEAN Data Source SDK](http://cdn.quantconnect.com.s3.us-east-1.amazonaws.com/datasources/Github_LeanDataSourceSDK.png) + +# Lean DataSource SDK + +[![Build Status](https://github.com/QuantConnect/LeanDataSdk/workflows/Build%20%26%20Test/badge.svg)](https://github.com/QuantConnect/LeanDataSdk/actions?query=workflow%3A%22Build%20%26%20Test%22) + +### Introduction + +The Lean Data SDK is a cross-platform template repository for developing custom data types for Lean. +These data types will be consumed by [QuantConnect](https://www.quantconnect.com/) trading algorithms and research environment, locally or in the cloud. + +It is composed by example .Net solution for the data type and converter scripts. + +### Prerequisites + +The solution targets dotnet 5, for installation instructions please follow [dotnet download](https://dotnet.microsoft.com/download). + +The data downloader and converter script can be developed in different ways: C# executable, Python script, Python Jupyter notebook or even a bash script. 
+- The python script should be compatible with python 3.6.8 +- Bash script will run on Ubuntu Bionic + +Specifically, the environment where these scripts will be run is [quantconnect/research](https://hub.docker.com/repository/docker/quantconnect/research) based on [quantconnect/lean:foundation](https://hub.docker.com/repository/docker/quantconnect/lean). + +### Installation + +The "Use this template" feature should be used for each unique data source which requires its own data processing. Once it is cloned locally, you should be able to successfully build the solution, run all tests and execute the downloader and/or converter scripts. The final version should pass all CI tests of GitHub Actions. + +Once ready, please contact support@quantconnect.com and we will create a listing in the QuantConnect Data Market for your company and link to your public repository and commit hash. + +### Datasets Vendor Requirements + +Key requirements for new vendors include: + + - A well-defined dataset with a clear and static vision for the data to minimize churn or changes as people will be building systems from it. This is easiest with "raw" data (e.g. sunshine hours vs a sentiment algorithm) + - Robust ticker and security links to ensure the tickers are tracked well through time, or accurately point in time. ISIN, FIGI, or point in time ticker supported + - Robust funding to ensure viability for at least 1 year + - Robust API to ensure reliable up-time. No dead links on site or 502 errors while using the API + - Consistent delivery schedule, on time and in time for market trading + - Consistent data format with notifications and lead time on data format updates + - At least 1 year of historical point in time data + - Survivorship bias free data + - Good documentation for the dataset + + +### Tutorials + + - See [Tutorials](https://www.quantconnect.com/docs/v2/our-platform/datasets/contributing-datasets) for a step-by-step guide for creating a new LEAN Data Source. \ No newline at end of file diff --git a/examples.md b/examples.md new file mode 100644 index 0000000..9086ae3 --- /dev/null +++ b/examples.md @@ -0,0 +1 @@ +https://github.com/QuantConnect?q=Lean.DataSource&type=&language=&sort= \ No newline at end of file diff --git a/listing-about.md b/listing-about.md new file mode 100644 index 0000000..61e7172 --- /dev/null +++ b/listing-about.md @@ -0,0 +1,45 @@ +### Meta +- **Dataset name**: *`datasetName`* +- **Vendor name**: *`vendorName`* +- **Vendor Website**: *`vendorWebsite`* + + +### Introduction + +*`datasetName`* by *`vendorName`* *`/is/tracks`* *`assetsInMarket`* *`datasetProperty`*. The data covers *`coverageNumber`* *`coverageAssetType`*, starting in *`startDate`*, and is delivered on a *`resolution`* frequency. This dataset is created by *`methodology`*. + +### About the Provider +*`vendorName`* was founded by *`founderName(s)`* in *`foundingYear`*, with the goal of *`missionStatement`*. *`vendorName`* provides access to *`dataTypes`* for *`targetDemographic`*. + +### Getting Started +Python: +``` +# pythonCodeToSubscribeToData +``` + +C#: +``` +// cSharpCodeToSubscribeToData +``` + +### Data Summary +- **Start Date**: *`startDate`* +- **Asset Coverage**: *`assetCoverage`* +- **Resolution**: *`resolution(s)`* +- **Data Density**: *`dataDensity`* +- **Timezone**: *`timezone`* + + +### Example Applications + +The *`vendorName`* *`datasetName`* enables researchers to accurately design strategies harnessing *`datasetProperties`*.
Examples include: + +- *`descriptionOfExampleStrategy1`* +- *`descriptionOfExampleStrategy2`* +- *`descriptionOfExampleStrategyN`* + +### Data Point Attributes + +- *`DataSourceClassName1`* +- *`DataSourceClassName2`* +- *`DataSourceClassNameN`* diff --git a/listing-documentation.md b/listing-documentation.md new file mode 100644 index 0000000..0fcd509 --- /dev/null +++ b/listing-documentation.md @@ -0,0 +1,39 @@ +### Requesting Data +To add *`datasetName`* dataset by *`vendorName`* to your algorithm, use the AddData method to request the data. As with all datasets, you should save a reference to your symbol for easy use later in your algorithm. For detailed documentation on using custom data, see [Importing Custom Data](https://www.quantconnect.com/docs/algorithm-reference/importing-custom-data). + +Python: +``` +# pythonCodeToRequestData +``` + +C#: +``` +// cSharpCodeToRequestData +``` + +### Accessing Data +Data can be accessed via Slice events. Slice delivers unique events to your algorithm as they happen. We recommend saving the symbol object when you add the data for easy access to slice later. Data is available in *`resolution`* resolution. You can see an example of the slice accessor in the code below. + +Python: +``` +# pythonCodeToAccessData +``` + +C#: +``` +// cSharpCodeToAccessData +``` + + +### Historical Data +You can request historical custom data in your algorithm using the custom data Symbol object. To learn more about historical data requests, please visit the [Historical Data](https://www.quantconnect.com/docs/algorithm-reference/historical-data) documentation. If there is no custom data in the period you request, the history result will be empty. The following example gets the historical data for *`datasetName`* *`datasetProperties`* by using the History API. 
+ +Python: +``` +# pythonCodeToGetHistoricalData +``` + +C#: +``` +// cSharpCodeToGetHistoricalData +``` \ No newline at end of file diff --git a/output/alternative/mycustomdatatype/spy.csv b/output/alternative/mycustomdatatype/spy.csv new file mode 100644 index 0000000..e450a4d --- /dev/null +++ b/output/alternative/mycustomdatatype/spy.csv @@ -0,0 +1,6 @@ +20131001,buy +20131003,buy +20131006,buy +20131007,sell +20131009,buy +20131011,sell \ No newline at end of file diff --git a/renameDataset.sh b/renameDataset.sh new file mode 100644 index 0000000..51ccd6d --- /dev/null +++ b/renameDataset.sh @@ -0,0 +1,57 @@ +# Get {vendorNameDatasetName} +vendorNameDatasetName=${PWD##*.} +vendorNameDatasetNameUniverse=${vendorNameDatasetName}Universe + +# Rename the MyCustomDataType.cs file to {vendorNameDatasetName}.cs +mv MyCustomDataType.cs ${vendorNameDatasetName}.cs +mv MyCustomDataUniverseType.cs ${vendorNameDatasetNameUniverse}.cs + +# In the QuantConnect.DataSource.csproj file, rename the MyCustomDataType class to {vendorNameDatasetName} +sed -i "s/MyCustomDataType/$vendorNameDatasetName/g" QuantConnect.DataSource.csproj +sed -i "s/Demonstration.cs/${vendorNameDatasetName}Algorithm.cs/g" QuantConnect.DataSource.csproj +sed -i "s/DemonstrationUniverse.cs/${vendorNameDatasetNameUniverse}SelectionAlgorithm.cs/g" QuantConnect.DataSource.csproj + +# In the {vendorNameDatasetName}.cs file, rename the MyCustomDataType class to {vendorNameDatasetName} +sed -i "s/MyCustomDataType/$vendorNameDatasetName/g" ${vendorNameDatasetName}.cs + +# In the {vendorNameDatasetNameUniverse}.cs file, rename the MyCustomDataUniverseType class to {vendorNameDatasetNameUniverse} +sed -i "s/MyCustomDataUniverseType/$vendorNameDatasetNameUniverse/g" ${vendorNameDatasetNameUniverse}.cs + +# In the {vendorNameDatasetName}Algorithm.cs file, rename the MyCustomDataType class to to {vendorNameDatasetName} +sed -i "s/MyCustomDataType/$vendorNameDatasetName/g" Demonstration.cs +sed -i "s/MyCustomDataType/$vendorNameDatasetName/g" Demonstration.py + +# In the {vendorNameDatasetName}Algorithm.cs file, rename the CustomDataAlgorithm class to {vendorNameDatasetName}Algorithm +sed -i "s/CustomDataAlgorithm/${vendorNameDatasetName}Algorithm/g" Demonstration.cs +sed -i "s/CustomDataAlgorithm/${vendorNameDatasetName}Algorithm/g" Demonstration.py + +# In the {vendorNameDatasetName}UniverseSelectionAlgorithm.cs file, rename the MyCustomDataUniverseType class to to {vendorNameDatasetName}Universe +sed -i "s/MyCustomDataUniverseType/$vendorNameDatasetNameUniverse/g" DemonstrationUniverse.cs +sed -i "s/MyCustomDataUniverseType/$vendorNameDatasetNameUniverse/g" DemonstrationUniverse.py + +# In the {vendorNameDatasetNameUniverse}SelectionAlgorithm.cs file, rename the CustomDataAlgorithm class to {vendorNameDatasetNameUniverse}SelectionAlgorithm +sed -i "s/CustomDataUniverse/${vendorNameDatasetNameUniverse}SelectionAlgorithm/g" DemonstrationUniverse.cs +sed -i "s/CustomDataUniverse/${vendorNameDatasetNameUniverse}SelectionAlgorithm/g" DemonstrationUniverse.py + +# Rename the Lean.DataSource.vendorNameDatasetName/Demonstration.cs/py file to {vendorNameDatasetName}Algorithm.cs/py +mv Demonstration.cs ${vendorNameDatasetName}Algorithm.cs +mv Demonstration.py ${vendorNameDatasetName}Algorithm.py + +# Rename the Lean.DataSource.vendorNameDatasetName/DemonstrationUniverseSelectionAlgorithm.cs/py file to {vendorNameDatasetName}UniverseSelectionAlgorithm.cs/py +mv DemonstrationUniverse.cs ${vendorNameDatasetNameUniverse}SelectionAlgorithm.cs +mv 
DemonstrationUniverse.py ${vendorNameDatasetNameUniverse}SelectionAlgorithm.py + +# Rename the tests/MyCustomDataTypeTests.cs file to tests/{vendorNameDatasetName}Tests.cs +sed -i "s/MyCustomDataType/${vendorNameDatasetName}/g" tests/MyCustomDataTypeTests.cs +mv tests/MyCustomDataTypeTests.cs tests/${vendorNameDatasetName}Tests.cs + +# In tests/Tests.csproj, rename the Demonstration.cs and DemonstrationUniverse.cs to {vendorNameDatasetName}Algorithm.cs and {vendorNameDatasetNameUniverse}SelectionAlgorithm.cs +sed -i "s/Demonstration.cs/${vendorNameDatasetName}Algorithm.cs/g" tests/Tests.csproj +sed -i "s/DemonstrationUniverse.cs/${vendorNameDatasetNameUniverse}SelectionAlgorithm.cs/g" tests/Tests.csproj + +# In the MyCustomDataDownloader.cs and Program.cs files, rename the MyCustomDataDownloader to {vendorNameDatasetNameUniverse}DataDownloader +sed -i "s/MyCustomDataDownloader/${vendorNameDatasetNameUniverse}DataDownloader/g" DataProcessing/Program.cs +sed -i "s/MyCustomDataDownloader/${vendorNameDatasetNameUniverse}DataDownloader/g" DataProcessing/MyCustomDataDownloader.cs + +# Rename the DataProcessing/MyCustomDataDownloader.cs file to DataProcessing/{vendorNameDatasetName}DataDownloader.cs +mv DataProcessing/MyCustomDataDownloader.cs DataProcessing/${vendorNameDatasetName}DataDownloader.cs \ No newline at end of file diff --git a/tests/MyCustomDataTypeTests.cs b/tests/MyCustomDataTypeTests.cs new file mode 100644 index 0000000..c0b907c --- /dev/null +++ b/tests/MyCustomDataTypeTests.cs @@ -0,0 +1,99 @@ +/* + * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. + * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * +*/ + +using System; +using ProtoBuf; +using System.IO; +using System.Linq; +using ProtoBuf.Meta; +using Newtonsoft.Json; +using NUnit.Framework; +using QuantConnect.Data; +using QuantConnect.DataSource; + +namespace QuantConnect.DataLibrary.Tests +{ + [TestFixture] + public class MyCustomDataTypeTests + { + [Test] + public void JsonRoundTrip() + { + var expected = CreateNewInstance(); + var type = expected.GetType(); + var serialized = JsonConvert.SerializeObject(expected); + var result = JsonConvert.DeserializeObject(serialized, type); + + AssertAreEqual(expected, result); + } + + [Test] + public void ProtobufRoundTrip() + { + var expected = CreateNewInstance(); + var type = expected.GetType(); + + RuntimeTypeModel.Default[typeof(BaseData)].AddSubType(2000, type); + + using (var stream = new MemoryStream()) + { + Serializer.Serialize(stream, expected); + + stream.Position = 0; + + var result = Serializer.Deserialize(type, stream); + + AssertAreEqual(expected, result, filterByCustomAttributes: true); + } + } + + [Test] + public void Clone() + { + var expected = CreateNewInstance(); + var result = expected.Clone(); + + AssertAreEqual(expected, result); + } + + private void AssertAreEqual(object expected, object result, bool filterByCustomAttributes = false) + { + foreach (var propertyInfo in expected.GetType().GetProperties()) + { + // we skip Symbol which isn't protobuffed + if (filterByCustomAttributes && propertyInfo.CustomAttributes.Count() != 0) + { + Assert.AreEqual(propertyInfo.GetValue(expected), propertyInfo.GetValue(result)); + } + } + foreach (var fieldInfo in expected.GetType().GetFields()) + { + Assert.AreEqual(fieldInfo.GetValue(expected), fieldInfo.GetValue(result)); + } + } + + private BaseData CreateNewInstance() + { + return new MyCustomDataType + { + Symbol = Symbol.Empty, + Time = DateTime.Today, + DataType = MarketDataType.Base, + SomeCustomProperty = "This is some market related information" + }; + } + } +} \ No newline at end of file diff --git a/tests/Tests.csproj b/tests/Tests.csproj new file mode 100644 index 0000000..8e308d0 --- /dev/null +++ b/tests/Tests.csproj @@ -0,0 +1,23 @@ + + + net6.0 + QuantConnect.DataLibrary.Tests + + + + + + + + + + all + + + + + + + + +
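Usage note for `renameDataset.sh`: the script derives the dataset name from `${PWD##*.}`, i.e. the text after the last `.` in the working directory name, which suggests it is meant to be run from the root of a repository cloned from this template and named `Lean.DataSource.<VendorNameDatasetName>`. A minimal sketch under that assumption (the directory name below is a hypothetical example, not part of the patch):

```
# Hypothetical usage sketch: run from the root of the cloned template repository.
# The script reads ${PWD##*.} (the text after the last '.' in the directory name)
# and uses it as the dataset name when renaming MyCustomDataType*, Demonstration*,
# tests and DataProcessing files in place.
cd ~/Lean.DataSource.VendorNameDatasetName
bash renameDataset.sh
```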