Skip to content

Commit

Permalink
additional measures and bug fixing (#113)
Browse files Browse the repository at this point in the history
  • Loading branch information
AnastasiyaB authored May 19, 2020
1 parent 356822b commit a2632c8
Show file tree
Hide file tree
Showing 13 changed files with 554 additions and 226 deletions.
4 changes: 2 additions & 2 deletions gs_quant/api/gs/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def query_data(cls, query: Union[DataQuery, MDAPIDataQuery], dataset_id: str = N
xref_keys = set(where.keys()).intersection(XRef.properties())
if xref_keys:
# Check that assetId is a symbol dimension of this data set. If not, we need to do a separate query
# to resolve xref --> assetId
# to resolve xref --> assetId
if len(xref_keys) > 1:
raise MqValueError('Cannot not specify more than one type of asset identifier')

Expand Down Expand Up @@ -249,7 +249,7 @@ def get_many_definitions(cls,
return res

@staticmethod
def build_market_data_query(asset_ids: List[str], query_type: QueryType, where: Union[FieldFilterMap] = None,
def build_market_data_query(asset_ids: List[str], query_type: QueryType, where: Union[FieldFilterMap, dict] = None,
source: Union[str] = None, real_time: bool = False):
inner = {
'assetIds': asset_ids,
Expand Down
299 changes: 162 additions & 137 deletions gs_quant/made_with_gs_quant/7-Predicting Performance and Live Risk.ipynb

Large diffs are not rendered by default.

18 changes: 2 additions & 16 deletions gs_quant/markets/historical.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,8 @@
from .core import PricingContext
from gs_quant.base import Priceable
from gs_quant.datetime.date import date_range
from gs_quant.risk import ErrorValue, RiskMeasure
from gs_quant.risk.results import CompositeResultFuture, MultipleRiskMeasureResult


class HistoricalPricingFuture(CompositeResultFuture):
    """Future that aggregates the per-date results of a historical pricing run.

    Each underlying future holds the result for one pricing date; this class
    composes them into a single result, skipping errored dates whenever at
    least one date produced a usable result.
    """

    def _set_result(self):
        # Collect the (already completed) result of every per-date future.
        results = [f.result() for f in self._futures]
        # First non-error result, used as a template for composing the rest.
        base = next((r for r in results if not isinstance(r, (ErrorValue, Exception))), None)

        if base is None:
            # Every date failed: surface the first error as the overall result.
            self._result_future.set_result(results[0])
        else:
            # If the results hold multiple risk measures, compose each measure
            # across dates; otherwise compose the single-measure results directly.
            result = MultipleRiskMeasureResult({k: base[k].compose(r[k] for r in results) for k in base.keys()})\
                if isinstance(base, MultipleRiskMeasureResult) else base.compose(results)
            self._result_future.set_result(result)
from gs_quant.risk import RiskMeasure
from gs_quant.risk.results import HistoricalPricingFuture


class HistoricalPricingContext(PricingContext):
Expand Down
3 changes: 2 additions & 1 deletion gs_quant/priceable.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ def _property_changed(self, prop: str):
super()._property_changed(prop)

if self.resolution_key and self.unresolved:
unresolved = self.unresolved
self.unresolved = None
self.from_instance(self.unresolved)
self.from_instance(unresolved)
self.resolution_key = None

def get_quantity(self) -> float:
Expand Down
18 changes: 16 additions & 2 deletions gs_quant/risk/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
under the License.
"""
from gs_quant.base import Priceable
from gs_quant.risk import RiskMeasure, RiskResult, aggregate_results
from gs_quant.risk import ErrorValue, RiskMeasure, RiskResult, aggregate_results

from concurrent.futures import Future
from functools import partial
Expand Down Expand Up @@ -76,6 +76,20 @@ def _set_result(self):
dict(zip(self.__risk_measures, (f.result() for f in self.futures)))))


class HistoricalPricingFuture(CompositeResultFuture):
    """Future that aggregates per-date pricing results into one composed result.

    Each underlying future holds the result for a single pricing date; the
    composed result spans all dates, tolerating errored dates as long as at
    least one date succeeded.
    """

    def _set_result(self):
        # Collect the (already completed) result of every per-date future.
        results = [f.result() for f in self._futures]
        # First non-error result, used as a template for composing the rest.
        base = next((r for r in results if not isinstance(r, (ErrorValue, Exception))), None)

        if base is None:
            # Every date failed: surface the first error as the overall result.
            self._result_future.set_result(results[0])
        else:
            # If the results hold multiple risk measures, compose each measure
            # across dates; otherwise compose the single-measure results directly.
            result = MultipleRiskMeasureResult({k: base[k].compose(r[k] for r in results) for k in base.keys()})\
                if isinstance(base, MultipleRiskMeasureResult) else base.compose(results)
            self._result_future.set_result(result)


class PortfolioRiskResult(RiskResult):

def __init__(self,
Expand Down Expand Up @@ -143,7 +157,7 @@ def __results(self,
instruments: Optional[Union[int, slice, str, Priceable, Iterable[Union[int, str, Priceable]]]] = (),
risk_measure: Optional[RiskMeasure] = None):
futures = self.__futures(instruments) if instruments or instruments == 0 else self._result.futures
scalar = isinstance(futures, (Future, MultipleRiskMeasureFuture))
scalar = isinstance(futures, (Future, HistoricalPricingFuture, MultipleRiskMeasureFuture))
risk_measure = self.risk_measures[0] if len(self.risk_measures) == 1 and not risk_measure else risk_measure

def result(future: Future):
Expand Down
1 change: 1 addition & 0 deletions gs_quant/target/backtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class BacktestRiskMeasureType(EnumBase, Enum):
Forward = 'Forward'
Implied_Volatility = 'Implied Volatility'
Fair_Variance = 'Fair Variance'
Strike_Level = 'Strike Level'

def __repr__(self):
return self.value
Expand Down
8 changes: 6 additions & 2 deletions gs_quant/test/timeseries/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,22 @@ def test_lag():
dates = pd.date_range("2019-01-01", periods=4, freq="D")
x = pd.Series([1.0, 2.0, 3.0, 4.0], index=dates)

result = lag(x)
result = lag(x, mode=LagMode.TRUNCATE)
expected = pd.Series([np.nan, 1.0, 2.0, 3.0], index=dates)
assert_series_equal(result, expected, obj="Lag")

result = lag(x, 2)
result = lag(x, 2, LagMode.TRUNCATE)
expected = pd.Series([np.nan, np.nan, 1.0, 2.0], index=dates)
assert_series_equal(result, expected, obj="Lag 2")

result = lag(x, 2, LagMode.EXTEND)
expected = pd.Series([np.nan, np.nan, 1.0, 2.0, 3.0, 4.0], index=pd.date_range("2019-01-01", periods=6, freq="D"))
assert_series_equal(result, expected, obj="Lag 2 Extend")

result = lag(x, 2)
expected = pd.Series([np.nan, np.nan, 1.0, 2.0, 3.0, 4.0], index=pd.date_range("2019-01-01", periods=6, freq="D"))
assert_series_equal(result, expected, obj="Lag 2 Extend")

y = pd.Series([0] * 4, index=pd.date_range('2020-01-01T00:00:00Z', periods=4, freq='S'))
with pytest.raises(Exception):
lag(y, 5, LagMode.EXTEND)
143 changes: 142 additions & 1 deletion gs_quant/test/timeseries/test_econometrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
import pytest
from pandas.util.testing import assert_series_equal
from testfixtures import Replacer

from pandas import Timestamp
from math import isclose
from gs_quant.timeseries import *
from gs_quant.timeseries.econometrics import _get_ratio

Expand Down Expand Up @@ -464,5 +465,145 @@ def test_sharpe_ratio():
numpy.testing.assert_almost_equal(actual.values, er_df['SR'].values, decimal=5)


def _assert_ar2_one_step(transformed, diffed, params):
    """Check AR(2) one-step-ahead predictions of a fitted (2, d, 0) model.

    Verifies the transformed series at the first two forecastable positions
    (``p + d`` and ``p + d + 1``) and at the last position. Each prediction is
    ``const + ar1 * y[t-1] + ar2 * y[t-2]`` evaluated on *diffed*, the input
    series differenced ``d`` times (matching the model's differencing order).

    :param transformed: transformed (predicted) series for one column
    :param diffed: the original column differenced ``params.d`` times
    :param params: the column's ARIMABestParams (expects p == 2, q == 0)
    """
    # The first p + d entries of the transformed series are NaN, so the first
    # checkable prediction sits at positional index p + d.
    first = params.p + params.d
    for idx in (first, first + 1, -1):
        # idx - 1 / idx - 2 address the one- and two-step lags positionally;
        # this also holds for idx == -1 (lags at -2 and -3).
        expected = (params.const
                    + diffed[idx - 1] * params.ar_coef[0]
                    + diffed[idx - 2] * params.ar_coef[1])
        assert isclose(transformed[idx], expected, abs_tol=1e-8)


def test_arima_fit():
    """End-to-end test of econometrics.Arima.

    Fits ARIMA models on a small OHLC frame and checks: leading-NaN counts,
    explicit AR(2) one-step predictions per column, pd.Series input handling,
    and the identity transform when p == 0 and d == 0.
    """
    test_dict = {
        'High': {
            Timestamp('1989-01-03 00:00:00'): 3.575721263885498,
            Timestamp('1989-01-04 00:00:00'): 3.5857372283935547,
            Timestamp('1989-01-05 00:00:00'): 3.62580132484436,
            Timestamp('1989-01-06 00:00:00'): 3.62580132484436,
            Timestamp('1989-01-09 00:00:00'): 3.575721263885498,
            Timestamp('1989-01-10 00:00:00'): 3.575721263885498,
            Timestamp('1989-01-11 00:00:00'): 3.5657050609588623,
            Timestamp('1989-01-12 00:00:00'): 3.635817289352417,
            Timestamp('1989-01-13 00:00:00'): 3.615785360336304,
            Timestamp('1989-01-16 00:00:00'): 3.615785360336304,
            Timestamp('1989-01-17 00:00:00'): 3.635817289352417,
            Timestamp('1989-01-18 00:00:00'): 3.675881385803223,
            Timestamp('1989-01-19 00:00:00'): 3.695913553237915,
            Timestamp('1989-01-20 00:00:00'): 3.665865421295166,
            Timestamp('1989-01-23 00:00:00'): 3.675881385803223,
            Timestamp('1989-01-24 00:00:00'): 3.675881385803223,
            Timestamp('1989-01-25 00:00:00'): 3.695913553237915,
            Timestamp('1989-01-26 00:00:00'): 3.7760417461395264,
            Timestamp('1989-01-27 00:00:00'): 3.8561699390411377,
            Timestamp('1989-01-30 00:00:00'): 3.8561699390411377},
        'Low': {
            Timestamp('1989-01-03 00:00:00'): 3.4855768680572514,
            Timestamp('1989-01-04 00:00:00'): 3.5356571674346924,
            Timestamp('1989-01-05 00:00:00'): 3.575721263885498,
            Timestamp('1989-01-06 00:00:00'): 3.575721263885498,
            Timestamp('1989-01-09 00:00:00'): 3.5356571674346924,
            Timestamp('1989-01-10 00:00:00'): 3.5356571674346924,
            Timestamp('1989-01-11 00:00:00'): 3.5256409645080566,
            Timestamp('1989-01-12 00:00:00'): 3.5456731319427486,
            Timestamp('1989-01-13 00:00:00'): 3.5857372283935547,
            Timestamp('1989-01-16 00:00:00'): 3.5957531929016118,
            Timestamp('1989-01-17 00:00:00'): 3.5857372283935547,
            Timestamp('1989-01-18 00:00:00'): 3.615785360336304,
            Timestamp('1989-01-19 00:00:00'): 3.655849456787109,
            Timestamp('1989-01-20 00:00:00'): 3.62580132484436,
            Timestamp('1989-01-23 00:00:00'): 3.615785360336304,
            Timestamp('1989-01-24 00:00:00'): 3.615785360336304,
            Timestamp('1989-01-25 00:00:00'): 3.655849456787109,
            Timestamp('1989-01-26 00:00:00'): 3.665865421295166,
            Timestamp('1989-01-27 00:00:00'): 3.79607367515564,
            Timestamp('1989-01-30 00:00:00'): 3.786057710647583},
        'Close': {
            Timestamp('1989-01-03 00:00:00'): 3.5256409645080566,
            Timestamp('1989-01-04 00:00:00'): 3.5857372283935547,
            Timestamp('1989-01-05 00:00:00'): 3.575721263885498,
            Timestamp('1989-01-06 00:00:00'): 3.575721263885498,
            Timestamp('1989-01-09 00:00:00'): 3.575721263885498,
            Timestamp('1989-01-10 00:00:00'): 3.5556890964508057,
            Timestamp('1989-01-11 00:00:00'): 3.5556890964508057,
            Timestamp('1989-01-12 00:00:00'): 3.605769157409668,
            Timestamp('1989-01-13 00:00:00'): 3.605769157409668,
            Timestamp('1989-01-16 00:00:00'): 3.5957531929016118,
            Timestamp('1989-01-17 00:00:00'): 3.62580132484436,
            Timestamp('1989-01-18 00:00:00'): 3.675881385803223,
            Timestamp('1989-01-19 00:00:00'): 3.665865421295166,
            Timestamp('1989-01-20 00:00:00'): 3.6458332538604736,
            Timestamp('1989-01-23 00:00:00'): 3.62580132484436,
            Timestamp('1989-01-24 00:00:00'): 3.675881385803223,
            Timestamp('1989-01-25 00:00:00'): 3.675881385803223,
            Timestamp('1989-01-26 00:00:00'): 3.756009578704834,
            Timestamp('1989-01-27 00:00:00'): 3.79607367515564,
            Timestamp('1989-01-30 00:00:00'): 3.846153736114502},
    }

    test_df = pd.DataFrame(test_dict)
    arima = econometrics.Arima()

    # fit() must accept a fractional train size, an integer row count, or None.
    train_size_values = [0.75, int(0.75 * len(test_df)), None]
    for train_size in train_size_values:
        arima.fit(test_df, train_size=train_size, freq='B', q_vals=[0])
    transformed_test_df = arima.transform(test_df)

    # Each transformed column starts with exactly p + d NaNs.
    for col in transformed_test_df.keys():
        count_nans = arima.best_params[col].p + arima.best_params[col].d
        assert count_nans == transformed_test_df[col].isna().sum()

    # Test (2,1,0) model selected for 'High'.
    _assert_ar2_one_step(transformed_test_df['High'], test_df['High'].diff(),
                         arima.best_params['High'])

    # Test (2,2,0) model selected for 'Low' (second-order differencing).
    _assert_ar2_one_step(transformed_test_df['Low'], test_df['Low'].diff().diff(),
                         arima.best_params['Low'])

    # Test (2,1,0) model selected for 'Close'.
    _assert_ar2_one_step(transformed_test_df['Close'], test_df['Close'].diff(),
                         arima.best_params['Close'])

    # A pd.Series input should behave exactly like a one-column DataFrame.
    test_high_series = pd.Series(test_df['High'])
    arima.fit(test_high_series, train_size=0.75, freq='B', q_vals=[0])
    transformed_test_series = arima.transform(test_high_series)
    _assert_ar2_one_step(transformed_test_series['High'], test_df['High'].diff(),
                         arima.best_params['High'])

    # With p == 0 and d == 0 the transform must be the identity.
    new_arima = econometrics.Arima()
    zero_resid = test_high_series.copy(deep=True)
    zero_resid[:] = 0
    new_arima.best_params = {'High': econometrics.ARIMABestParams(p=0, q=0, d=0, const=0, ar_coef=[0], ma_coef=[],
                                                                  resid=zero_resid, series=test_high_series)}
    transformed_test_df = new_arima.transform(test_high_series)
    assert_series_equal(transformed_test_df['High'], test_df['High'])


# Allow running this test module directly, outside an external pytest invocation.
if __name__ == "__main__":
    pytest.main(args=["test_econometrics.py"])
20 changes: 4 additions & 16 deletions gs_quant/test/timeseries/test_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import datetime
import datetime as dt
import unittest.mock
from typing import Union

import pandas as pd
Expand All @@ -39,7 +38,7 @@
from gs_quant.session import GsSession, Environment
from gs_quant.target.common import XRef, PricingLocation, Currency as CurrEnum
from gs_quant.test.timeseries.utils import mock_request
from gs_quant.timeseries.measures import BenchmarkType, VolReference
from gs_quant.timeseries.measures import BenchmarkType
from gs_quant.api.gs.data import QueryType

_index = [pd.Timestamp('2019-01-01')]
Expand Down Expand Up @@ -270,9 +269,7 @@ def test_currency_to_mdapi_basis_swap_rate_asset(mocker):


def test_check_clearing_house(mocker):
for ch in tm_rates._ClearingHouse.__members__:
assert ch == tm_rates._check_clearing_house(ch)

assert tm_rates._ClearingHouse.CME == tm_rates._check_clearing_house(tm_rates._ClearingHouse.CME)
assert tm_rates._ClearingHouse.LCH == tm_rates._check_clearing_house(None)
invalid_ch = ['NYSE']
for ch in invalid_ch:
Expand Down Expand Up @@ -1753,8 +1750,6 @@ def test_var_term():


def _vol_term_typical(reference, value):
from gs_quant.target.common import FieldFilterMap

assert DataContext.current_is_set
data = {
'tenor': ['1w', '2w', '1y', '2y'],
Expand All @@ -1766,7 +1761,6 @@ def _vol_term_typical(reference, value):
replace = Replacer()
market_mock = replace('gs_quant.timeseries.measures.GsDataApi.get_market_data', Mock())
market_mock.return_value = out
ffm_mock = replace('gs_quant.timeseries.measures.FieldFilterMap', Mock(spec=FieldFilterMap))

actual = tm.vol_term(Index('MA123', AssetClass.Equity, '123'), reference, value)
idx = pd.DatetimeIndex(['2018-01-08', '2018-01-15', '2019-01-01', '2020-01-01'], name='expirationDate')
Expand All @@ -1779,8 +1773,7 @@ def _vol_term_typical(reference, value):
assert_series_equal(expected, pd.Series(actual))
assert actual.dataset_ids == _test_datasets
market_mock.assert_called_once()
ffm_mock.assert_called_once_with(relativeStrike=value if reference == tm.VolReference.NORMALIZED else value / 100,
strikeReference=unittest.mock.ANY)

replace.restore()
return actual

Expand Down Expand Up @@ -1814,8 +1807,6 @@ def test_vol_term():


def _vol_term_fx(reference, value):
from gs_quant.target.common import FieldFilterMap

assert DataContext.current_is_set
data = {
'tenor': ['1w', '2w', '1y', '2y'],
Expand All @@ -1827,7 +1818,6 @@ def _vol_term_fx(reference, value):
replace = Replacer()
market_mock = replace('gs_quant.timeseries.measures.GsDataApi.get_market_data', Mock())
market_mock.return_value = out
ffm_mock = replace('gs_quant.timeseries.measures.FieldFilterMap', Mock(spec=FieldFilterMap))
cross_mock = replace('gs_quant.timeseries.measures.cross_stored_direction_for_fx_vol', Mock())
cross_mock.return_value = 'EURUSD'

Expand All @@ -1842,9 +1832,7 @@ def _vol_term_fx(reference, value):
assert_series_equal(expected, pd.Series(actual))
assert actual.dataset_ids == _test_datasets
market_mock.assert_called_once()
ffm_mock.assert_called_once_with(relativeStrike=value * -1 if reference == VolReference.DELTA_PUT else value,
strikeReference='delta' if reference.value.lower().startswith(
'delta') else reference.value)

replace.restore()
return actual

Expand Down
2 changes: 1 addition & 1 deletion gs_quant/timeseries/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class LagMode(Enum):


@plot_function
def lag(x: pd.Series, obs: int = 1, mode: LagMode = LagMode.TRUNCATE) -> pd.Series:
def lag(x: pd.Series, obs: int = 1, mode: LagMode = LagMode.EXTEND) -> pd.Series:
"""
Lag timeseries by a specified number of observations
Expand Down
Loading

0 comments on commit a2632c8

Please sign in to comment.