diff --git a/README.md b/README.md index 4e3283d..564b604 100644 --- a/README.md +++ b/README.md @@ -93,8 +93,9 @@ KX only officially supports versions of PyKX built by KX, i.e. versions of PyKX PyKX depends on the following third-party Python packages: - `pandas>=1.2, < 2.2.0` -- `numpy~=1.22; python_version<'3.11'` -- `numpy~=1.23.2; python_version=='3.11'` +- `numpy~=1.22, <2.0; python_version<'3.11'` +- `numpy~=1.23, <2.0; python_version=='3.11'` +- `numpy~=1.26, <2.0; python_version=='3.12'` - `pytz>=2022.1` - `toml~=0.10.2` @@ -142,6 +143,13 @@ Windows: * [dlfcn-win32](https://github.com/dlfcn-win32/dlfcn-win32). Can be installed using [Vcpkg](https://github.com/microsoft/vcpkg). * `msvcr100.dll`. Available in [Microsoft Visual C++ 2010 Redistributable](https://www.microsoft.com/en-ca/download/details.aspx?id=26999). +To install the above dependencies, you can run the `w64_install.ps1` script as an administrator: + +```PowerShell +cd pykx +.\w64_install.ps1 +``` + ### Building Using a Python virtual environment is recommended: diff --git a/conda-recipe/conda_build_config.yaml b/conda-recipe/conda_build_config.yaml index 87fed87..0fde6e5 100644 --- a/conda-recipe/conda_build_config.yaml +++ b/conda-recipe/conda_build_config.yaml @@ -3,3 +3,4 @@ python: - 3.9 - 3.10 - 3.11 + - 3.12 diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index a781dfc..fbcc5cc 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -16,17 +16,20 @@ requirements: build: - git - python - - setuptools==60.9.3 - - setuptools_scm[toml]>=6.0.1 + - setuptools>=68.0 + - setuptools_scm[toml]>=7.1.0 # [py==37] + - setuptools_scm[toml]>=8.0.0 # [py!=37] - cython==3.0.0 - - numpy==1.22.* + - numpy==1.26 # [py==312] + - numpy==1.22.* # [py!=37 and py<312] + - numpy==1.20 # [py==37] - tomli>=2.0.1 - - wheel>=0.36.2 + - wheel>=0.36 - sysroot_linux-64 # [linux64] run: - python - - numpy>=1.22 + - numpy>=1.22,<2.0 - pandas>=1.2, <2.2.0 - pytz>=2022.1 - toml>=0.10.2 diff --git 
a/docs/api/compress.md b/docs/api/compress.md new file mode 100644 index 0000000..de34fcf --- /dev/null +++ b/docs/api/compress.md @@ -0,0 +1,12 @@ +# Compression and Encryption APIs + +::: pykx.compress_encrypt + rendering: + show_root_heading: false + options: + show_root_heading: false + members_order: source + members: + - CompressionAlgorithm + - Compress + - Encrypt diff --git a/docs/api/console.md b/docs/api/console.md deleted file mode 100644 index 2f0b80c..0000000 --- a/docs/api/console.md +++ /dev/null @@ -1,3 +0,0 @@ -# Console - -::: pykx.console diff --git a/docs/api/ctx.md b/docs/api/ctx.md deleted file mode 100644 index 8beae73..0000000 --- a/docs/api/ctx.md +++ /dev/null @@ -1,3 +0,0 @@ -# Context Interface - -::: pykx.ctx diff --git a/docs/api/embedded_q.md b/docs/api/embedded_q.md deleted file mode 100644 index 4021920..0000000 --- a/docs/api/embedded_q.md +++ /dev/null @@ -1,5 +0,0 @@ -# Q & Embedded Q - -::: pykx.Q - -::: pykx.EmbeddedQ diff --git a/docs/api/pykx-execution/q.md b/docs/api/pykx-execution/q.md index fa64b58..4a80a91 100644 --- a/docs/api/pykx-execution/q.md +++ b/docs/api/pykx-execution/q.md @@ -168,8 +168,9 @@ pykx.List(q(' Write global tables to disk as splayed, enumerated, indexed q tables. ```python ->>> pykx.q('t: ([] x: 1 2 3; y: 10 20 30)') ->>> pykx.q.dsave(':v', 't') +>>> from pathlib import Path +>>> pykx.q['t'] = pykx.Table(data={'x': [1, 2, 3], 'y': [10, 20, 30]}) +>>> pykx.q.dsave(Path('v'), 't') pykx.SymbolAtom(q('`t')) ``` @@ -178,7 +179,7 @@ pykx.SymbolAtom(q('`t')) Read or memory-map a variable or q data file. 
```python ->>> pykx.q('a: 10') +>>> pykx.q['a'] = 10 >>> pykx.q.get('a') pykx.LongAtom(q('10')) ``` diff --git a/docs/api/pykx-q-data/type_conversions.md b/docs/api/pykx-q-data/type_conversions.md index 6feb022..0e4b1d7 100644 --- a/docs/api/pykx-q-data/type_conversions.md +++ b/docs/api/pykx-q-data/type_conversions.md @@ -771,11 +771,11 @@ True Converting these types to python will return a float object or a `float64` object in numpy's case. ```Python - >>> kx.q('0001.02.03T04:05:06.007, 0001.02.03T04:05:06.007').py(raw=True) + >>> kx.q('0001.02.03T04:05:06.007 0001.02.03T04:05:06.007').py(raw=True) [-730085.8297915857, -730085.8297915857] - >>> kx.q('0001.02.03T04:05:06.007, 0001.02.03T04:05:06.007').np(raw=True) + >>> kx.q('0001.02.03T04:05:06.007 0001.02.03T04:05:06.007').np(raw=True) array([-730085.82979159, -730085.82979159]) - >>> kx.q('0001.02.03T04:05:06.007, 0001.02.03T04:05:06.007').np(raw=True).dtype + >>> kx.q('0001.02.03T04:05:06.007 0001.02.03T04:05:06.007').np(raw=True).dtype dtype('float64') ``` @@ -1081,7 +1081,9 @@ True Calling `.py()` on a `pykx.Table` will return a python `dict` object. ```Python - >>> kx.q('([] a: 10?10; b: 10?10)').py() + >>> kx.Table(data={ + ... 'a': kx.random.random(10, 10), + ... 'b': kx.random.random(10, 10)}).py() {'a': [5, 6, 4, 1, 3, 3, 7, 8, 2, 1], 'b': [8, 1, 7, 2, 4, 5, 4, 2, 7, 8]} ``` @@ -1089,7 +1091,9 @@ True Calling `.np()` on a `pykx.Table` will return a numpy `record` array of the rows of the table with each type converted to it closest analogous numpy type. ```Python - >>> kx.q('([] a: 10?10; b: 10?10)').np() + >>> kx.Table(data={ + ... 'a': kx.random.random(10, 10), + ... 'b': kx.random.random(10, 10)}).np() rec.array([(9, 9), (9, 7), (2, 6), (5, 6), (4, 4), (2, 7), (5, 8), (8, 4), (7, 4), (9, 6)], dtype=[('a', '>> kx.q('([] a: 10?10; b: 10?10)').pd() + >>> kx.Table(data={ + ... 'a': kx.random.random(10, 10), + ... 
'b': kx.random.random(10, 10)}).pd() a b 0 1 9 1 0 7 @@ -1137,7 +1143,9 @@ True Calling `.pa()` on a `pykx.Table` will return a pyarrow `Table`. ```Python - >>> kx.q('([] a: 10?10; b: 10?10)').pa() + >>> kx.Table(data={ + ... 'a': kx.random.random(10, 10), + ... 'b': kx.random.random(10, 10)}).pa() pyarrow.Table a: int64 b: int64 diff --git a/docs/api/pykx_under_q.md b/docs/api/pykx_under_q.md deleted file mode 100644 index b68e306..0000000 --- a/docs/api/pykx_under_q.md +++ /dev/null @@ -1,891 +0,0 @@ -# pykx.q Library Reference Card - -This page documents the functions found in the `pykx.q` q library that are available. - -This library can be installed by calling a helper function within `PyKX`, this function will move -all the required files and libraries into your `QHOME` directory. - -```python -import pykx as kx -kx.install_into_QHOME() -``` - -or equivalently using only command line - -```python -python -c "import pykx;pykx.install_into_QHOME()" -``` - -If you previously had `embedPy` installed pass: - -```python -python -c "import pykx;pykx.install_into_QHOME(overwrite_embedpy=True)" -``` - -If you cannot edit files in `QHOME` you can copy the files to your local folder and load `pykx.q` from there: - -```bash -python -c "import pykx;pykx.install_into_QHOME(to_local_folder=True)" -``` - -## Gain access to the `.pykx` namespace within the `q` session - -```q -q)\l pykx.q -``` - -## `.pykx.eval` - -_[Evaluates](https://docs.python.org/3/library/functions.html#eval) a `string` as python code and return the result as a wrapped `foreign` type._ - -```q -.pykx.eval[pythonCode] -``` - -**Parameters:** - -name | type | description | --------------|-----------|-------------| -`pythonCode` | string | A string of Python code to be executed returning the result as a wrapped foreign object. 
| - -**Return:** - -type | description ------|------------ -`composition` | A wrapped foreign object which can be converted to q or Python objects - - -```q -// Evaluate the code and return as a wrapped foreign object -q).pykx.eval"1+1" -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist - -// Evaluate the code and convert to Python foreign -q).pykx.eval["1+1"]`. -foreign - -// Evaluate the code and convert to a q object -q).pykx.eval["lambda x: x + 1"][5]` -6 -``` - -## `.pykx.pyeval` - -_[Evaluates](https://docs.python.org/3/library/functions.html#eval) a `CharVector` as python code and return the result as a `q` foreign._ - -```q -.pykx.pyeval[pythonCode] -``` - -**Parameters:** - -name | type | description | --------------|-----------|-------------| -`pythonCode` | string | A string of Python code to be evaluated returning the result as a q foreign object. | - -**Return:** - - type | description | --------|-------------| - `foreign` | The return of the Python string evaluation returned as a q foreign. | - -```q -// evaluate a Python string -q).pykx.pyeval"1+1" -foreign - -// Use a function defined in Python taking a single argument -q).pykx.pyeval["lambda x: x + 1"][5] -foreign - -// Use a function defined in Python taking multiple arguments -q).pykx.pyeval["lambda x, y: x + y"][4;5] -foreign -``` - -## `.pykx.pyexec` - -_[Executes](https://docs.python.org/3/library/functions.html#exec) a `CharVector` as python code in Python memory._ - -```q -.pykx.pyexec[pythonCode] -``` - -**Parameters:** - -name | type | description | --------------|-----------|-------------| -`pythonCode` | string | A string of Python code to be executed. | - -**Return:** - - type | description | -------|-------------| - `::` | Returns generic null on successful execution, will return an error if execution of Python code is unsuccessful. 
| - - -```q -// Execute valid Python code -q).pykx.pyexec"1+1" -q).pykx.pyexec"a = 1+1" - -// Evaluate the Python code returning the result to q -q).pykx.qeval"a" -2 - -// Attempt to execute invalid Python code -q).pykx.pyexec"1+'test'" -'TypeError("unsupported operand type(s) for +: 'int' and 'str'") - [0] .pykx.pyexec["1+'test'"] - ^ -``` - -## `.pykx.qeval` - -_[Evaluates](https://docs.python.org/3/library/functions.html#eval) a `CharVector` in Python returning the result as a q object._ - -```q -.pykx.qeval[pythonCode] -``` - -**Parameters:** - -name | type | description | --------------|-----------|-------------| -`pythonCode` | string | A string of Python code to be evaluated returning the result as a q object. | - -**Return:** - - type | description | --------|-------------| - `any` | The return of the Python string evaluation returned as a q object. | - -```q -// evaluate a Python string -q).pykx.qeval"1+1" -2 - -// Use a function defined in Python taking a single argument -q).pykx.qeval["lambda x: x + 1"][5] -6 - -// Use a function defined in Python taking multiple arguments -q).pykx.qeval["lambda x, y: x + y"][4;5] -9 -``` - -## `.pykx.get` - -_Retrieve a named item from the Python memory_ - -```q -.pykx.get[objectName] -``` - -**Parameters:** - -name | type | description | ---------------|-----------|-------------| -`objectName` | symbol | A named entity to retrieve from Python memory as a wrapped q foreign object. 
| - -**Return:** - -type | description ------|------------ -`composition` | A wrapped foreign object which can be converted to q or Python objects - - -```q -// Set an item in Python memory and retrieve using .pykx.get -q).pykx.set[`test;til 10] -q).pykx.get[`test] -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist - -// Convert to q and Python objects -q).pykx.get[`test]` -0 1 2 3 4 5 6 7 8 9 - -// Retrieve an item defined entirely using Python -q).pykx.pyexec"import numpy as np" -q).pykx.pyexec"a = np.array([1, 2, 3])" -q).pykx.get[`a]` -1 2 3 -``` - -## `.pykx.set` - -_Set a q object to a named and type specified object in Python memory_ - -```q -.pykx.set[objectName;qObject] -``` - -**Parameters:** - -name | type | description | --------------|--------|-------------| -`objectName` | symbol | The name to be associated with the q object being persisted to Python memory | -`qObject` | any | The q/Python entity that is to be stored to Python memory - -**Return:** - -type | description ------|------------ -`::` | Returns null on successful execution - -```q -// Set a q array of guids using default behaviour -q).pykx.set[`test;3?0Ng] -q)print .pykx.get`test -[UUID('3d13cc9e-f7f1-c0ee-782c-5346f5f7b90e') - UUID('c6868d41-fa85-233b-245f-55160cb8391a') - UUID('e1e5fadd-dc8e-54ba-e30b-ab292df03fb0')] - -// Set a q table as pandas dataframe -q).pykx.set[`test;.pykx.topd ([]5?1f;5?1f)] -q)print .pykx.get`test - x x1 -0 0.301772 0.392752 -1 0.785033 0.517091 -2 0.534710 0.515980 -3 0.711172 0.406664 -4 0.411597 0.178084 - -// Set a q table as pyarrow table -q).pykx.set[`test;.pykx.topa ([]2?0p;2?`a`b`c;2?1f;2?0t)] -q)print .pykx.get`test -pyarrow.Table -x: timestamp[ns] -x1: string -x2: double -x3: duration[ns] ----- -x: [[2002-06-11 11:57:24.452442976,2001-12-28 01:34:14.199305176]] -x1: [["c","a"]] -x2: [[0.7043314231559634,0.9441670505329967]] -x3: [[2068887000000,41876091000000]] -``` - -## `.pykx.import` - -_Import a Python library and store as a wrapped foreign object to allow 
use in q projections/evaluation._ - -```q -.pykx.import[libName] -``` - -**Parameters:** - -name | type | description | -----------|--------|-------------| -`libName` | symbol | The name of the Python library/module to imported for use | - -**Return:** - -type | description ---------------|------------ -`composition` | Returns a wrapped foreign object associated with an imported library on success, otherwise will error if library/module cannot be imported. - -```q -// Import numpy for use as a q object named numpy -q)np:.pykx.import`numpy -q).pykx.print np - - -// Use a function from within the numpy library using attribute retrieval -q).pykx.print np[`:arange] - -q)np[`:arange][10]` -0 1 2 3 4 5 6 7 8 9 -``` - -## `.pykx.console` - -_Open an interactive python REPL from within a q session similar to launching `python` from the command line._ - -```q -.pykx.console[null] -``` - -**Parameters:** - -| name | type | description | -|--------|------|-------------| -| `null` | null | Activation of the `.pykx.console` does not require any input | - -**Return:** - -| type | description | -|--------|-------------| -| `::` | This function has no explicit return but execution of the function will initialise a Python repl. 
| - -```q -// Enter PyKX console and evaluate Python code -q).pykx.console[] ->>> 1+1 -2 ->>> list(range(10)) -[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ->>> quit() -q) - -// Enter PyKX console setting q objects using PyKX -q).pykx.console[] ->>> import pykx as kx ->>> kx.q['table'] = kx.q('([]2?1f;2?0Ng;2?`3)' ->>> quit() -q)table -x x1 x2 --------------------------------------------------- -0.439081 49f2404d-5aec-f7c8-abba-e2885a580fb6 mil -0.5759051 656b5e69-d445-417e-bfe7-1994ddb87915 igf -``` - - -## `.pykx.wrap` - -_Convert a foreign object generated from Python execution to a callable `q` object._ - -```q -.pykx.wrap[pyForeign] -``` - -**Parameters:** - -name | type | description | -------------|---------|-------------| -`pyForeign` | foreign | A Python object which is to be converted to a callable q object. | - -**Returns:** - -type | description | ---------------|-------------| -`composition` | The Python object wrapped such that it can be called using q | - -```q -// Create a q foreign object in Python -q)a:.pykx.pyeval"pykx.Foreign([1, 2, 3])" -q)a -foreign -q).pykx.print a -[1, 2, 3] - -// Wrap the foreign object and convert to q -q)b:.pykx.wrap a -q)b -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist -q)b` -1 2 3 -``` - -## `.pykx.unwrap` - -_Convert a wrapped foreign object generated from this interface into a python foreign._ - -```q -.pykx.unwrap[wrapObj] -``` - -**Parameters:** - - name | type | description | ------------|---------------------|-------------| - `wrapObj` | composition/foreign | A (un)wrapped Python foreign object. | - -**Returns:** - - type | description | ------------|-------------| - `foreign` | The unwrapped representation of the Python foreign object. 
| - -```q -// Generate an object which returns a wrapped Python foreign -q).pykx.set[`test;.pykx.topd ([]2?0p;2?`a`b`c;2?1f;2?0t)] -q)a:.pykx.get`test -q)show a -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist - -// Unwrap the wrapped object -q).pykx.unwrap a -foreign -``` - -## `.pykx.getattr` - -_Retrieve an attribute or property form a foreign Python object returning another foreign._ - -```q -.pykx.getattr[pythonObject;attrName] -``` - -**Parameters:** - -name | type | description | ----------------|---------------------|-------------| -`pythonObject` | foreign/composition | The Python object from which the defined attribute is to be retrieved | -`attrName` | symbol | The name of the attribute to be retrieved. | - -**Returns:** - - type | description | ------------|-------------| - `foreign` | An unwrapped foreign object containing the retrieved - -!!!Note - Application of this function is equivalent to calling Python's [`getattr(f, 'x')`](https://docs.python.org/3/library/functions.html#getattr) function. - - The wrapped foreign objects provide a shorthand version of calling `.pykx.getattr`. Through the use of the ````:x``` syntax for attribute/property retrieval - -```q -// Define a class object from which to retrieve Python attributes -q).pykx.pyexec"aclass = type('TestClass', (object,), {'x': pykx.LongAtom(3), 'y': pykx.toq('hello')})"; - -// Retrieve the class object from Python as a q foreign -q)show a:.pykx.get[`aclass]`. 
-foreign - -// Retrieve an attribute from the Python foreign -q).pykx.getattr[a;`y] -foreign - -// Print the Python representation of the foreign object -q)print .pykx.getattr[a;`y] -hello - -// Retrieve the attribute from a Python foreign and convert to q -q).pykx.wrap[.pykx.getattr[a;`y]]` -`hello -``` - -## `.pykx.setattr` - -_Set an attribute of a Python object, this is equivalent to calling Python's [`setattr(f, a, x)`](https://docs.python.org/3/library/functions.html#setattr) function_ - -```q -.pykx.setattr[pythonObject;attrName;attrObj] -``` - -**Parameters:** - -name | type | description | ----------------|---------------------|-------------| -`pythonObject` | foreign/composition | The Python object on which the defined attribute is to be set | -`attrName` | symbol | The name to be associated with the set attribute | -`attrObject` | any | The object which is to be set as an attribute associated with `pythonObject` | - -**Returns:** - -type | description | ------|-------------| -`::` | Returns generic null on successful execution otherwise returns the error message raised - - -```q -// Define a Python object to which attributes can be set -q).pykx.pyexec"aclass = type('TestClass', (object,), {'x': pykx.LongAtom(3), 'y': pykx.toq('hello')})"; -q)a:.pykx.get`aclass - -// Retrieve an existing attribute to show defined behaviour -q)a[`:x]` -3 - -// Retrieve a named attribute that doesn't exist -q)a[`:r]` - -// Set an attribute 'r' and retrieve the return -q).pykx.setattr[a; `r; til 4] -q)a[`:r]` -0 1 2 3 -q).pykx.print a[`:r] -[0 1 2 3] - -// Set an attribute 'k' to be a Pandas type -q).pykx.setattr[a;`k;.pykx.topd ([]2?1f;2?0Ng;2?`2)] -q)a[`:k]` -x x1 x2 -------------------------------------------------- -0.4931835 0a3e1784-0125-1b68-5ae7-962d49f2404d mi -0.5785203 5aecf7c8-abba-e288-5a58-0fb6656b5e69 ig -q).pykx.print a[`:k] - x x1 x2 -0 0.493183 0a3e1784-0125-1b68-5ae7-962d49f2404d mi -1 0.578520 5aecf7c8-abba-e288-5a58-0fb6656b5e69 ig - -// Attempt to set 
an attribute against an object which does not support this behaviour -q)arr:.pykx.eval"[1, 2, 3]" -q).pykx.setattr[arr;`test;5] -'AttributeError("'list' object has no attribute 'test'") - [1] /opt/kx/pykx.q:218: .pykx.i.setattr: - cx:count x; - i.load[(`set_attr;4)][unwrap x 0;x 1;;x 2] - ^ - $[cx>4; -``` - -## `.pykx.setdefault` - -_Define the default conversion type for KX objects when converting from q to Python_ - -```q -.pykx.setdefault[conversionFormat] -``` - -**Parameters:** - -name | type | description | --------------------|--------|-------------| -`conversionFormat` | string | The Python data format to which all q objects when passed to Python will be converted. | - - -**Returns:** - -type | description | ------|-------------| -`::` | Returns generic null on successful execution and updates variable `.pykx.i.defaultConv` - -??? "Supported Options" - - The following outline the supported conversion types and the associated values which can be passed to set these values - - Conversion Format | Accepted inputs | - ---------------------------------------------------------------|------------------------------| - [Numpy](https://numpy.org/) | `"np", "numpy", "Numpy"` | - [Pandas](https://pandas.pydata.org/docs/user_guide/index.html) | `"pd", "pandas", "Pandas"` | - [Python](https://docs.python.org/3/library/datatypes.html) | `"py", "python", "Python"` | - [PyArrow](https://arrow.apache.org/docs/python/index.html) | `"pa", "pyarrow", "PyArrow"` | - [K](type_conversions.md) | `"k", "q"` | - -```q -// Default value on startup is `"np"` -q).pykx.i.defaultConv -"np" - -// Set default value to Pandas -q).pykx.setdefault["Pandas"] -q).pykx.i.defaultConv -"pd" -``` - -## `.pykx.print` - -_Print a python object directly to stdout. 
This is equivalent to calling `print()` on the object in Python._ - -```q -.pykx.print[pythonObject] -print[pythonObject] -``` - -**Parameters:** - -name | type | description | ----------------|-------------------|-------------| -`pythonObject` | (wrapped) foreign | A Python object retrieved from the Python memory space, if passed a q object this will be 'shown' | - -**Return:** - -type | description ------|------------ -`::` | Will print the output to stdout but return null - -!!!Note - For back compatibility with embedPy this function is also supported in the shorthand form `print` which uses the `.q` namespace. To not overwrite `print` in your q session and allow use only of the longhand form `.pykx.print` set the environment variable `UNSET_PYKX_GLOBALS` to any value. - -```q -// Use a wrapped foreign object -q)a: .pykx.eval"1+1" -q).pykx.print a -2 - -// Use a foreign object -q)a: .pykx.eval"'hello world'" -q).pykx.print a`. -hello world - -// Use a q object -q).pykx.print til 5 -0 1 2 3 4 - -// Use the shorthand "print" function -q)a:.pykx.eval"'hello world'" -q)print a -hello world -``` - -## `.pykx.repr` - -_Evaluate the python function `repr()` on an object retrieved from Python memory_ - -```q -.pykx.repr[pythonObject] -``` - -**Parameters:** - -name | type | description | ----------------|-------------------|-------------| -`pythonObject` | (wrapped) foreign | A Python object retrieved from the Python memory space, if passed a q object this will retrieved using [`.Q.s1`](https://code.kx.com/q/ref/dotq/#qs1-string-representation). | - -**Return:** - -type | description ----------|------------ -`string` | The string representation of the Python/q object - -```q -// Use a wrapped foreign object -q)a: .pykx.eval"1+1" -q).pykx.repr a -,"2" - -// Use a foreign object -q)a: .pykx.eval"'hello world'" -q).pykx.repr a`. 
-"hello world" - -// Use a q object -q).pykx.repr til 5 -"0 1 2 3 4" -``` - -## `.pykx.toq` - -_Convert an (un)wrapped `PyKX` foreign object into an analogous q type._ - -```q -.pykx.toq[pythonObject] -``` - -**Parameters:** - -name | type | description | ----------------|------------------------|-------------| -`pythonObject` | foreign/composition | A foreign Python object or composition containing a Python foreign to be converted to q - -**Return:** - -type | description -------|------------ -`any` | A q object converted from Python - -```q -// Convert a wrapped PyKX foreign object to q -q)show a:.pykx.eval["1+1"] -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist -q).pykx.toq a -2 - -// Convert an unwrapped PyKX foreign object to q -q)show b:a`. -foreign -q).pykx.toq b -2 -``` - -## `.pykx.tok` - -_Tag a q object to be indicate conversion to a Pythonic PyKX object when called in Python_ - -```q -.pykx.tok[qObject] -``` - -**Parameters:** - -name | type | description | -----------|---------|-------------| -`qObject` | `any` | A q object which is to be defined as a PyKX object in Python. | - -**Return:** - -type | description --------------|------------ -`projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a PyKX type object. 
| - -```q -// Denote that a q object once passed to Python should be managed as a PyKX object -q).pykx.tok til 10 -enlist[`..k;;][0 1 2 3 4 5 6 7 8 9] - -// Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - - -// Pass a q object to Python treating the Python object as a PyKX object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.tok til 10 - -``` - -## `.pykx.topd` - -_Tag a q object to be indicate conversion to a Pandas object when called in Python_ - -```q -.pykx.topd[qObject] -``` - -**Parameters:** - -name | type | description | -----------|---------|-------------| -`qObject` | `any` | A q object which is to be defined as a Pandas object in Python. | - -**Return:** - -type | description --------------|------------ -`projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a Pandas type object. | - -```q -// Denote that a q object once passed to Python should be managed as a Pandas object -q).pykx.topd til 10 -enlist[`..pandas;;][0 1 2 3 4 5 6 7 8 9] - - -// Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - - -// Pass a q object to Python treating the Python object as a Pandas Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topd til 10 - -``` - -## `.pykx.topa` - -_Tag a q object to be indicate conversion to a PyArrow object when called in Python_ - -```q -.pykx.topa[qObject] -``` - -**Parameters:** - -name | type | description | -----------|---------|-------------| -`qObject` | `any` | A q object which is to be defined as a PyArrrow object in Python. | - -**Return:** - -type | description --------------|------------ -`projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a PyArrow type object. 
| - -```q -// Denote that a q object once passed to Python should be managed as a PyArrow object -q).pykx.topa til 10 -enlist[`..pyarrow;;][0 1 2 3 4 5 6 7 8 9] - - -// Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - - -// Pass a q object to Python treating the Python object as a PyArrow Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topa til 10 - -``` - -## `.pykx.topy` - -_Tag a q object to be indicate conversion to a Python object when called in Python_ - -```q -.pykx.topa[qObject] -``` - -**Parameters:** - -name | type | description | -----------|---------|-------------| -`qObject` | `any` | A q object which is to be defined as a Python object in Python. | - -**Return:** - -type | description --------------|------------ -`projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a Python type object. | - -```q -// Denote that a q object once passed to Python should be managed as a Python object -q).pykx.topy til 10 -enlist[`..python;;][0 1 2 3 4 5 6 7 8 9] - -// Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - - -// Pass a q object to Python treating the Python object as a Python Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topy til 10 - -``` - -## `.pykx.tonp` - -_Tag a q object to be indicate conversion to a Numpy object when called in Python_ - -```q -.pykx.tonp[qObject] -``` - -**Parameters:** - -name | type | description | -----------|---------|-------------| -`qObject` | `any` | A q object which is to be defined as a Numpy object in Python. | - -**Return:** - -type | description --------------|------------ -`projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a Numpy type object. 
| - -```q -// Denote that a q object once passed to Python should be managed as a Numpy object -q).pykx.tonp til 10 -enlist[`..numpy;;][0 1 2 3 4 5 6 7 8 9] - -// Update the default conversion type to be non numpy -q).pykx.i.defaultConv:"py" - -// Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - - -// Pass a q object to Python treating the Python object as a Numpy Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.tonp til 10 - -``` - -## `.pykx.toraw` - -_Tag a q object to be indicate a raw conversion when called in Python_ - -```q -.pykx.toraw[qObject] -``` - -**Parameters:** - -name | type | description | -----------|---------|-------------| -`qObject` | `any` | A q object which is to be converted in its raw form in Python. | - -**Return:** - -type | description --------------|------------ -`projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a raw object. | - -```q -// Denote that a q object once passed to Python should be managed as a Numpy object -q).pykx.toraw til 10 -enlist[`..raw;;][0 1 2 3 4 5 6 7 8 9] - -// Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - - -// Pass a q object to Python treating the Python object as a raw Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.toraw til 10 - -``` diff --git a/docs/api/q/q.md b/docs/api/q/q.md deleted file mode 100644 index 2a0c335..0000000 --- a/docs/api/q/q.md +++ /dev/null @@ -1,2454 +0,0 @@ -# Q Reference Card - -This page documents the functions found in the q global namespace that are available in PyKX as attributes of `pykx.q`, or as attributes of `pykx.QConnection` instances. Refer to [the q reference card in the q docs](https://code.kx.com/q/ref/#by-category) for more details about using these functions in q. 
This page documents how one might use them from Python via PyKX. - -All of these functions take and return q objects, which are wrapped in PyKX as `pykx.K` objects. Arguments of other types will have `pykx.K` called on them to convert them into q objects. Refer to [the PyKX wrappers documentation](../wrappers.md) for more information about `pykx.K` objects. - -## By Category - -Category | Elements ---------------------------- | ----------------------------------------------------------------------------------------- -[Environment](#environment) | [`getenv`](#getenv), [`gtime`](#gtime), [`ltime`](#ltime), [`setenv`](#setenv) -[Interpret](#interpret) | [`eval`](#eval), [`parse`](#parse), [`reval`](#reval), [`show`](#show), [`system`](#system), [`value`](#value) -[IO](#io) | [`dsave`](#dsave), [`get`](#get), [`hclose`](#hclose), [`hcount`](#hcount), [`hdel`](#hdel), [`hopen`](#hopen), [`hsym`](#hsym), [`load`](#load), [`read0`](#read0), [`read1`](#read1), [`rload`](#rload), [`rsave`](#rsave), [`save`](#save), [`set`](#set) -[Iterate](#iterate) | [`each`](#each), [`over`](#over), [`peach`](#peach), [`prior`](#prior), [`scan`](#scan) -[Join](#join) | [`aj`](#aj), [`aj0`](#aj0), [`ajf`](#ajf), [`ajf0`](#ajf0), [`asof`](#asof), [`ej`](#ej), [`ij`](#ij), [`ijf`](#ijf), [`lj`](#lj), [`ljf`](#ljf), [`pj`](#pj), [`uj`](#uj), [`ujf`](#ujf), [`wj`](#wj), [`wj1`](#wj1) -[List](#list) | [`count`](#count), [`cross`](#cross), [`cut`](#cut), [`enlist`](#enlist), [`fills`](#fills), [`first`](#first), [`flip`](#flip), [`group`](#group), [`inter`](#inter), [`last`](#last), [`mcount`](#mcount), [`next`](#next), [`prev`](#prev), [`raze`](#raze), [`reverse`](#reverse), [`rotate`](#rotate), [`sublist`](#sublist), [`sv`](#sv), [`til`](#til), [`union`](#union), [`vs`](#vs), [`where`](#where), [`xprev`](#xprev) -[Logic](#logic) | [`all`](#all), [`any`](#any) -[Math](#math) | [`abs`](#abs), [`acos`](#acos), [`asin`](#asin), [`atan`](#atan), [`avg`](#avg), [`avgs`](#avgs), 
[`ceiling`](#ceiling), [`cor`](#cor), [`cos`](#cos), [`cov`](#cov), [`deltas`](#deltas), [`dev`](#dev), [`div`](#div), [`ema`](#ema), [`exp`](#exp), [`floor`](#floor), [`inv`](#inv), [`log`](#log), [`lsq`](#lsq), [`mavg`](#mavg), [`max`](#max), [`maxs`](#maxs), [`mdev`](#mdev), [`med`](#med), [`min`](#min), [`mins`](#mins), [`mmax`](#mmax), [`mmin`](#mmin), [`mmu`](#mmu), [`mod`](#mod), [`msum`](#msum), [`neg`](#neg), [`prd`](#prd), [`prds`](#prds), [`rand`](#rand), [`ratios`](#ratios), [`reciprocal`](#reciprocal), [`scov`](#scov), [`sdev`](#sdev), [`signum`](#signum), [`sin`](#sin), [`sqrt`](#sqrt), [`sum`](#sum), [`sums`](#sums), [`svar`](#svar), [`tan`](#tan), [`var`](#var), [`wavg`](#wavg), [`within`](#within), [`wsum`](#wsum), [`xexp`](#xexp), [`xlog`](#xlog) -[Meta](#meta) | [`attr`](#attr), [`null`](#null), [`tables`](#tables), [`type`](#type), [`view`](#view), [`views`](#views) -[Query](#queries) | [`fby`](#fby) -[Sort](#sort) | [`asc`](#asc), [`bin`](#bin), [`binr`](#binr), [`desc`](#desc), [`differ`](#differ), [`distinct`](#distinct), [`iasc`](#iasc), [`idesc`](#idesc), [`rank`](#rank), [`xbar`](#xbar), [`xrank`](#xrank) -[Table](#table) | [`cols`](#cols), [`csv`](#csv), [`fkeys`](#fkeys), [`insert`](#insert), [`key`](#key), [`keys`](#keys), [`meta`](#meta), [`ungroup`](#ungroup), [`upsert`](#upsert), [`xasc`](#xasc), [`xcol`](#xcol), [`xcols`](#xcols), [`xdesc`](#xdesc), [`xgroup`](#xgroup), [`xkey`](#xkey) -[Text](#text) | [`like`](#like), [`lower`](#lower), [`ltrim`](#ltrim), [`md5`](#md5), [`rtrim`](#rtrim), [`ss`](#ss), [`ssr`](#ssr), [`string`](#string), [`trim`](#trim), [`upper`](#upper) - -Not all functions listed on [the q reference card](https://code.kx.com/q/ref/#by-category) are available as attributes of `pykx.q`, or as attributes of `pykx.QConnection` instances. These include elements such as `select`, `exec`, `update`, and `delete` which are not actually q functions, but rather part of the q language itself (i.e. 
handled by the parser), and functions whose names would result in syntax errors in Python, such as `not` and `or`. - -Because arbitrary q code can be executed using PyKX (except in unlicensed mode, in which none of these functions are available), these limitations can be circumvented as necessary by running q code instead of using [the context interface](../ctx.md). For example, `pykx.q('not')` can be used instead of `pykx.q.not`, and `pykx.q('select from t')` can be used instead of `pykx.q.select(...)`. Consider using [the qSQL query documentation](../query.md) as an alternative to writing qSQL queries as q code. - -## Environment - -### [getenv](https://code.kx.com/q/ref/getenv/) - -Get the value of an environment variable. - -```python ->>> pykx.q.getenv('EDITOR') -pykx.CharVector(q('"nvim"')) -``` - -### [gtime](https://code.kx.com/q/ref/gtime/) - -UTC equivalent of local timestamp. - -```python ->>> import datetime ->>> pykx.q.gtime(datetime.datetime.fromisoformat('2022-05-22T12:23:45.123')) -pykx.TimestampAtom(q('2022.05.22D16:23:45.123000000')) -``` - -### [ltime](https://code.kx.com/q/ref/gtime/#ltime) - -Local equivalent of UTC timestamp. - -```python ->>> import datetime ->>> pykx.q.ltime(datetime.datetime.fromisoformat('2022-05-22T12:23:45.123')) -pykx.TimestampAtom(q('2022.05.22D08:23:45.123000000')) - -``` - -### [setenv](https://code.kx.com/q/ref/getenv/#setenv) - -Set the value of an environment variable. - -```python ->>> pykx.q.setenv('RTMP', b'/home/user/temp') ->>> pykx.q.getenv('RTMP') -pykx.CharVector(q('"/home/user/temp"')) -``` - -## Interpret - -### [eval](https://code.kx.com/q/ref/eval/) - -Evaluate parse trees. - -```python ->>> pykx.q.eval([pykx.q('+'), 2, 3]) -pykx.LongAtom(q('5')) -``` - -### [parse](https://code.kx.com/q/ref/parse/) - -Parse a char vector into a parse tree, which can be evaluated with [`pykx.q.eval`](#eval). 
- -```python ->>> pykx.q.parse(b'{x * x}') -pykx.Lambda(q('{x * x}')) ->>> pykx.q.parse(b'2 + 3') -pykx.List(pykx.q(' -+ -2 -3 -')) -``` - -### [reval](https://code.kx.com/q/ref/eval/#reval) - -Restricted evaluation of a parse tree. - -Behaves similar to [`eval`](#eval) except the evaluation is blocked from modifying values or global state. - -```python ->>> pykx.q.reval(pykx.q.parse(b'til 10')) -pykx.LongVector(q('0 1 2 3 4 5 6 7 8 9')) -``` - -### [show](https://code.kx.com/q/ref/show/) - -Print the string representation of the given q object. - -Note: `show` bypasses typical Python output redirection. - The q function `show` prints directly to file descriptor 1, so typical Python output redirection methods, e.g. [`contextlib.redirect_stdout`](https://docs.python.org/3/library/contextlib.html#contextlib.redirect_stdout), will not affect it. - -```python ->>> pykx.q.show(range(5)) -0 -1 -2 -3 -4 -pykx.Identity(q('::')) -``` - -### [system](https://code.kx.com/q/ref/system/) - -Execute a system command. - -Where x is a string representing a [system command](https://code.kx.com/q/basics/syscmds/) and any parameters to it, executes the command and returns any result. - -```python ->>> pykx.q.system(b'pwd') -pykx.List(q('"/home/user"')) -``` - -### [value](https://code.kx.com/q/ref/value/) - -Returns the value of x. 
- -| Input Type | Output Type | -|------------------|--------------------------------------------| -| dictionary | value of the dictionary | -| symbol atom | value of the variable it names | -| enumeration | corresponding symbol vector | -| string | result of evaluating it in current context | -| list | result of evaluating list as a parse tree | -| projection | list: function followed by argument/s | -| composition | list of composed values | -| derived function | argument of the iterator | -| operator | internal code | -| view | list of metadata | -| lambda | structure | -| file symbol | content of datafile | - -```python ->>> pykx.q.value(pykx.q('`q`w`e!(1 2; 3 4; 5 6)')) -pykx.List(q(' -1 2 -3 4 -5 6 -')) -``` - -## IO - -### [dsave](https://code.kx.com/q/ref/dsave/) - -Write global tables to disk as splayed, enumerated, indexed q tables. - -```python ->>> pykx.q('t: ([] x: 1 2 3; y: 10 20 30)') ->>> pykx.q.dsave(':v', 't') -pykx.SymbolAtom(q('`t')) -``` - -### [get](https://code.kx.com/q/ref/get/) - -Read or memory-map a variable or q data file. - -```python ->>> pykx.q('a: 10') ->>> pykx.q.get('a') -pykx.LongAtom(q('10')) -``` - -### [hclose](https://code.kx.com/q/ref/hopen/#hclose) - -Where x is a connection handle, closes the connection, and destroys the handle. -```python ->>> pykx.q.hclose(pykx.q('3i')) -``` -### [hcount](https://code.kx.com/q/ref/hcount/) - -Size of a file in bytes. -```python ->>> pykx.q.hcount('example.txt') -pykx.LongAtom(q('11')) -``` -### [hdel](https://code.kx.com/q/ref/hdel/) - -Where `x` is a [file symbol atom](#hsym), deletes the file or folder (if empty), and returns `x`. - -```python ->>> pykx.q.hdel('example.txt') -``` - -### [hopen](https://code.kx.com/q/ref/hopen/) - -Open a connection to a file or process. - -```python ->>> pykx.q.hopen('example.txt') -pykx.IntAtom(q('3i')) -``` - -### [hsym](https://code.kx.com/q/ref/hsym/) - -Convert symbols to handle symbols, which can be used for I/O as file descriptors or handles. 
- -```python ->>> pykx.q.hsym('10.43.23.197') -pykx.SymbolAtom(q('`:10.43.23.197')) -``` - -### [load](https://code.kx.com/q/ref/load/) - -Load binary data from a file. - -```python ->>> pykx.q['t'] = pykx.Table([[1, 10], [2, 20], [3, 30]], columns=['x', 'y']) ->>> pykx.q('t') -pykx.Table(pykx.q(' -x y ----- -1 10 -2 20 -3 30 -')) ->>> pykx.q.save('t') # Save t to disk -pykx.SymbolAtom(pykx.q('`:t')) ->>> pykx.q('delete t from `.') # Delete t from memory -pykx.SymbolAtom(pykx.q('`.')) ->>> pykx.q('t') # t is no longer defined -Traceback (most recent call last): -pykx.exceptions.QError: t ->>> pykx.q.load('t') # Load t from disk -pykx.SymbolAtom(pykx.q('`t')) ->>> pykx.q('t') -pykx.Table(pykx.q(' -x y ----- -1 10 -2 20 -3 30 -')) -``` - -### [read0](https://code.kx.com/q/ref/read0/) - -Read text from a file or process handle. - -```python ->>> pykx.q.read0('example.txt') -pykx.List(q(' -"Hello" -"World" -')) -``` - -### [read1](https://code.kx.com/q/ref/read1/) - -Read bytes from a file or named pipe. - -```python ->>> pykx.q.read1('example.txt') -pykx.ByteVector(q('0x48656c6c6f0a576f726c64')) -``` - -### [rload](https://code.kx.com/q/ref/load/#rload) - -Load a splayed table from a directory. - -```python ->>> pykx.q.rload('t') ->>> pykx.q('t') -pykx.Table(q(' -x y ----- -1 10 -2 20 -3 30 -')) -``` - -### [rsave](https://code.kx.com/q/ref/save/#rsave) - -Write a table splayed to a directory. - -```python ->>> pykx.q['t'] = pykx.Table([[1, 10], [2, 20], [3, 30]]) ->>> pykx.q.rsave('t') -pykx.SymbolAtom(q('`:t/')) -``` - -### [save](https://code.kx.com/q/ref/save/) - -Write global data to file or splayed to a directory. - -```python ->>> pykx.q['t'] = pykx.Table([[1, 10], [2, 20], [3, 30]]) ->>> pykx.q.save('t') -pykx.SymbolAtom(q('`:t')) -``` - -### [set](https://code.kx.com/q/ref/get/#set) - -Assign a value to a global variable. - -Persist an object as a file or directory.
- -| Types | Result | -|------------------------------|--------------------------------------| -| pykx.q.set(nam, y) | set global `nam` to `y` | -| pykx.q.set(fil, y) | write `y` to a file | -| pykx.q.set(dir, y) | splay `y` to a directory | -| pykx.q.set([fil, lbs, alg, lvl], y) | write `y` to a file, compressed | -| pykx.q.set([dir, lbs, alg, lvl], y) | splay `y` to a directory, compressed | -| pykx.q.set([dir, dic], y) | splay `y` to a directory, compressed | - -Where - -| Abbreviation | K type | Explanation | -|--------------|--------------|-----------------------------| -| alg | integer atom | compression algorithm | -| dic | dictionary | compression specifications | -| dir | filesymbol | directory in the filesystem | -| fil | filesymbol | file in the filesystem | -| lbs | integer atom | logical block size | -| lvl | integer atom | compression level | -| nam | symbol atom | valid q name | -| t | table | | -| y | (any) | any q object | - -[Compression parameters alg, lbs, and lvl](https://code.kx.com/q/kb/file-compression/#parameters) - -[Compression specification dictionary](https://code.kx.com/q/ref/get/#compression) - -```python ->>> pykx.q.set('a', 42) -pykx.SymbolAtom(q('`a')) ->>> pykx.q('a') -pykx.LongAtom(q('42')) -``` - -## Iterate - -### [each](https://code.kx.com/q/ref/each/) - -Iterate over list and apply a function to each element. - -```python ->>> pykx.q.each(pykx.q.count, [b'Tis', b'but', b'a', b'scratch']) -pykx.LongVector(q('3 3 1 7')) ->>> pykx.q.each(pykx.q.sums, [[2, 3, 4], [[5, 6], [7, 8]], [9, 10, 11, 12]]) -pykx.List(q(' -2 5 9 -((5;6);12 14) -9 19 30 42 -')) -``` - -### [over](https://code.kx.com/q/ref/over/) - -The keywords over and [`scan`](#scan) are covers for the accumulating iterators, Over and Scan. It is good style to use over and scan with unary and binary values. 
- -Just as with Over and Scan, over and scan share the same syntax and perform the same computation; but while scan returns the result of each evaluation, over returns only the last. - -```python ->>> pykx.q.over(pykx.q('*'), [1, 2, 3, 4, 5]) -pykx.LongAtom(q('120')) -``` - -### [peach](https://code.kx.com/q/ref/each/) - -[`each`](#each) and peach perform the same computation and return the same result, but peach will parallelize the work across available threads. - -```python ->>> pykx.q.peach(pykx.q.count, [b'Tis', b'but', b'a', b'scratch']) -pykx.LongVector(q('3 3 1 7')) ->>> pykx.q.peach(pykx.q.sums, [[2, 3, 4], [[5, 6], [7, 8]], [9, 10, 11, 12]]) -pykx.List(q(' -2 5 9 -((5;6);12 14) -9 19 30 42 -')) -``` - -### [prior](https://code.kx.com/q/ref/prior/) - -Applies a function to each item of `x` and the item preceding it, and returns a result of the same length. - -```python ->>> pykx.q.prior(pykx.q('+'), [1, 2, 3, 4, 5]) -pykx.LongVector(pykx.q('1 3 5 7 9')) ->>> pykx.q.prior(lambda x, y: x + y, pykx.LongVector([1, 2, 3, 4, 5])) -pykx.LongVector(pykx.q('0N 3 5 7 9')) -``` - -### [scan](https://code.kx.com/q/ref/over/) - -The keywords [`over`](#over) and scan are covers for the accumulating iterators, Over and Scan. It is good style to use over and scan with unary and binary values. - -Just as with Over and Scan, over and scan share the same syntax and perform the same computation; but while scan returns the result of each evaluation, over returns only the last. - -```python ->>> pykx.q.scan(pykx.q('+'), [1, 2, 3, 4, 5]) -pykx.LongVector(q('1 3 6 10 15')) -``` - -## Join - -### [aj](https://code.kx.com/q/ref/aj/) - -Performs an as-of join across temporal columns in tables. Returns a table with records from the left-join of the first table and the second table.
For each record in the first table, it is matched with the second table over the columns specified in the first input parameter and if there is a match the most recent match will be joined to the record. - -The resulting time column is the value of the boundary used in the first table. - -```python ->>> import pandas as pd ->>> import numpy as np ->>> df1 = pd.DataFrame({ -... 'time': np.array([36061, 36063, 36064], dtype='timedelta64[s]'), -... 'sym': ['msft', 'ibm', 'ge'], 'qty': [100, 200, 150] -... }) ->>> df2 = pd.DataFrame({ -... 'time': np.array([36060, 36060, 36060, 36062], dtype='timedelta64[s]'), -... 'sym': ['ibm', 'msft', 'msft', 'ibm'], 'qty': [100, 99, 101, 98] -... }) ->>> pykx.q.aj(pykx.SymbolVector(['sym', 'time']), df1, df2) -pykx.Table(q(' -time sym qty ------------------------------ -0D10:01:01.000000000 msft 101 -0D10:01:03.000000000 ibm 98 -0D10:01:04.000000000 ge 150 -')) -``` - -### [aj0](https://code.kx.com/q/ref/aj/) - -Performs an as-of join across temporal columns in tables. Returns a table with records from the left-join of the first table and the second table. For each record in the first table, it is matched with the second table over the columns specified in the first input parameter and if there is a match the most recent match will be joined to the record. - -The resulting time column is the actual time of the last value in the second table. - -```python ->>> import pandas as pd ->>> import numpy as np ->>> df1 = pd.DataFrame({ -... 'time': np.array([36061, 36063, 36064], dtype='timedelta64[s]'), -... 'sym': ['msft', 'ibm', 'ge'], 'qty': [100, 200, 150] -... }) ->>> df2 = pd.DataFrame({ -... 'time': np.array([36060, 36060, 36060, 36062], dtype='timedelta64[s]'), -... 'sym': ['ibm', 'msft', 'msft', 'ibm'], 'qty': [100, 99, 101, 98] -...
}) ->>> pykx.q.aj0(pykx.SymbolVector(['sym', 'time']), df1, df2) -pykx.Table(q(' -time sym qty ------------------------------ -0D10:01:00.000000000 msft 101 -0D10:01:02.000000000 ibm 98 -0D10:01:04.000000000 ge 150 -')) -``` - -### [ajf](https://code.kx.com/q/ref/aj/) - -Performs an as-of join across temporal columns in tables with null values being filled. Returns a table with records from the left-join of the first table and the second table. For each record in the first table, it is matched with the second table over the columns specified in the first input parameter and if there is a match the most recent match will be joined to the record. - -The resulting time column is the value of the boundary used in the first table. - -```python ->>> import pandas as pd ->>> import numpy as np ->>> df1 = pd.DataFrame({ -... 'time': np.array([1, 1], dtype='timedelta64[s]'), -... 'sym': ['a', 'b'], -... 'p': pykx.LongVector([0, 1]), -... 'n': ['r', 's'] -... }) ->>> df2 = pd.DataFrame({ -... 'time': np.array([1, 1], dtype='timedelta64[s]'), -... 'sym':['a', 'b'], -... 'p': pykx.q('1 0N') -... }) ->>> pykx.q.ajf(pykx.SymbolVector(['sym', 'time']), df1, df2) -pykx.Table(q(' -time sym p n ----------------------------- -0D00:00:01.000000000 a 1 r -0D00:00:01.000000000 b 1 s -')) -``` - -### [ajf0](https://code.kx.com/q/ref/aj/) - -Performs an as-of join across temporal columns in tables with null values being filled. Returns a table with records from the left-join of the first table and the second table. For each record in the first table, it is matched with the second table over the columns specified in the first input parameter and if there is a match the most recent match will be joined to the record. - -The resulting time column is the actual time of the last value in the second table. - -```python ->>> import pandas as pd ->>> import numpy as np ->>> df1 = pd.DataFrame({ -... 'time': np.array([1, 1], dtype='timedelta64[s]'), -... 'sym':['a', 'b'], -... 
'p': pykx.LongVector([0, 1]), -... 'n': ['r', 's'] -... }) ->>> df2 = pd.DataFrame({ -... 'time': np.array([1, 1], dtype='timedelta64[s]'), -... 'sym': ['a', 'b'], -... 'p': pykx.q('1 0N') -... }) ->>> pykx.q.ajf0(pykx.SymbolVector(['sym', 'time']), df1, df2) -pykx.Table(q(' -time sym p n ----------------------------- -0D00:00:01.000000000 a 1 r -0D00:00:01.000000000 b 1 s -')) -``` - -### [asof](https://code.kx.com/q/ref/asof/) - -Performs an as-of join across temporal columns in tables. The last column of the second table must be temporal and correspond to a column in the first table argument. For each key, the result is the data from the first table at the last time that is less than or equal to the time in the second table. The time column will be removed from the output. - -```python ->>> import pandas as pd ->>> import numpy as np ->>> df1 = pd.DataFrame({ -... 'time': np.array([1, 2, 3, 4], dtype='timedelta64[s]'), -... 'sym': ['a', 'a', 'b', 'b'], 'p': pykx.LongVector([2, 4, 6, 8])}) ->>> df2 = pd.DataFrame({'sym':['b'], 'time': np.array([3], dtype='timedelta64[s]')}) ->>> pykx.q.asof(df1, df2) -pykx.Table(q(' -p -- -6 -')) -``` - -### [ej](https://code.kx.com/q/ref/ej/) - -Equi join. The result has one combined record for each row in the second table that matches the first table on the columns specified in the first function parameter. - -```python ->>> import pandas as pd ->>> df1 = pd.DataFrame({'sym':['a', 'a', 'b', 'a', 'c', 'b', 'c', 'a'], 'p': pykx.LongVector([2, 4, 6, 8, 1, 3, 5, 7])}) ->>> df2 = pd.DataFrame({'sym':['a', 'b'], 'w': ['alpha', 'beta']}) ->>> pykx.q.ej('sym', df1, df2) -pykx.Table(q(' -sym p w ------------ -a 2 alpha -a 4 alpha -b 6 beta -a 8 alpha -b 3 beta -a 7 alpha -')) -``` - -### [ij](https://code.kx.com/q/ref/ij/) - -Inner join. The result has one combined record for each row in the first table that matches the second table on the columns specified in the first function parameter.
- -```python ->>> import pandas as pd ->>> df1 = pd.DataFrame({'sym':['IBM', 'FDP', 'FDP', 'FDP', 'IBM', 'MSFT'], 'p': pykx.LongVector([7, 8, 6, 5, 2, 5])}) ->>> df2 = pd.DataFrame({'sym':['IBM', 'MSFT'], 'ex': ['N', 'CME'], 'MC': pykx.LongVector([1000, 250])}) ->>> df2 = pykx.q.xkey('sym', df2) ->>> pykx.Table(df1) -pykx.Table(q(' -sym p ------- -IBM 7 -FDP 8 -FDP 6 -FDP 5 -IBM 2 -MSFT 5 -')) ->>> df2 -pykx.KeyedTable(q(' -sym | ex MC -----| -------- -IBM | N 1000 -MSFT| CME 250 -')) ->>> pykx.q.ij(df1, df2) -pykx.Table(q(' -sym p ex MC ---------------- -IBM 7 N 1000 -IBM 2 N 1000 -MSFT 5 CME 250 -')) -``` - -### [ijf](https://code.kx.com/q/ref/ij/) - -Inner join nulls filled. The result has one combined record for each row in the first table that matches the second table on the columns specified in the first function parameter. - -```python ->>> import pandas as pd ->>> df1 = pd.DataFrame({'sym':['IBM', 'FDP', 'FDP', 'FDP', 'IBM', 'MSFT'], 'p': pykx.LongVector([7, 8, 6, 5, 2, 5])}) ->>> df2 = pd.DataFrame({'sym':['IBM', 'MSFT'], 'ex': ['N', 'CME'], 'MC': pykx.LongVector([1000, 250])}) ->>> b = pykx.q.xkey('sym', df2) ->>> pykx.Table(df1) -pykx.Table(q(' -sym p ------- -IBM 7 -FDP 8 -FDP 6 -FDP 5 -IBM 2 -MSFT 5 -')) ->>> df2 -pykx.KeyedTable(q(' -sym | ex MC -----| -------- -IBM | N 1000 -MSFT| CME 250 -')) ->>> pykx.q.ijf(df1, df2) -pykx.Table(q(' -sym p ex MC ---------------- -IBM 7 N 1000 -IBM 2 N 1000 -MSFT 5 CME 250 -')) -``` - -### [lj](https://code.kx.com/q/ref/lj/) - -Left join. For each record in the first table, the result has one record with the columns of second table joined to columns of the first using the primary keys of the second table, if no value is present in the second table the record will contain null values in the place of the columns of the second table. 
- -```python ->>> import pandas as pd ->>> df1 = pd.DataFrame({'sym':['IBM', 'FDP', 'FDP', 'FDP', 'IBM', 'MSFT'], 'p': pykx.LongVector([7, 8, 6, 5, 2, 5])}) ->>> df2 = pd.DataFrame({'sym':['IBM', 'MSFT'], 'ex': ['N', 'CME'], 'MC': pykx.LongVector([1000, 250])}) ->>> b = pykx.q.xkey('sym', df2) ->>> pykx.Table(df2) -pykx.Table(q(' -sym p ------- -IBM 7 -FDP 8 -FDP 6 -FDP 5 -IBM 2 -MSFT 5 -')) ->>> df1 -pykx.KeyedTable(q(' -sym | ex MC -----| -------- -IBM | N 1000 -MSFT| CME 250 -')) ->>> pykx.q.lj(df1, df2) -pykx.Table(q(' -sym p ex MC ---------------- -IBM 7 N 1000 -FDP 8 -FDP 6 -FDP 5 -IBM 2 N 1000 -MSFT 5 CME 250 -')) -``` - -### [ljf](https://code.kx.com/q/ref/lj/) - -Left join nulls filled. For each record in the first table, the result has one record with the columns of second table joined to columns of the first using the primary keys of the second table, if no value is present in the second table the record will contain null values in the place of the columns of the second table. - -```python ->>> import pandas as pd ->>> df1 = pd.DataFrame({'sym':['IBM', 'FDP', 'FDP', 'FDP', 'IBM', 'MSFT'], 'p': pykx.LongVector([7, 8, 6, 5, 2, 5])}) ->>> df2 = pd.DataFrame({'sym':['IBM', 'MSFT'], 'ex': ['N', 'CME'], 'MC': pykx.LongVector([1000, 250])}) ->>> b = pykx.q.xkey('sym', df2) ->>> pykx.Table(df1) -pykx.Table(q(' -sym p ------- -IBM 7 -FDP 8 -FDP 6 -FDP 5 -IBM 2 -MSFT 5 -')) ->>> df1 -pykx.KeyedTable(q(' -sym | ex MC -----| -------- -IBM | N 1000 -MSFT| CME 250 -')) ->>> pykx.q.ljf(df1, df2) -pykx.Table(q(' -sym p ex MC ---------------- -IBM 7 N 1000 -FDP 8 -FDP 6 -FDP 5 -IBM 2 N 1000 -MSFT 5 CME 250 -')) -``` - -### [pj](https://code.kx.com/q/ref/pj/) - -Plus join. 
For each record in the first table, the result has one record with the columns of second table joined to columns of the first using the primary keys of the second table, if a value is present it is added to the columns of the first table, if no value is present the columns are left unchanged and new columns are set to 0. - -```python ->>> import pandas as pd ->>> df1 = pd.DataFrame({'a': pykx.LongVector([1, 2, 3]), 'b':['x', 'y', 'z'], 'c': pykx.LongVector([10, 20, 30])}) ->>> pykx.Table(df1) -pykx.Table(q(' -a b c ------- -1 x 10 -2 y 20 -3 z 30 -')) ->>> df2 = pd.DataFrame({ -... 'a': pykx.LongVector([1, 3]), -... 'b':['x', 'z'], -... 'c': pykx.LongVector([1, 2]), -... 'd': pykx.LongVector([10, 20]) -... }) ->>> df2 = pykx.q.xkey(pykx.SymbolVector(['a', 'b']), df2) -pykx.KeyedTable(q(' -a b| c d ----| ---- -1 x| 1 10 -3 z| 2 20 -')) ->>> pykx.q.pj(df1, df2) -pykx.Table(q(' -a b c d ---------- -1 x 11 10 -2 y 20 0 -3 z 32 20 -')) -``` - -### [uj](https://code.kx.com/q/ref/uj/) - -Union join. Where the first table and the second table are both keyed or both unkeyed tables, returns the union of the columns, filled with nulls where necessary. If the tables have matching key columns then the records in the second table will be used to update the first table, if the tables are not keyed then the records from the second table will be joined onto the end of the first table. - -```python ->>> import pandas as pd ->>> df1 = pd.DataFrame({'sym':['IBM', 'FDP', 'FDP', 'FDP', 'IBM', 'MSFT'], 'p': pykx.LongVector([7, 8, 6, 5, 2, 5])}) ->>> df2 = pd.DataFrame({'sym':['IBM', 'MSFT'], 'ex': ['N', 'CME'], 'MC': pykx.LongVector([1000, 250])}) ->>> df1 - sym p -0 IBM 7 -1 FDP 8 -2 FDP 6 -3 FDP 5 -4 IBM 2 -5 MSFT 5 ->>> df2 - sym ex MC -0 IBM N 1000 -1 MSFT CME 250 ->>> pykx.q.uj(df1, df2) -pykx.Table(q(' -sym p ex MC ---------------- -IBM 7 -FDP 8 -FDP 6 -FDP 5 -IBM 2 -MSFT 5 -IBM N 1000 -MSFT CME 250 -')) -``` - -### [ujf](https://code.kx.com/q/ref/uj/) - -Union join nulls filled. 
Where the first table and the second table are both keyed or both unkeyed tables, returns the union of the columns, filled with nulls where necessary. If the tables have matching key columns then the records in the second table will be used to update the first table, if the tables are not keyed then the records from the second table will be joined onto the end of the first table. - -```python ->>> import pandas as pd ->>> df1 = pd.DataFrame({'sym':['IBM', 'FDP', 'FDP', 'FDP', 'IBM', 'MSFT'], 'p': pykx.LongVector([7, 8, 6, 5, 2, 5])}) ->>> df2 = pd.DataFrame({'sym':['IBM', 'MSFT'], 'ex': ['N', 'CME'], 'MC': pykx.LongVector([1000, 250])}) ->>> df1 - sym p -0 IBM 7 -1 FDP 8 -2 FDP 6 -3 FDP 5 -4 IBM 2 -5 MSFT 5 ->>> df2 - sym ex MC -0 IBM N 1000 -1 MSFT CME 250 ->>> pykx.q.ujf(df1, df2) -pykx.Table(q(' -sym p ex MC ---------------- -IBM 7 -FDP 8 -FDP 6 -FDP 5 -IBM 2 -MSFT 5 -IBM N 1000 -MSFT CME 250 -')) -``` - -### [wj](https://code.kx.com/q/ref/wj/) - -Window join. Returns for each record in the table, a record with additional columns `c0` and `c1`, which contain the results of the aggregation functions applied to values over the matching intervals defined in the first parameter of the function. 
- -```python ->>> import pandas as pd ->>> import numpy as np ->>> pykx.q('t: ([]sym:3#`ibm;time:10:01:01 10:01:04 10:01:08;price:100 101 105)') -pykx.Table(pykx.q(' -sym time price ------------------- -ibm 10:01:01 100 -ibm 10:01:04 101 -ibm 10:01:08 105 -')) ->>> df_t = pd.DataFrame({ - 'sym': ['ibm', 'ibm', 'ibm'], - 'time': np.array([36061, 36064, 36068], dtype='timedelta64[s]'), - 'price': pykx.LongVector([100, 101, 105]) - }) - sym time price -0 ibm 0 days 10:01:01 100 -1 ibm 0 days 10:01:04 101 -2 ibm 0 days 10:01:08 105 ->>> pykx.q('q:([]sym:`ibm; time:10:01:01+til 9; ask: (101 103 103 104 104 107 108 107 108); bid: (98 99 102 103 103 104 106 106 107))') -pykx.Table(pykx.q(' -sym time ask bid --------------------- -ibm 10:01:01 101 98 -ibm 10:01:02 103 99 -ibm 10:01:03 103 102 -ibm 10:01:04 104 103 -ibm 10:01:05 104 103 -ibm 10:01:06 107 104 -ibm 10:01:07 108 106 -ibm 10:01:08 107 106 -ibm 10:01:09 108 107 -')) ->>> f = pykx.SymbolVector(['sym', 'time']) ->>> w = pykx.q('-2 1+\:t.time') ->>> pykx.q.wj(w, f, df_t, pykx.q('(q;(max;`ask);(min;`bid))')) -pykx.Table(pykx.q(' -sym time price ask bid --------------------------- -ibm 10:01:01 100 103 98 -ibm 10:01:04 101 104 99 -ibm 10:01:08 105 108 104 -')) -``` - -### [wj1](https://code.kx.com/q/ref/wj/) - -Window join. Returns for each record in the table, a record with additional columns `c0` and `c1`, which contain the results of the aggregation functions applied to values over the matching intervals defined in the first parameter of the function. - -```python ->>> import pandas as pd ->>> import numpy as np ->>> pykx.q('t: ([]sym:3#`ibm;time:10:01:01 10:01:04 10:01:08;price:100 101 105)') -pykx.Table(pykx.q(' -sym time price ------------------- -ibm 10:01:01 100 -ibm 10:01:04 101 -ibm 10:01:08 105 -')) ->>> df_t = pd.DataFrame({ -... 'sym': ['ibm', 'ibm', 'ibm'], -... 'time': np.array([36061, 36064, 36068], dtype='timedelta64[s]'), -... 'price': pykx.LongVector([100, 101, 105]) -... 
}) - sym time price -0 ibm 0 days 10:01:01 100 -1 ibm 0 days 10:01:04 101 -2 ibm 0 days 10:01:08 105 ->>> pykx.q('q:([]sym:`ibm; time:10:01:01+til 9; ask: (101 103 103 104 104 107 108 107 108); bid: (98 99 102 103 103 104 106 106 107))') -pykx.Table(pykx.q(' -sym time ask bid --------------------- -ibm 10:01:01 101 98 -ibm 10:01:02 103 99 -ibm 10:01:03 103 102 -ibm 10:01:04 104 103 -ibm 10:01:05 104 103 -ibm 10:01:06 107 104 -ibm 10:01:07 108 106 -ibm 10:01:08 107 106 -ibm 10:01:09 108 107 -')) ->>> f = pykx.SymbolVector(['sym', 'time']) ->>> w = pykx.q('-2 1+\:t.time') ->>> pykx.q.wj(w, f, df_t, pykx.q('(q;(max;`ask);(min;`bid))')) -pykx.Table(pykx.q(' -sym time price ask bid --------------------------- -ibm 10:01:01 100 103 98 -ibm 10:01:04 101 104 99 -ibm 10:01:08 105 108 104 -')) -``` - -## List - -### [count](https://code.kx.com/q/ref/count/) - -Count the items of a list or dictionary. - -```python ->>> pykx.q.count([1, 2, 3]) -pykx.LongAtom(q('3')) -``` - -### [cross](https://code.kx.com/q/ref/cross/) - -Returns all possible combinations of x and y. - -```python ->>> pykx.q.cross([1, 2, 3], [4, 5, 6]) -pykx.List(q(' -1 4 -1 5 -1 6 -2 4 -2 5 -2 6 -3 4 -3 5 -3 6 -')) -``` - -### [cut](https://code.kx.com/q/ref/cut/) - -Cut a list or table into sub-arrays. - -```python ->>> pykx.q.cut(3, range(10)) -pykx.List(q(' -0 1 2 -3 4 5 -6 7 8 -,9 -')) -``` - -### [enlist](https://code.kx.com/q/ref/enlist/) - -Returns a list with its arguments as items. - -```python ->>> pykx.q.enlist(1, 2, 3, 4) -pykx.LongVector(q('1 2 3 4')) -``` - -### [fills](https://code.kx.com/q/ref/fills/) - -Replace nulls with preceding non-nulls. 
- -```python ->>> a = pykx.q('0N 1 2 0N 0N 2 3 4 5 0N 4') ->>> pykx.q.fills(a) -pykx.LongVector(q('0N 1 2 2 2 2 3 4 5 5 4')) -``` - -### [first](https://code.kx.com/q/ref/first/) - -First item of a list -```python ->>> pykx.q.first([1, 2, 3, 4, 5]) -pykx.LongAtom(q('1')) -``` - -### [flip](https://code.kx.com/q/ref/flip/) - -Returns x transposed, where x may be a list of lists, a dictionary or a table. - -```python ->>> pykx.q.flip([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) -pykx.List(q(' -1 6 -2 7 -3 8 -4 9 -5 10 -')) -``` - -### [group](https://code.kx.com/q/ref/group/) - -Returns a dictionary in which the keys are the distinct items of x, and the values the indexes where the distinct items occur. - -The order of the keys is the order in which they appear in x. - -```python ->>> pykx.q.group(b'mississippi') -pykx.Dictionary(q(' -m| ,0 -i| 1 4 7 10 -s| 2 3 5 6 -p| 8 9 -')) -``` - -### [inter](https://code.kx.com/q/ref/inter/) - -Intersection of two lists or dictionaries. - -```python ->>> pykx.q.inter([1, 2, 3], [2, 3, 4]) -pykx.LongVector(q('2 3')) -``` - -### [last](https://code.kx.com/q/ref/first/#last) - -Last item of a list - -```python ->>> pykx.q.last([1, 2, 3]) -pykx.LongAtom(q('3')) -``` - -### [mcount](https://code.kx.com/q/ref/count/#mcount) - -Returns the x-item moving counts of the non-null items of y. The first x items of the result are the counts so far, and thereafter the result is the moving count. - -```python ->>> pykx.q.mcount(3, pykx.q('1 2 3 4 5 0N 6 7 8')) -pykx.IntVector(q('1 2 3 3 3 2 2 2 3i')) -``` - -### [next](https://code.kx.com/q/ref/next/) - -Next items in a list. - -```python ->>> pykx.q.next([1, 2, 3, 4]) -pykx.LongVector(q('2 3 4 0N')) -``` - -### [prev](https://code.kx.com/q/ref/next/#prev) - -Immediately preceding items in a list. - -```python ->>> pykx.q.prev([1, 2, 3, 4]) -pykx.LongVector(q('0N 1 2 3')) -``` - -### [raze](https://code.kx.com/q/ref/raze/) - -Return the items of x joined, collapsing one level of nesting. 
- -```python ->>> pykx.q.raze([[1, 2], [3, 4]]) -pykx.LongVector(q('1 2 3 4')) -``` - -### [reverse](https://code.kx.com/q/ref/reverse/) - -Reverse the order of items of a list or dictionary. - -```python ->>> pykx.q.reverse([1, 2, 3, 4, 5]) -pykx.List(q(' -5 -4 -3 -2 -1 -')) -``` - -### [rotate](https://code.kx.com/q/ref/rotate/) - -Shift the items of a list to the left or right. - -```python ->>> pykx.q.rotate(2, [1, 2, 3, 4, 5]) -pykx.LongVector(q('3 4 5 1 2')) -``` - -### [sublist](https://code.kx.com/q/ref/sublist/) - -Select a sublist of a list. - -```python ->>> pykx.q.sublist(2, [1, 2, 3, 4, 5]) -pykx.LongVector(q('1 2')) -``` - -### [sv](https://code.kx.com/q/ref/sv/) - -"Scalar from vector" - -- join strings, symbols, or filepath elements -- decode a vector to an atom - -```python ->>> pykx.q.sv(10, [1, 2, 3, 4]) -pykx.LongAtom(q('1234')) -``` - -### [til](https://code.kx.com/q/ref/til/) - -First x natural numbers. - -```python ->>> pykx.q.til(10) -pykx.LongVector(q('0 1 2 3 4 5 6 7 8 9')) -``` - -### [union](https://code.kx.com/q/ref/union/) - -Union of two lists. - -```python ->>> pykx.q.union([1, 2, 3, 3, 5], [2, 4, 6, 8]) -pykx.LongVector(q('1 2 3 5 4 6 8')) -``` - -### [vs](https://code.kx.com/q/ref/vs/) - -"Vector from scalar" - -- partition a symbol, string, or bytestream -- encode a vector from an atom, or a matrix from a vector - -```python ->>> pykx.q.vs(b',', b'one,two,three') -pykx.List(q(' -"one" -"two" -"three" -')) -``` - -### [where](https://code.kx.com/q/ref/where/) - -Copies of indexes of a list or keys of a dictionary. - -```python ->>> pykx.q.where(pykx.BooleanVector([True, False, True, True, False])) -pykx.LongVector(q('0 2 3')) ->>> pykx.q.where(pykx.q('1 0 0 1 0 1 1')) -pykx.LongVector(q('0 3 5 6')) -``` - -### [xprev](https://code.kx.com/q/ref/next/#xprev) - -Nearby items in a list. 
- -```python ->>> pykx.q.xprev(2, [1, 2, 3, 4, 5, 6]) -pykx.LongVector(q('0N 0N 1 2 3 4')) -``` - -There is no `xnext` function, but `xprev` with a negative number as its first argument can achieve this. - -```python ->>> pykx.q.xprev(-2, [1, 2, 3, 4, 5, 6]) -pykx.LongVector(q('3 4 5 6 0N 0N')) -``` - -## Logic - -### [all](https://code.kx.com/q/ref/all-any/#all) - -Everything is true. - -```python ->>> pykx.q.all([True, True, True, True]) -pykx.BooleanAtom(q('1b')) ->>> pykx.q.all([True, True, False, True]) -pykx.BooleanAtom(q('0b')) -``` - -### [any](https://code.kx.com/q/ref/all-any/#any) - -Something is true. - -```python ->>> pykx.q.any([False, False, True, False]) -pykx.BooleanAtom(q('1b')) ->>> pykx.q.any([False, False]) -pykx.BooleanAtom(q('0b')) -``` - -## Math - -### [abs](https://code.kx.com/q/ref/abs/) - -Where x is a numeric or temporal, returns the absolute value of x. Null is returned if x is null. - -```python ->>> pykx.q.abs(-5) -pykx.LongAtom(q('5')) -``` - -### [acos](https://code.kx.com/q/ref/cos/) - -The arccosine of x; that is, the value whose cosine is x. The result is in radians and lies between 0 and π. - -```python ->>> pykx.q.acos(0.5) -pykx.FloatAtom(q('1.047198')) -``` - -### [asin](https://code.kx.com/q/ref/sin/) - -The arcsine of x; that is, the value whose sine is x. The result is in radians and lies between -π / 2 and π / 2. (The range is approximate due to rounding errors). Null is returned if the argument is not between -1 and 1. - -```python ->>> pykx.q.asin(0.5) -pykx.FloatAtom(q('0.5235988')) -``` - -### [atan](https://code.kx.com/q/ref/tan/) - -The arctangent of x; that is, the value whose tangent is x. The result is in radians and lies between -π / 2 and π / 2. -```python ->>> pykx.q.atan(0.5) -pykx.FloatAtom(q('0.4636476')) -``` - -### [avg](https://code.kx.com/q/ref/avg/#avg) - -Arithmetic mean.
- -```python ->>> pykx.q.avg([1, 2, 3, 4, 7]) -pykx.FloatAtom(q('3.4')) -``` - -### [avgs](https://code.kx.com/q/ref/avg/#avgs) - -Running mean. - -```python ->>> pykx.q.avgs([1, 2, 3, 4, 7]) -pykx.FloatVector(q('1 1.5 2 2.5 3.4')) -``` - -### [ceiling](https://code.kx.com/q/ref/ceiling/) - -Round up. - -```python ->>> pykx.q.ceiling([-2.7, -1.1, 0, 1.1, 2.7]) -pykx.LongVector(q('-2 -1 0 2 3')) -``` - -### [cor](https://code.kx.com/q/ref/cor/) - -Correlation. - -```python ->>> pykx.q.cor(pykx.LongVector([29, 10, 54]), pykx.LongVector([1, 3, 9])) -pykx.FloatAtom(q('0.7727746')) -``` - -### [cos](https://code.kx.com/q/ref/cos/) - -The cosine of x, taken to be in radians. The result is between -1 and 1, or null if the argument is null or infinity. - -```python ->>> pykx.q.cos(0.2) -pykx.FloatAtom(q('0.9800666')) -``` - -### [cov](https://code.kx.com/q/ref/cov/) - -Where x and y are conforming numeric lists returns their covariance as a floating-point number. Applies to all numeric data types and signals an error with temporal types, char and sym. - -```python ->>> pykx.q.cov(pykx.LongVector([29, 10, 54]), pykx.LongVector([1, 3, 9])) -pykx.FloatAtom(q('47.33333')) -``` - -### [deltas](https://code.kx.com/q/ref/deltas/) - -Where x is a numeric or temporal vector, returns differences between consecutive pairs of its items. - -```python ->>> pykx.q.deltas(pykx.LongVector([1, 4, 9, 16])) -pykx.LongVector(q('1 3 5 7')) -``` - -### [dev](https://code.kx.com/q/ref/dev/) - -Standard deviation. - -```python ->>> pykx.q.dev(pykx.LongVector([10, 343, 232, 55])) -pykx.FloatAtom(q('134.3484')) -``` - -### [div](https://code.kx.com/q/ref/div/) - -Integer division. - -```python ->>> pykx.q.div(7, 3) -pykx.LongAtom(q('2')) -``` - -### [ema](https://code.kx.com/q/ref/ema/) - -Exponential moving average.
- -```python ->>> pykx.q.ema(0.5, [1, 2, 3, 4, 5]) -pykx.FloatVector(q('1 1.5 2.25 3.125 4.0625')) -``` - -### [exp](https://code.kx.com/q/ref/exp/) - -Raise *e* to a power. - -```python ->>> pykx.q.exp(1) -pykx.FloatAtom(q('2.718282')) -``` - -### [floor](https://code.kx.com/q/ref/floor/) - -Round down. - -```python ->>> pykx.q.floor([-2.7, -1.1, 0, 1.1, 2.7]) -pykx.LongVector(q('-3 -2 0 1 2')) -``` - -### [inv](https://code.kx.com/q/ref/inv/) - -Matrix inverse. - -```python ->>> a = pykx.q('3 3# 2 4 8 3 5 6 0 7 1f') -pykx.List(q(' -2 4 8 -3 5 6 -0 7 1 -')) ->>> pykx.q.inv(a) -pykx.List(q(' --0.4512195 0.6341463 -0.195122 --0.03658537 0.02439024 0.1463415 -0.2560976 -0.1707317 -0.02439024 -')) -``` - -### [log](https://code.kx.com/q/ref/log/) - -Natural logarithm. - -```python ->>> pykx.q.log([1, 2, 3]) -pykx.FloatVector(q('0 0.6931472 1.098612')) -``` - -### [lsq](https://code.kx.com/q/ref/lsq/) - -Least squares, matrix divide. - -```python ->>> a = pykx.q('1f+3 4#til 12') -pykx.List(q(' -1 2 3 4 -5 6 7 8 -9 10 11 12 -')) ->>> b = pykx.q('4 4#2 7 -2 5 5 3 6 1 -2 5 2 7 5 0 3 4f') -pykx.List(q(' -2 7 -2 5 -5 3 6 1 --2 5 2 7 -5 0 3 4 -')) ->>> pykx.q.lsq(a, b) -pykx.List(q(' --0.1233333 0.16 0.4766667 0.28 -0.07666667 0.6933333 0.6766667 0.5466667 -0.2766667 1.226667 0.8766667 0.8133333 -')) -``` - -### [mavg](https://code.kx.com/q/ref/avg/#mavg) - -Moving averages. - -```python ->>> pykx.q.mavg(3, [1, 2, 3, 5, 7, 10]) -pykx.FloatVector(q('1 1.5 2 3.333333 5 7.333333')) -``` - -### [max](https://code.kx.com/q/ref/max/) - -Maximum. - -```python ->>> pykx.q.max([0, 7, 2, 4 , 1, 3]) -pykx.LongAtom(q('7')) -``` - -### [maxs](https://code.kx.com/q/ref/max/#maxs) - -Maximums. - -```python ->>> pykx.q.maxs([1, 2, 5, 4, 7, 1, 2]) -pykx.LongVector(q('1 2 5 5 7 7 7')) -``` - -### [mdev](https://code.kx.com/q/ref/dev/#mdev) - -Moving deviations. 
- -```python ->>> pykx.q.mdev(3, [1, 2, 5, 4, 7, 1, 2]) -pykx.FloatVector(q('0 0.5 1.699673 1.247219 1.247219 2.44949 2.624669')) -``` - -### [med](https://code.kx.com/q/ref/med/) - -Median. - -```python ->>> pykx.q.med([1, 2, 3, 4, 4, 1, 2, 4, 5]) -pykx.FloatAtom(q('3f')) -``` - -### [min](https://code.kx.com/q/ref/min/) - -Minimum. - -```python ->>> pykx.q.min([7, 5, 2, 4, 6, 5, 1, 4]) -pykx.LongAtom(q('1')) -``` - -### [mins](https://code.kx.com/q/ref/min/#mins) - -Minimums. - -```python ->>> pykx.q.mins([7, 5, 2, 4, 6, 5, 1, 4]) -pykx.LongVector(q('7 5 2 2 2 2 1 1')) -``` - -### [mmax](https://code.kx.com/q/ref/max/#mmax) - -Moving maximums. - -```python ->>> pykx.q.mmax(4, [7, 5, 2, 4, 6, 5, 1, 4]) -pykx.LongVector(q('7 7 7 7 6 6 6 6')) -``` - -### [mmin](https://code.kx.com/q/ref/min/#mmin) - -Moving minimums. - -```python ->>> pykx.q.mmin(4, pykx.LongVector([7, 5, 2, 4, 6, 5, 1, 4])) -pykx.LongVector(q('7 5 2 2 2 2 1 1')) -``` - -### [mmu](https://code.kx.com/q/ref/mmu/) - -Matrix multiply, dot product. - -```python ->>> a = pykx.q('2 4#2 4 8 3 5 6 0 7f') ->>> a -pykx.List(q(' -2 4 8 3 -5 6 0 7 -')) ->>> b = pykx.q('4 3#"f"$til 12') ->>> b -pykx.List(q(' -0 1 2 -3 4 5 -6 7 8 -9 10 11 -')) ->>> pykx.q.mmu(a, b) -pykx.List(q(' -87 104 121 -81 99 117 -')) -``` - -### [mod](https://code.kx.com/q/ref/mod/) - -Modulus. - -```python ->>> pykx.q.mod([1, 2, 3, 4, 5, 6, 7], 4) -pykx.LongVector(q('1 2 3 0 1 2 3')) -``` - -### [msum](https://code.kx.com/q/ref/sum/#msum) - -Moving sums. - -```python ->>> pykx.q.msum(3, [1, 2, 3, 4, 5, 6, 7]) -pykx.LongVector(q('1 3 6 9 12 15 18')) -``` - -### [neg](https://code.kx.com/q/ref/neg/) - -Negate. - -```python ->>> pykx.q.neg([2, 0, -1, 3, -5]) -pykx.LongVector(q('-2 0 1 -3 5')) -``` - -### [prd](https://code.kx.com/q/ref/prd/) - -Product. - -```python ->>> pykx.q.prd([1, 2, 3, 4, 5]) -pykx.LongAtom(q('120')) -``` - -### [prds](https://code.kx.com/q/ref/prd/#prds) - -Cumulative products. 
- -```python ->>> pykx.q.prds([1, 2, 3, 4, 5]) -pykx.LongVector(q('1 2 6 24 120')) -``` - -### [rand](https://code.kx.com/q/ref/rand/) - -Pick randomly. - -```python ->>> pykx.q.rand([1, 2, 3, 4, 5]) -pykx.LongAtom(q('2')) -``` - -### [ratios](https://code.kx.com/q/ref/ratios/) - -Ratios between items. - -```python ->>> pykx.q.ratios([1, 2, 3, 4, 5]) -pykx.FloatVector(q('0n 2 1.5 1.333333 1.25')) -``` - -### [reciprocal](https://code.kx.com/q/ref/reciprocal/) - -Reciprocal of a number. - -```python ->>> pykx.q.reciprocal([1, 0, 3]) -pykx.FloatVector(q('1 0w 0.3333333')) -``` - -### [scov](https://code.kx.com/q/ref/cov/#scov) - -Sample covariance. - -```python ->>> pykx.q.scov(pykx.LongVector([2, 3, 5, 7]), pykx.LongVector([4, 3, 0, 2])) -pykx.FloatAtom(q('-2.416667')) -``` - -### [sdev](https://code.kx.com/q/ref/dev/#sdev) - -Sample standard deviation. - -```python ->>> pykx.q.sdev(pykx.LongVector([10, 343, 232, 55])) -pykx.FloatAtom(q('155.1322')) -``` - -### [signum](https://code.kx.com/q/ref/signum/) - -Where x (or its underlying value for temporals) is - -- null or negative, returns `-1i` -- zero, returns `0i` -- positive, returns `1i` - -```python ->>> pykx.q.signum([-2, 0, 1, 3]) -pykx.IntVector(q('-1 0 1 1i')) -``` - -### [sin](https://code.kx.com/q/ref/sin/) - -Sine. - -```python ->>> pykx.q.sin(0.5) -pykx.FloatAtom(q('0.4794255')) -``` - -### [sqrt](https://code.kx.com/q/ref/sqrt/) - -Square root. - -```python ->>> pykx.q.sqrt([-1, 0, 25, 50]) -pykx.FloatVector(q('0n 0 5 7.071068')) -``` - -### [sum](https://code.kx.com/q/ref/sum/) - -Total. - -```python ->>> pykx.q.sum(pykx.LongVector([2, 3, 5, 7])) -pykx.LongAtom(q('17')) -``` - -### [sums](https://code.kx.com/q/ref/sum/#sums) - -Cumulative total. - -```python ->>> pykx.q.sums(pykx.LongVector([2, 3, 5, 7])) -pykx.LongVector(q('2 5 10 17')) -``` - -### [svar](https://code.kx.com/q/ref/var/#svar) - -Sample variance. 
- -```python ->>> pykx.q.svar(pykx.LongVector([2, 3, 5, 7])) -pykx.FloatAtom(q('4.916667')) -``` - -### [tan](https://code.kx.com/q/ref/tan/) - -Tangent. - -```python ->>> pykx.q.tan(0.5) -pykx.FloatAtom(q('0.5463025')) -``` - -### [var](https://code.kx.com/q/ref/var/) - -Variance. - -```python ->>> pykx.q.var(pykx.LongVector([2, 3, 5, 7])) -pykx.FloatAtom(q('3.6875')) -``` - -### [wavg](https://code.kx.com/q/ref/avg/#wavg) - -Weighted average. - -```python ->>> pykx.q.wavg([2, 3, 4], [1, 2 ,4]) -pykx.FloatAtom(q('2.666667')) -``` - -### [within](https://code.kx.com/q/ref/within/) - -Check bounds. - -```python ->>> pykx.q.within([1, 3, 10, 6, 4], [2, 6]) -pykx.BooleanVector(q('01011b')) -``` - -### [wsum](https://code.kx.com/q/ref/sum/#wsum) - -Weighted sum. - -```python ->>> pykx.q.wsum([2, 3, 4], [1, 2, 4]) # equivalent to 2 * 1 + 3 * 2 + 4 * 4 -pykx.LongAtom(q('24')) -``` - -### [xexp](https://code.kx.com/q/ref/exp/#xepx) - -Raise x to a power. - -```python ->>> pykx.q.xexp(2, 8) -pykx.FloatAtom(q('256f')) -``` - -### [xlog](https://code.kx.com/q/ref/log/#xlog) - -Logarithm base x. - -```python ->>> pykx.q.xlog(2, 8) -pykx.FloatAtom(q('3f')) -``` - -## Meta - -### [attr](https://code.kx.com/q/ref/attr/) - -Attributes of an object, returns a Symbol Atom or Vector. - -The possible attributes are: - -| code | attribute | -|------|-----------------------| -| s | sorted | -| u | unique (hash table) | -| p | partitioned (grouped) | -| g | true index (dynamic attribute): enables constant time update and access for real-time tables | - -```python ->>> pykx.q.attr([1,2,3]) -pykx.SymbolAtom(q('`')) ->>> pykx.q.attr(pykx.q('asc 1 2 3')) -pykx.SymbolAtom(q('`s')) -``` - -### [null](https://code.kx.com/q/ref/null/) - -Is null. 
- -```python ->>> pykx.q.null(1) -pykx.BooleanAtom(q('0b')) ->>> pykx.q.null(float('NaN')) -pykx.BooleanAtom(q('1b')) ->>> pykx.q.null(None) -pykx.BooleanAtom(q('1b')) -``` - -### [tables](https://code.kx.com/q/ref/tables/) - -List of tables in a namespace. - -```python ->>> pykx.q('exampleTable: ([] a: til 10; b: 10?10)') -pykx.Identity(pykx.q('::')) ->>> pykx.q('exampleTable') -pykx.Table(q(' -a b ---- -0 8 -1 1 -2 9 -3 5 -4 4 -5 6 -6 6 -7 1 -8 8 -9 5 -')) ->>> pykx.q.tables('.') -pykx.SymbolVector(q(',`exampleTable')) -``` - -### [type](https://code.kx.com/q/ref/type/) - -Underlying [k type](https://code.kx.com/q/ref/#datatypes) of an [object](../wrappers.md). - -```python ->>> pykx.q.type(1) -pykx.ShortAtom(q('-7h')) ->>> pykx.q.type([1, 2, 3]) -pykx.ShortAtom(q('0h')) ->>> pykx.q.type(pykx.LongVector([1, 2, 3])) -pykx.ShortAtom(q('7h')) -``` - -### [view](https://code.kx.com/q/ref/view/) - -Expression defining a view. - -```python ->>> pykx.q('v::2+a*3') ->>> pykx.q('a:5') ->>> pykx.q('v') -pykx.LongAtom(q('17')) ->>> pykx.q.view('v') -pykx.CharVector(q('"2+a*3"')) -``` - -### [views](https://code.kx.com/q/ref/view/#views) - -List views defined in the default namespace. - -```python ->>> pykx.q('v::2+a*3') ->>> pykx.q('a:5') ->>> pykx.q('v') -pykx.LongAtom(q('17')) ->>> pykx.q.views() -pykx.SymbolVector(q(',`v')) -``` - -## Queries - -### [fby](https://code.kx.com/q/ref/fby/) - -Apply an aggregate to groups. - -```python ->>> d = pykx.q('data: 10?10') -pykx.LongVector(pykx.q('4 9 2 7 0 1 9 2 1 8')) ->>> group = pykx.SymbolVector(['a', 'b', 'a', 'b', 'c', 'd', 'c', 'd', 'd', 'c']) -pykx.SymbolVector(pykx.q('`a`b`a`b`c`d`c`d`d`c')) ->>> pykx.q.fby(pykx.q('(sum; data)'), group) -pykx.LongVector(pykx.q('6 16 6 16 17 4 17 4 4 17')) -``` - -## Sort - -### [asc](https://code.kx.com/q/ref/asc/) - -Ascending sort.
- -```python ->>> pykx.q.asc([4, 2, 5, 1, 0]) -pykx.LongVector(q('`s#0 1 2 4 5')) -``` - -### [bin](https://code.kx.com/q/ref/bin/) - -Binary search. - -```python ->>> pykx.q.bin([0, 2, 4, 6, 8, 10], 5) -pykx.LongAtom(q('2')) ->>> pykx.q.bin([0, 2, 4, 6, 8, 10], [-10, 0, 4, 5, 6, 20]) -pykx.LongVector(q('-1 0 2 2 3 5')) -``` - -### [binr](https://code.kx.com/q/ref/bin/#binr) - -Binary search right. - -```python ->>> pykx.q.binr([0, 2, 4, 6, 8, 10], 5) -pykx.LongAtom(q('3')) ->>> pykx.q.binr([0, 2, 4, 6, 8, 10], [-10, 0, 4, 5, 6, 20]) -pykx.LongVector(q('0 0 2 3 3 6')) -``` - -### [desc](https://code.kx.com/q/ref/desc/) - -Descending sort. - -```python ->>> pykx.q.desc([4, 2, 5, 1, 0]) -pykx.LongVector(q('5 4 2 1 0')) -``` - -### [differ](https://code.kx.com/q/ref/differ/) - -Find where list items change value. - -```python ->>> pykx.q.differ([1, 1, 2, 3, 4, 4]) -pykx.BooleanVector(q('101110b')) -``` - -### [distinct](https://code.kx.com/q/ref/distinct/) - -Unique items of a list. - -```python ->>> pykx.q.distinct([1, 3, 1, 4, 5, 1, 2, 3]) -pykx.LongVector(q('1 3 4 5 2')) -``` - -### [iasc](https://code.kx.com/q/ref/asc/#iasc) - -Ascending grade. - -```python ->>> pykx.q.iasc([4, 2, 5, 1, 0]) -pykx.LongVector(q('4 3 1 0 2')) -``` - -### [idesc](https://code.kx.com/q/ref/desc/#idesc) - -Descending grade. - -```python ->>> pykx.q.idesc([4, 2, 5, 1, 0]) -pykx.LongVector(q('2 0 1 3 4')) -``` - -### [rank](https://code.kx.com/q/ref/rank/) - -Position in the sorted list. - -Where x is a list or dictionary, returns for each item in x the index of where it would occur in the sorted list or dictionary. - -```python ->>> pykx.q.rank([4, 2, 5, 1, 0]) -pykx.LongVector(q('3 2 4 1 0')) ->>> pykx.q.rank({'c': 3, 'a': 4, 'b': 1}) -pykx.LongVector(q('2 0 1')) -``` - -### [xbar](https://code.kx.com/q/ref/xbar/) - -Round y down to the nearest multiple of x. 
- -```python ->>> pykx.q.xbar(5, 3) -pykx.LongAtom(q('0')) ->>> pykx.q.xbar(5, 5) -pykx.LongAtom(q('5')) ->>> pykx.q.xbar(5, 7) -pykx.LongAtom(q('5')) ->>> pykx.q.xbar(3, range(16)) -pykx.LongVector(q('0 0 0 3 3 3 6 6 6 9 9 9 12 12 12 15')) -``` - -### [xrank](https://code.kx.com/q/ref/xrank/) - -Group by value. - -```python ->>> pykx.q.xrank(3, range(6)) -pykx.LongVector(q('0 0 1 1 2 2')) ->>> pykx.q.xrank(4, range(9)) -pykx.LongVector(q('0 0 0 1 1 2 2 3 3')) -``` - -## Table - -### [cols](https://code.kx.com/q/ref/cols/#cols) - -Column names of a table. - -```python ->>> import pandas as pd ->>> import numpy as np ->>> df = pd.DataFrame({ -... 'time': np.array([1, 2, 3, 4], dtype='timedelta64[s]'), -... 'sym':['a', 'a', 'b', 'b'], -... 'p': pykx.LongVector([2, 4, 6, 8]) -... }) ->>> pykx.q.cols(df) -pykx.SymbolVector(q('`time`sym`p')) -``` - -### [csv](https://code.kx.com/q/ref/csv/) - -CSV delimiter. - -A synonym for "," for use in preparing text for CSV files, or reading them. - -```python ->>> pykx.q.csv -pykx.CharAtom(q('","')) -``` - -### [fkeys](https://code.kx.com/q/ref/fkeys/) - -Foreign-key columns of a table. - -```python ->>> pykx.q('f:([x:1 2 3]y:10 20 30)') -pykx.Identity(q('::')) ->>> pykx.q('t: ([]a:`f$2 2 2; b: 0; c: `f$1 1 1)') -pykx.Identity(q('::')) ->>> pykx.q.fkeys('t') -pykx.Dictionary(q(' -a| f -c| f -')) -``` - -### [insert](https://code.kx.com/q/ref/insert/) - -Insert or append records to a table. - -```python ->>> pykx.q('t: ([] a: `a`b`c; b: til 3)') ->>> pykx.q('t') -pykx.Table(q(' -a b ---- -a 0 -b 1 -c 2 -')) ->>> pykx.q.insert('t', ['d', 3]) ->>> pykx.q('t') -pykx.Table(q(' -a b ---- -a 0 -b 1 -c 2 -d 3 -')) -``` - -### [key](https://code.kx.com/q/ref/key/) - -Where x is a dictionary (or the name of one), returns its keys. - -```python ->>> pykx.q.key({'a': 1, 'b': 2}) -pykx.SymbolVector(q('`a`b')) -``` - -### [keys](https://code.kx.com/q/ref/keys/) - -Get the names of the key columns of a table.
- -```python ->>> pykx.q['v'] = pykx.KeyedTable(data={'x': [4, 5, 6]}, index=[1, 2, 3]) ->>> pykx.q('v') -pykx.KeyedTable(pykx.q(' -idx| x ----| - -1 | 4 -2 | 5 -3 | 6 -')) ->>> pykx.q.keys('v') -pykx.SymbolVector(q(',`idx')) -``` - -### [meta](https://code.kx.com/q/ref/meta/) - -Metadata for a table. - -| Column | Information | -|--------|-------------| -| c | column name | -| t | [data type](https://code.kx.com/q/ref/#datatypes) | -| f | foreign key (enums) | -| a | [attribute](#attribute) | - -```python ->>> import pandas as pd ->>> import numpy as np ->>> df = pd.DataFrame({ -... 'time': np.array([1, 2, 3, 4], dtype='timedelta64[s]'), -... 'sym': ['a', 'a', 'b', 'b'], -... 'p': pykx.LongVector([2, 4, 6, 8]) -... }) ->>> pykx.q.meta(df) -pykx.KeyedTable(q(' -c | t f a -----| ----- -time| n -sym | s -p | j -')) -``` - -### [ungroup](https://code.kx.com/q/ref/ungroup/) - -Where x is a table, in which some cells are lists, but for any row, all lists are of the same length, returns the normalized table, with one row for each item of a lists. - -```python ->>> a = pykx.Table([['a', [2, 3], 10], ['b', [5, 6, 7], 20], ['c', [11], 30]], columns=['s', 'x', 'q']) ->>> a -pykx.Table(pykx.q(' -s x q ------------- -a (2;3) 10 -b (5;6;7) 20 -c ,11 30 -')) ->>> pykx.q.ungroup(a) -pykx.Table(q(' -s x q -------- -a 2 10 -a 3 10 -b 5 20 -b 6 20 -b 7 20 -c 11 30 -')) -``` - -### [upsert](https://code.kx.com/q/ref/upsert/) - -Add new records to a table. - -```python ->>> import pandas as pd ->>> df = pd.DataFrame({'sym':['a', 'a', 'b', 'b'], 'p': pykx.LongVector([2, 4, 6, 8])}) ->>> pykx.Table(df) -pykx.Table(q(' -sym p ------ -a 2 -a 4 -b 6 -b 8 -')) ->>> pykx.q.upsert(df, ['c', 10]) ->>> pykx.Table(q(' -sym p ------- -a 2 -a 4 -b 6 -b 8 -c 10 -')) -``` - -### [xasc](https://code.kx.com/q/ref/asc/#xasc) - -Sort a table in ascending order of specified columns. 
- -```python ->>> import pandas as pd ->>> df = pd.DataFrame({'sym':['a', 'a', 'b', 'b', 'c', 'c'], 'p': pykx.LongVector([10, 4, 6, 2, 0, 8])}) ->>> pykx.Table(df) -pykx.Table(q(' -sym p ------- -a 10 -a 4 -b 6 -b 2 -c 0 -c 8 -')) ->>> pykx.q.xasc('p', df) -pykx.Table(q(' -sym p ------- -c 0 -b 2 -a 4 -b 6 -c 8 -a 10 -')) -``` - -### [xcol](https://code.kx.com/q/ref/cols/#xcol) - -Rename table columns. - -```python ->>> import pandas as pd ->>> df = pd.DataFrame({'sym':['a', 'a', 'b', 'b', 'c', 'c'], 'p': pykx.LongVector([10, 4, 6, 2, 0, 8])}) ->>> pykx.Table(df) -pykx.Table(q(' -sym p ------- -a 10 -a 4 -b 6 -b 2 -c 0 -c 8 -')) ->>> pykx.q.xcol(pykx.SymbolVector(['Sym', 'Qty']), df) -pykx.Table(q(' -Sym Qty -------- -a 10 -a 4 -b 6 -b 2 -c 0 -c 8 -')) ->>> pykx.q.xcol({'p': 'Qty'}, df) -pykx.Table(q(' -sym Qty -------- -a 10 -a 4 -b 6 -b 2 -c 0 -c 8 -')) -``` - -### [xcols](https://code.kx.com/q/ref/cols/#xcols) - -Reorder table columns. - -```python ->>> import pandas as pd ->>> import numpy as np ->>> df = pd.DataFrame({ -... 'time': np.array([1, 2, 3, 4], dtype='timedelta64[s]'), -... 'sym':['a', 'a', 'b', 'b'], -... 'p': pykx.LongVector([2, 4, 6, 8]) -... }) ->>> pykx.Table(df) -pykx.Table(q(' -time sym p --------------------------- -0D00:00:01.000000000 a 2 -0D00:00:02.000000000 a 4 -0D00:00:03.000000000 b 6 -0D00:00:04.000000000 b 8 -')) ->>> pykx.q.xcols(pykx.SymbolVector(['p', 'sym', 'time']), df) -pykx.Table(q(' -p sym time --------------------------- -2 a 0D00:00:01.000000000 -4 a 0D00:00:02.000000000 -6 b 0D00:00:03.000000000 -8 b 0D00:00:04.000000000 -')) -``` - -### [xdesc](https://code.kx.com/q/ref/desc/#xdesc) - -Sorts a table in descending order of specified columns. The sort is by the first column specified, then by the second column within the first, and so on. 
- -```python ->>> import pandas as pd ->>> df = pd.DataFrame({'sym':['a', 'a', 'b', 'b', 'c', 'c'], 'p': pykx.LongVector([10, 4, 6, 2, 0, 8])}) ->>> pykx.Table(df) -pykx.Table(q(' -sym p ------- -a 10 -a 4 -b 6 -b 2 -c 0 -c 8 -')) ->>> pykx.q.xdesc('p', df) -pykx.Table(q(' -sym p ------- -a 10 -c 8 -b 6 -a 4 -b 2 -c 0 -')) -``` - -### [xgroup](https://code.kx.com/q/ref/xgroup/) - -Groups a table by values in selected columns. - -```python ->>> import pandas as pd ->>> df = pd.DataFrame({'sym':['a', 'a', 'b', 'b', 'c', 'c'], 'p': pykx.LongVector([10, 4, 6, 2, 0, 8])}) ->>> pykx.Table(df) -pykx.Table(q(' -sym p ------- -a 10 -a 4 -b 6 -b 2 -c 0 -c 8 -')) ->>> pykx.q.xgroup('sym', df) -pykx.KeyedTable(q(' -sym| p ----| ---- -a | 10 4 -b | 6 2 -c | 0 8 -')) -``` - -### [xkey](https://code.kx.com/q/ref/keys/#xkey) - -Set specified columns as primary keys of a table. - -```python ->>> import pandas as pd ->>> df = pd.DataFrame({'sym':['a', 'a', 'b', 'b', 'c', 'c'], 'p': pykx.LongVector([10, 4, 6, 2, 0, 8])}) ->>> pykx.Table(df) -pykx.Table(q(' -sym p ------- -a 10 -a 4 -b 6 -b 2 -c 0 -c 8 -')) ->>> pykx.q.xkey('p', df) -pykx.KeyedTable(q(' -p | sym ---| --- -10| a -4 | a -6 | b -2 | b -0 | c -8 | c -')) -``` - -## Text - -### [like](https://code.kx.com/q/ref/like/) - -Whether text matches a pattern. - -```python ->>> pykx.q.like('quick', b'qu?ck') -pykx.BooleanAtom(q('1b')) ->>> pykx.q.like('brown', b'br[ao]wn') -pykx.BooleanAtom(q('1b')) ->>> pykx.q.like('quick', b'quickish') -pykx.BooleanAtom(q('0b')) -``` - -### [lower](https://code.kx.com/q/ref/lower/) - -Shift case to lower case. - -```python ->>> pykx.q.lower('HELLO') -pykx.SymbolAtom(q('`hello')) ->>> pykx.q.lower(b'HELLO') -pykx.CharVector(q('"hello"')) -``` - -### [ltrim](https://code.kx.com/q/ref/trim/#ltrim) - -Remove leading nulls from a list. - -```python ->>> pykx.q.ltrim(b' pykx ') -pykx.CharVector(q('"pykx "')) -``` - -### [md5](https://code.kx.com/q/ref/md5/) - -Message digest hash. 
- -```python ->>> pykx.q.md5(b'pykx') -pykx.ByteVector(q('0xfba0532951f022133f8e8b14b6ddfced')) -``` - -### [rtrim](https://code.kx.com/q/ref/trim/#rtrim) - -Remove trailing nulls from a list. - -```python ->>> pykx.q.rtrim(b' pykx ') -pykx.CharVector(q('" pykx"')) -``` - -### [ss](https://code.kx.com/q/ref/ss/) - -String search. - -```python ->>> pykx.q.ss(b'a cat and a dog', b'a') -pykx.LongVector(q('0 3 6 10')) -``` - -### [ssr](https://code.kx.com/q/ref/ss/#ssr) - -String search and replace. - -```python ->>> pykx.q.ssr(b'toronto ontario', b'ont', b'x') -pykx.CharVector(q('"torxo xario"')) -``` - -### [string](https://code.kx.com/q/ref/string/) - -Cast to string. - -```python ->>> pykx.q.string(2) -pykx.CharVector(q(',"2"')) ->>> pykx.q.string([1, 2, 3, 4, 5]) -pykx.List(q(' -,"1" -,"2" -,"3" -,"4" -,"5" -')) -``` - -### [trim](https://code.kx.com/q/ref/trim/) - -Remove leading and trailing nulls from a list. - -```python ->>> pykx.q.trim(b' pykx ') -pykx.CharVector(q('"pykx"')) -``` - -### [upper](https://code.kx.com/q/ref/lower/#upper) - -Shift case to upper case. - -```python ->>> pykx.q.upper('hello') -pykx.SymbolAtom(q('`HELLO')) ->>> pykx.q.upper(b'hello') -pykx.CharVector(q('"HELLO"')) -``` diff --git a/docs/api/read.md b/docs/api/read.md deleted file mode 100644 index d9cf093..0000000 --- a/docs/api/read.md +++ /dev/null @@ -1,3 +0,0 @@ -# Read - -::: pykx.read diff --git a/docs/api/toq.md b/docs/api/toq.md deleted file mode 100644 index 00f698e..0000000 --- a/docs/api/toq.md +++ /dev/null @@ -1,3 +0,0 @@ -# Convert to q - -::: pykx.toq diff --git a/docs/api/type_conversions.md b/docs/api/type_conversions.md deleted file mode 100644 index 6ca62f3..0000000 --- a/docs/api/type_conversions.md +++ /dev/null @@ -1,1227 +0,0 @@ -# Type Conversions - -A breakdown of each of the `pykx.K` types and their analogous `numpy`, `pandas`, and `pyarrow` types. - -??? 
"Cheat Sheet" - - PyKX type | Python type | Numpy dtype | Pandas dtype | PyArrow type | - ------------------------------- | ----------- | --------------- | --------------- | -------------- | - [List](#pykxlist) | list | object | object | Not Supported | - [Boolean](#pykxbooleanatom) | bool | bool | bool | Not Supported | - [GUID](#pykxguidatom) | uuid4 | uuid4 | uuid4 | uuid4 | - [Byte](#pykxbyteatom) | int | uint8 | uint8 | uint8 | - [Short](#pykxshortatom) | int | int16 | int16 | int16 | - [Int](#pykxintatom) | int | int32 | int32 | int32 | - [Long](#pykxlongatom) | int | int64 | int64 | int64 | - [Real](#pykxrealatom) | float | float32 | float32 | FloatArray | - [Float](#pykxfloatatom) | float | float64 | float64 | DoubleArray | - [Char](#pykxcharatom) | bytes | \|S1 | bytes8 | BinaryArray | - [Symbol](#pykxsymbolatom) | str | object | object | StringArray | - [Timestamp](#pykxtimestampatom) | datetime | datetime64[ns] | datetime64[ns] | TimestampArray | - [Month](#pykxmonthatom) | date | datetime64[M] | datetime64[ns] | Not Supported | - [Date](#pykxdateatom) | date | datetime64[D] | datetime64[ns] | Date32Array | - [Timespan](#pykxtimespanatom) | timedelta | timedelta[ns] | timedelta64[ns] | DurationArray | - [Minute](#pykxminuteatom) | timedelta | timedelta64[m] | timedelta64[ns] | Not Supported | - [Second](#pykxsecondatom) | timedelta | timedelta64[s] | timedelta64[ns] | DurationArray | - [Time](#TimeAtom) | timedelta | timedelta64[ms] | timedelta64[ns] | DurationArray | - [Dictionary](#pykxdictionary) | dict | Not Supported | Not Supported | Not Supported | - [Table](#pykxtable) | dict | records | DataFrame | Table | - -## `pykx.List` - -**Python** - -A python list of mixed types will be converted into a `pykx.List`. - -```Python ->>> pykx.List([1, b'foo', 'bar', 4.5]) -pykx.List(pykx.q(' -1 -"foo" -`bar -4.5 -')) -``` - -Calling `.py()` on a `pykx.List` will return a generic python list object where each object is converted into its analogous python type. 
- -```Python ->>> pykx.List([1, b'foo', 'bar', 4.5]).py() -[1, b'foo', 'bar', 4.5] -``` - -**Numpy** - -A numpy list with `dtype==object` containing data of mixed types will be converted into a `pykx.List` - -```Python ->>> pykx.List(np.array([1, b'foo', 'bar', 4.5], dtype=object)) -pykx.List(pykx.q(' -1 -"foo" -`bar -4.5 -')) -``` - -Calling `.np()` on a `pykx.List` object will return a numpy `ndarray` with `dtype==object` where each element has been converted into its closest analogous python type. - -```Python ->>> pykx.List([1, b'foo', 'bar', 4.5]).np() -array([1, b'foo', 'bar', 4.5], dtype=object) -``` - -**Pandas** - -Calling `.pd()` on a `pykx.List` object will return a pandas `Series` with `dtype==object` where each element has been converted into its closest analogous python type. - -```Python ->>> pykx.List([1, b'foo', 'bar', 4.5]).pd() -0 1 -1 b'foo' -2 bar -3 4.5 -dtype: object -``` - -## `pykx.BooleanAtom` - -**Python** - -The python bool type will be converted into a `pykx.BooleanAtom`. - -```Python ->>> pykx.BooleanAtom(True) -pykx.BooleanAtom(pykx.q('1b')) -``` - -Calling `.py()` on a `pykx.BooleanAtom` will return a python bool object. - -```Python ->>> pykx.BooleanAtom(True).py() -True -``` - -## `pykx.BooleanVector` - -**Python** - -A list of python bool types will be converted into a `pykx.BooleanVector`. - -```Python ->>> pykx.BooleanVector([True, False, True]) -pykx.BooleanVector(pykx.q('101b')) -``` - -Calling `.py()` on a `pykx.BooleanVector` will return a list of python bool objects. - -```Python ->>> pykx.BooleanVector([True, False, True]).py() -[True, False, True] -``` - -**Numpy, Pandas, Pyarrow** - -Converting a `pykx.BoolVector` will result in an array of objects with the `bool` `dtype`, arrays of that `dtype` can also be converted into `pykx.BoolVector` objects. - -## `pykx.GUIDAtom` - -**Python** - -The python uuid4 type from the `uuid` library will be converted into a `pykx.GUIDAtom`. 
- -```Python ->>> from uuid import uuid4 ->>> pykx.GUIDAtom(uuid4()) -pykx.GUIDAtom(pykx.q('012e8fb7-52c4-49e6-9b4e-93aa625ca3d7')) -``` - -Calling `.py()` on a `pykx.GUIDAtom` will return a python uuid4 object. - -```Python ->>> pykx.GUIDAtom(uuid4()).py() -UUID('d16f9f3f-2a57-4dfd-818e-04c9c7a53584') -``` - -## `pykx.GUIDVector` - -**Python** - -A list of python uuid4 types from the `uuid` library will be converted into a `pykx.GUIDVector`. - -```Python ->>> pykx.GUIDVector([uuid4(), uuid4()]) -pykx.GUIDVector(pykx.q('542ccbef-8aa1-4433-804a-7928172ec2d4 ff6f89fb-1aec-4073-821a-ce281ca6263e')) -``` - -Calling `.py()` on a `pykx.GUIDVector` will return a list of python uuid4 objects. - -```Python ->>> pykx.GUIDVector([uuid4(), uuid4()]).py() -[UUID('a3b284fc-5f31-4ba2-b521-fa8b5c309e02'), UUID('95ee9044-3930-492c-96f2-e336110de023')] -``` - -**Numpy, Pandas, PyArrow** - -Each of these will return an array of their respective object types around a list of uuid4 objects. - -## `pykx.ByteAtom` - -**Python** - -The python int type will be converted into a `pykx.ByteAtom`. - -Float types will also be converted but the decimal will be truncated away and no rounding done. - -```Python ->>> pykx.ByteAtom(1.0) -pykx.ByteAtom(pykx.q('0x01')) ->>> pykx.ByteAtom(1.5) -pykx.ByteAtom(pykx.q('0x01')) -``` - -Calling `.py()` on a `pykx.ByteAtom` will return a python int object. - -```Python ->>> pykx.ByteAtom(1.5).py() -1 -``` - -## `pykx.ByteVector` - -**Python** - -A list of python int types will be converted into a `pykx.ByteVector`. - -Float types will also be converted but the decimal will be truncated away and no rounding done. - -```Python ->>> pykx.ByteVector([1, 2.5]) -pykx.ByteVector(pykx.q('0x0102')) -``` - -Calling `.py()` on a `pykx.ByteVector` will return a list of python int objects. 
- -```Python ->>> pykx.ByteVector([1, 2.5]).py() -[1, 2] -``` - -**Numpy, Pandas, PyArrow** - -Converting a `pykx.ByteVector` will result in an array of objects with the `uint8` `dtype`, arrays of that `dtype` can also be converted into `pykx.ByteVector` objects. - -## `pykx.ShortAtom` - -**Python** - -The python int type will be converted into a `pykx.ShortAtom`. - -Float types will also be converted but the decimal will be truncated away and no rounding done. - -```Python ->>> pykx.ShortAtom(1) -pykx.ShortAtom(pykx.q('1h')) ->>> pykx.ShortAtom(1.5) -pykx.ShortAtom(pykx.q('1h')) -``` - -Calling `.py()` on a `pykx.ShortAtom` will return a python int object. - -```Python ->>> pykx.ShortAtom(1.5).py() -1 -``` - -## `pykx.ShortVector` - -**Python** - -A list of python int types will be converted into a `pykx.ShortVector`. - -Float types will also be converted but the decimal will be truncated away and no rounding done. - -```Python ->>> pykx.ShortVector([1, 2.5]) -pykx.ShortVector(pykx.q('1 2h')) -``` - -Calling `.py()` on a `pykx.ShortVector` will return a list of python int objects. - -```Python ->>> pykx.ShortVector([1, 2.5]).py() -[1, 2] -``` - -**Numpy, Pandas, PyArrow** - -Converting a `pykx.ShortVector` will result in an array of objects with the `int16` `dtype`, arrays of that `dtype` can also be converted into `pykx.ShortVector` objects. - -## `pykx.IntAtom` - -**Python** - -The python int type will be converted into a `pykx.IntAtom`. - -Float types will also be converted but the decimal will be truncated away and no rounding done. - -```Python ->>> pykx.IntAtom(1) -pykx.IntAtom(pykx.q('1i')) ->>> pykx.IntAtom(1.5) -pykx.IntAtom(pykx.q('1i')) -``` - -Calling `.py()` on a `pykx.IntAtom` will return a python int object. - -```Python ->>> pykx.IntAtom(1.5).py() -1 -``` - -## `pykx.IntVector` - -**Python** - -A list of python int types will be converted into a `pykx.IntVector`. 
- -Float types will also be converted but the decimal will be truncated away and no rounding done. - -```Python ->>> pykx.IntVector([1, 2.5]) -pykx.IntVector(pykx.q('1 2i')) -``` - -Calling `.py()` on a `pykx.IntVector` will return a list of python int objects. - -```Python ->>> pykx.IntVector([1, 2.5]).py() -[1, 2] -``` - -**Numpy, Pandas, PyArrow** - -Converting a `pykx.IntVector` will result in an array of objects with the `int32` `dtype`, arrays of that `dtype` can also be converted into `pykx.IntVector` objects. - -## `pykx.LongAtom` - -**Python** - -The python int type will be converted into a `pykx.LongAtom`. - -Float types will also be converted but the decimal will be truncated away and no rounding done. - -```Python ->>> pykx.LongAtom(1) -pykx.LongAtom(pykx.q('1')) ->>> pykx.LongAtom(1.5) -pykx.LongAtom(pykx.q('1')) -``` - -Calling `.py()` on a `pykx.LongAtom` will return a python int object. - -```Python ->>> pykx.LongAtom(1.5).py() -1 -``` - -## `pykx.LongVector` - -**Python** - -A list of python int types will be converted into a `pykx.LongVector`. - -Float types will also be converted but the decimal will be truncated away and no rounding done. - -```Python ->>> pykx.LongVector([1, 2.5]) -pykx.LongVector(pykx.q('1 2')) -``` - -Calling `.py()` on a `pykx.LongVector` will return a list of python int objects. - -```Python ->>> pykx.LongVector([1, 2.5]).py() -[1, 2] -``` - -**Numpy, Pandas, PyArrow** - -Converting a `pykx.LongVector` will result in an array of objects with the `int64` `dtype`, arrays of that `dtype` can also be converted into `pykx.LongVector` objects. - -## `pykx.RealAtom` - -**Python** - -The python float and int types will be converted into a `pykx.RealAtom`. - -```Python ->>> pykx.RealAtom(2.5) -pykx.RealAtom(pykx.q('2.5e')) -``` - -Calling `.py()` on a `pykx.RealAtom` will return a python float object.
- -```Python ->>> pykx.RealAtom(2.5).py() -2.5 -``` - -## `pykx.RealVector` - -**Python** - -A list of python int and float types will be converted into a `pykx.RealVector`. - -```Python ->>> pykx.RealVector([1, 2.5]) -pykx.RealVector(pykx.q('1 2.5e')) -``` - -Calling `.py()` on a `pykx.RealVector` will return a list of python float objects. - -```Python ->>> pykx.RealVector([1, 2.5]).py() -[1.0, 2.5] -``` - -**Numpy, Pandas** - -Converting a `pykx.RealVector` will result in an array of objects with the `float32` `dtype`, arrays of that `dtype` can also be converted into `pykx.RealVector` objects. - - -**PyArrow** - -This will return a `PyArrow` array with the FloatArray type. - -## `pykx.FloatAtom` - -**Python** - -The python float and int types will be converted into a `pykx.FloatAtom`. - -```Python ->>> pykx.FloatAtom(2.5) -pykx.FloatAtom(pykx.q('2.5')) -``` - -Calling `.py()` on a `pykx.FloatAtom` will return a python float object. - -```Python ->>> pykx.FloatAtom(2.5).py() -2.5 -``` - -## `pykx.FloatVector` - -**Python** - -A list of python int and float types will be converted into a `pykx.FloatVector`. - -```Python ->>> pykx.FloatVector([1, 2.5]) -pykx.FloatVector(pykx.q('1 2.5')) -``` - -Calling `.py()` on a `pykx.FloatVector` will return a list of python float objects. - -```Python ->>> pykx.FloatVector([1, 2.5]).py() -[1.0, 2.5] -``` - -**Numpy, Pandas** - -Converting a `pykx.FloatVector` will result in an array of objects with the `float64` `dtype`, arrays of that `dtype` can also be converted into `pykx.FloatVector` objects. - -**PyArrow** - -This will return a `PyArrow` array with the DoubleArray type. - -## `pykx.CharAtom` - -**Python** - -The python bytes type with length 1 will be converted into a `pykx.CharAtom`. - -```Python ->>> pykx.CharAtom(b'a') -pykx.CharAtom(pykx.q('"a"')) -``` - -Calling `.py()` on a `pykx.CharAtom` will return a python bytes object.
- -```Python ->>> pykx.CharAtom(b'a').py() -b'a' -``` - -## `pykx.CharVector` - -**Python** - -The python bytes type with length greater than 1 will be converted into a `pykx.CharVector`. - -```Python ->>> pykx.CharVector(b'abc') -pykx.CharVector(pykx.q('"abc"')) -``` - -Calling `.py()` on a `pykx.CharVector` will return a python bytes object. - -```Python ->>> pykx.CharVector(b'abc').py() -b'abc' -``` - -**Numpy** - -Calling `.np()` on a `pykx.CharVector` will return a numpy `ndarray` with `dtype` `'|S1'`. - -```Python ->>> pykx.CharVector(b'abc').np() -array([b'a', b'b', b'c'], dtype='|S1') -``` - -Converting a `ndarray` of this `dtype` will create a `pykx.CharVector`. - -```Python ->>> pykx.CharVector(np.array([b'a', b'b', b'c'], dtype='|S1')) -pykx.CharVector(pykx.q('"abc"')) -``` -**Pandas** - -Calling `.pd()` on a `pykx.CharVector` will return a pandas `Series` with `dtype` `bytes8`. - -```Python ->>> pykx.CharVector(b'abc').pd() -0 b'a' -1 b'b' -2 b'c' -dtype: bytes8 -``` -Converting a `Series` of this `dtype` will create a `pykx.CharVector`. - -```Python ->>> pykx.CharVector(pd.Series([b'a', b'b', b'c'], dtype=bytes)) -pykx.CharVector(pykx.q('"abc"')) -``` -**PyArrow** - -Calling `.pa()` on a `pykx.CharVector` will return a pyarrow `BinaryArray`. - -```Python ->>> pykx.CharVector(b'abc').pa() - -[ - 61, - 62, - 63 -] -``` - -## `pykx.SymbolAtom` - -**Python** - -The python string type will be converted into a `pykx.SymbolAtom`. - -```Python ->>> pykx.toq('symbol') -pykx.SymbolAtom(pykx.q('`symbol')) -``` - -Calling `.py()` on a `pykx.SymbolAtom` will return a python string object. - -```Python ->>> pykx.toq('symbol').py() -'symbol' -``` -## `pykx.SymbolVector` - -**Python** - -A list of python string types will be converted into a `pykx.SymbolVector`. - -```Python ->>> pykx.SymbolVector(['a', 'b', 'c']) -pykx.SymbolVector(pykx.q('`a`b`c')) -``` - -Calling `.py()` on a `pykx.SymbolVector` will return a list of python string objects.
- -```Python ->>> pykx.SymbolVector(['a', 'b', 'c']).py() -['a', 'b', 'c'] -``` - -**Numpy** - -Calling `.np()` on a `pykx.SymbolVector` will return a numpy `ndarray` of python strings with `dtype` `object`. - -```Python ->>> pykx.SymbolVector(['a', 'b', 'c']).np() -array(['a', 'b', 'c'], dtype=object) -``` - -Converting a `ndarray` of `dtype` `object` will create a `pykx.SymbolVector`. - -```Python ->>> pykx.SymbolVector(np.array(['a', 'b', 'c'], dtype=object)) -pykx.SymbolVector(pykx.q('`a`b`c')) -``` - -**Pandas** - -Calling `.pd()` on a `pykx.SymbolVector` will return a pandas `Series` with `dtype` `object`. - -```Python ->>> pykx.SymbolVector(['a', 'b', 'c']).pd() -0 a -1 b -2 c -dtype: object -``` - -**PyArrow** - -Calling `.pa()` on a `pykx.SymbolVector` will return a pyarrow `StringArray`. - -```Python ->>> pykx.SymbolVector(['a', 'b', 'c']).pa() - -[ - "a", - "b", - "c" -] -``` - -## `pykx.TimestampAtom` - -**Python** - -The python datetime type will be converted into a `pykx.TimestampAtom`. - -```Python ->>> kx.TimestampAtom(datetime(2150, 10, 22, 20, 31, 15, 70713)) -pykx.TimestampAtom(pykx.q('2150.10.22D20:31:15.070713000')) -``` - -Calling `.py()` on a `pykx.TimestampAtom` will return a python datetime object. - -```Python ->>> kx.TimestampAtom(datetime(2150, 10, 22, 20, 31, 15, 70713)).py() -datetime.datetime(2150, 10, 22, 20, 31, 15, 70713) -``` - -## `pykx.TimestampVector` - -**Python** - -A list of python `datetime` types will be converted into a `pykx.TimestampVector`. - -```Python ->>> kx.TimestampVector([datetime(2150, 10, 22, 20, 31, 15, 70713), datetime(2050, 10, 22, 20, 31, 15, 70713)]) -pykx.TimestampVector(pykx.q('2150.10.22D20:31:15.070713000 2050.10.22D20:31:15.070713000')) -``` - -Calling `.py()` on a `pykx.TimestampVector` will return a list of python `datetime` objects. 
- -```Python ->>> kx.TimestampVector([datetime(2150, 10, 22, 20, 31, 15, 70713), datetime(2050, 10, 22, 20, 31, 15, 70713)]).py() -[datetime.datetime(2150, 10, 22, 20, 31, 15, 70713), datetime.datetime(2050, 10, 22, 20, 31, 15, 70713)] -``` - -**Numpy** - -Calling `.np()` on a `pykx.TimestampVector` will return a numpy `ndarray` with `dtype` `datetime64[ns]`. - -```Python ->>> kx.TimestampVector([datetime(2150, 10, 22, 20, 31, 15, 70713), datetime(2050, 10, 22, 20, 31, 15, 70713)]).np() -array(['2150-10-22T20:31:15.070713000', '2050-10-22T20:31:15.070713000'], - dtype='datetime64[ns]') -``` - -Converting a `ndarray` of `dtype` `datetime64[ns]` will create a `pykx.TimestampVector`. - -```Python ->>> kx.TimestampVector(np.array(['2150-10-22T20:31:15.070713000', '2050-10-22T20:31:15.070713000'], dtype='datetime64[ns]')) -pykx.TimestampVector(pykx.q('2150.10.22D20:31:15.070713000 2050.10.22D20:31:15.070713000')) -``` -**Pandas** - -Calling `.pd()` on a `pykx.TimestampVector` will return a pandas `Series` with `dtype` `datetime64[ns]`. - -```Python ->>> kx.TimestampVector([datetime(2150, 10, 22, 20, 31, 15, 70713), datetime(2050, 10, 22, 20, 31, 15, 70713)]).pd() -0 2150-10-22 20:31:15.070713 -1 2050-10-22 20:31:15.070713 -dtype: datetime64[ns] -``` - -**PyArrow** - -Calling `.pa()` on a `pykx.TimestampVector` will return a pyarrow `TimestampArray`. - -```Python ->>> kx.TimestampVector([datetime(2150, 10, 22, 20, 31, 15, 70713), datetime(2050, 10, 22, 20, 31, 15, 70713)]).pa() - -[ - 2150-10-22 20:31:15.070713000, - 2050-10-22 20:31:15.070713000 -] -``` - -## `pykx.MonthAtom` - -**Python** - -The python date type will be converted into a `pykx.MonthAtom`. - -```Python ->>> from datetime import date ->>> kx.MonthAtom(date(1972, 5, 1)) -pykx.MonthAtom(pykx.q('1972.05m')) -``` - -Calling `.py()` on a `pykx.MonthAtom` will return a python date object. 
- -```Python ->>> kx.MonthAtom(date(1972, 5, 1)).py() -datetime.date(1972, 5, 1) -``` - -## `pykx.MonthVector` - -**Python** - -A list of python `date` types will be converted into a `pykx.MonthVector`. - -```Python ->>> kx.MonthVector([date(1972, 5, 1), date(1999, 5, 1)]) -pykx.MonthVector(pykx.q('1972.05 1999.05m')) -``` - -Calling `.py()` on a `pykx.MonthVector` will return a list of python `date` objects. - -```Python ->>> kx.MonthVector([date(1972, 5, 1), date(1999, 5, 1)]).py() -[datetime.date(1972, 5, 1), datetime.date(1999, 5, 1)] -``` - -**Numpy** - -Calling `.np()` on a `pykx.MonthVector` will return a numpy `ndarray` with `dtype` `datetime64[M]`. - -```Python ->>> kx.MonthVector([date(1972, 5, 1), date(1999, 5, 1)]).np() -array(['1972-05', '1999-05'], dtype='datetime64[M]') -``` - -Converting a `ndarray` of `dtype` `datetime64[M]` will create a `pykx.MonthVector`. - -```Python ->>> kx.MonthVector(np.array(['1972-05', '1999-05'], dtype='datetime64[M]')) -pykx.MonthVector(pykx.q('1972.05 1999.05m')) -``` -**Pandas** - -Calling `.pd()` on a `pykx.MonthVector` will return a pandas `Series` with `dtype` `datetime64[ns]`. - -```Python ->>> kx.MonthVector([date(1972, 5, 1), date(1999, 5, 1)]).pd() -0 1972-05-01 -1 1999-05-01 -dtype: datetime64[ns] -``` - -## `pykx.DateAtom` - -**Python** - -The python date type will be converted into a `pykx.DateAtom`. - -```Python ->>> kx.DateAtom(date(1972, 5, 31)) -pykx.DateAtom(pykx.q('1972.05.31')) -``` - -Calling `.py()` on a `pykx.DateAtom` will return a python date object. - -```Python ->>> kx.DateAtom(date(1972, 5, 31)).py() -datetime.date(1972, 5, 31) -``` - -## `pykx.DateVector` - -**Python** - -A list of python `date` types will be converted into a `pykx.DateVector`. - -```Python ->>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]) -pykx.DateVector(pykx.q('1972.05.01 1999.05.01')) -``` - -Calling `.py()` on a `pykx.DateVector` will return a list of python `date` objects. 
- -```Python ->>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]).py() -[datetime.date(1972, 5, 1), datetime.date(1999, 5, 1)] -``` - -**Numpy** - -Calling `.np()` on a `pykx.DateVector` will return a numpy `ndarray` of python strings with `dtype` `datetime64[D]`. - -```Python ->>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]).np() -array(['1972-05-01', '1999-05-01'], dtype='datetime64[D]') -``` - -Converting a `ndarray` of `dtype` `datetime64[D]` will create a `pykx.DateVector`. - -```Python ->>> kx.DateVector(np.array(['1972-05-01', '1999-05-01'], dtype='datetime64[D]')) -pykx.DateVector(pykx.q('1972.05.01 1999.05.01')) -``` -**Pandas** - -Calling `.pd()` on a `pykx.DateVector` will return a pandas `Series` with `dtype` `datetime64[ns]`. - -```Python ->>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]).pd() -0 1972-05-01 -1 1999-05-01 -dtype: datetime64[ns] -``` - -**PyArrow** - -Calling `.pa()` on a `pykx.DateVector` will return a pyarrow `Date32Array`. - -```Python ->>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]).pa() - -[ - 1972-05-01, - 1999-05-01 -] -``` - -## `pykx.Datetime` types - -**Python and Numpy** - -These types are deprecated and can only be accessed using the `raw` key word argument. - -Converting these types to python will return a float object or a `float64` object in numpy's case. - -```Python ->>> kx.q('0001.02.03T04:05:06.007, 0001.02.03T04:05:06.007').py(raw=True) -[-730085.8297915857, -730085.8297915857] ->>> kx.q('0001.02.03T04:05:06.007, 0001.02.03T04:05:06.007').np(raw=True) -array([-730085.82979159, -730085.82979159]) ->>> kx.q('0001.02.03T04:05:06.007, 0001.02.03T04:05:06.007').np(raw=True).dtype -dtype('float64') -``` - -## `pykx.TimespanAtom` - -**Python** - -The python `timedelta` type will be converted into a `pykx.TimespanAtom`. 
- -```Python ->>> from datetime import timedelta ->>> kx.TimespanAtom(timedelta(days=43938, seconds=68851, microseconds=664551)) -pykx.TimespanAtom(pykx.q('43938D19:07:31.664551000')) -``` - -Calling `.py()` on a `pykx.TimespanAtom` will return a python `timedelta` object. - -```Python ->>> kx.TimespanAtom(timedelta(days=43938, seconds=68851, microseconds=664551)).py() -datetime.timedelta(days=43938, seconds=68851, microseconds=664551) -``` - -## `pykx.TimespanVector` - -**Python** - -A list of python `timedelta` types will be converted into a `pykx.TimespanVector`. - -```Python ->>> kx.TimespanVector([timedelta(days=43938, seconds=68851, microseconds=664551), timedelta(days=43938, seconds=68851, microseconds=664551)]) -pykx.TimespanVector(pykx.q('43938D19:07:31.664551000 43938D19:07:31.664551000')) -``` - -Calling `.py()` on a `pykx.TimespanVector` will return a list of python `timedelta` objects. - -```Python ->>> kx.TimespanVector([timedelta(days=43938, seconds=68851, microseconds=664551), timedelta(days=43938, seconds=68851, microseconds=664551)]).py() -[datetime.timedelta(days=43938, seconds=68851, microseconds=664551), datetime.timedelta(days=43938, seconds=68851, microseconds=664551)] -``` - -**Numpy** - -Calling `.np()` on a `pykx.TimespanVector` will return a numpy `ndarray` of python strings with `dtype` `timedelta64[ns]`. - -```Python ->>> kx.TimespanVector([timedelta(days=43938, seconds=68851, microseconds=664551), timedelta(days=43938, seconds=68851, microseconds=664551)]).np() -array([3796312051664551000, 3796312051664551000], dtype='timedelta64[ns]') -``` - -Converting a `ndarray` of `dtype` `timedelta64[ns]` will create a `pykx.TimespanVector`.
- -```Python ->>> kx.TimespanVector(np.array([3796312051664551000, 3796312051664551000], dtype='timedelta64[ns]')) -pykx.TimespanVector(pykx.q('43938D19:07:31.664551000 43938D19:07:31.664551000')) -``` -**Pandas** - -Calling `.pd()` on a `pykx.TimespanVector` will return a pandas `Series` with `dtype` `timedelta64[ns]`. - -```Python ->>> kx.TimespanVector([timedelta(days=43938, seconds=68851, microseconds=664551), timedelta(days=43938, seconds=68851, microseconds=664551)]).pd() -0 43938 days 19:07:31.664551 -1 43938 days 19:07:31.664551 -dtype: timedelta64[ns] -``` - -**PyArrow** - -Calling `.pa()` on a `pykx.TimespanVector` will return a pyarrow `DurationArray`. - -```Python ->>> kx.TimespanVector([timedelta(days=43938, seconds=68851, microseconds=664551), timedelta(days=43938, seconds=68851, microseconds=664551)]).pa() - -[ - 3796312051664551000, - 3796312051664551000 -] -``` - -## `pykx.MinuteAtom` - -**Python** - -The python `timedelta` type will be converted into a `pykx.MinuteAtom`. - -```Python ->>> kx.MinuteAtom(timedelta(minutes=216)) -pykx.MinuteAtom(pykx.q('03:36')) -``` - -Calling `.py()` on a `pykx.MinuteAtom` will return a python `timedelta` object. - -```Python ->>> kx.MinuteAtom(timedelta(minutes=216)).py() -datetime.timedelta(seconds=12960) -``` - -## `pykx.MinuteVector` - -**Python** - -A list of python `timedelta` types will be converted into a `pykx.MinuteVector`. - -```Python ->>> kx.MinuteVector([timedelta(minutes=216), timedelta(minutes=67)]) -pykx.MinuteVector(pykx.q('03:36 01:07')) -``` - -Calling `.py()` on a `pykx.MinuteVector` will return a list of python `timedelta` objects. - -```Python ->>> kx.MinuteVector([timedelta(minutes=216), timedelta(minutes=67)]).py() -[datetime.timedelta(seconds=12960), datetime.timedelta(seconds=4020)] -``` - -**Numpy** - -Calling `.np()` on a `pykx.MinuteVector` will return a numpy `ndarray` of python strings with `dtype` `timedelta64[m]`. 
- -```Python ->>> kx.MinuteVector([timedelta(minutes=216), timedelta(minutes=67)]).np() -array([216, 67], dtype='timedelta64[m]') -``` - -Converting a `ndarray` of `dtype` `timedelta64[m]` will create a `pykx.MinuteVector`. - -```Python ->>> kx.MinuteVector(np.array([216, 67], dtype='timedelta64[m]')) -pykx.MinuteVector(pykx.q('03:36 01:07')) -``` -**Pandas** - -Calling `.pd()` on a `pykx.MinuteVector` will return a pandas `Series` with `dtype` `timedelta64[ns]`. - -```Python ->>> kx.MinuteVector([timedelta(minutes=216), timedelta(minutes=67)]).pd() -0 0 days 03:36:00 -1 0 days 01:07:00 -dtype: timedelta64[ns] -``` - -## `pykx.SecondAtom` - -**Python** - -The python `timedelta` type will be converted into a `pykx.SecondAtom`. - -```Python ->>> kx.SecondAtom(timedelta(seconds=13019)) -pykx.SecondAtom(pykx.q('03:36:59')) -``` - -Calling `.py()` on a `pykx.SecondAtom` will return a python `timedelta` object. - -```Python ->>> kx.SecondAtom(timedelta(seconds=13019)).py() -datetime.timedelta(seconds=13019) -``` - -## `pykx.SecondVector` - -**Python** - -A list of python `timedelta` types will be converted into a `pykx.SecondVector`. - -```Python ->>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]) -pykx.SecondVector(pykx.q('03:36:59 00:16:59')) -``` - -Calling `.py()` on a `pykx.SecondVector` will return a list of python `timedelta` objects. - -```Python ->>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]).py() -[datetime.timedelta(seconds=13019), datetime.timedelta(seconds=1019)] -``` - -**Numpy** - -Calling `.np()` on a `pykx.SecondVector` will return a numpy `ndarray` of python strings with `dtype` `timedelta64[s]`. - -```Python ->>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]).np() -array([13019, 1019], dtype='timedelta64[s]') -``` - -Converting a `ndarray` of `dtype` `timedelta64[s]` will create a `pykx.SecondVector`. 
- -```Python ->>> kx.SecondVector(np.array([13019, 1019], dtype='timedelta64[s]')) -pykx.SecondVector(pykx.q('03:36:59 00:16:59')) -``` -**Pandas** - -Calling `.pd()` on a `pykx.SecondVector` will return a pandas `Series` with `dtype` `timedelta64[ns]`. - -```Python ->>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]).pd() -0 0 days 03:36:59 -1 0 days 00:16:59 -dtype: timedelta64[ns] -``` - -**PyArrow** - -Calling `.pa()` on a `pykx.SecondVector` will return a pyarrow `DurationArray`. - -```Python ->>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]).pa() - -[ - 13019, - 1019 -] -``` - -## `pykx.TimeAtom` - -**Python** - -The python `timedelta` type will be converted into a `pykx.TimeAtom`. - -```Python ->>> kx.TimeAtom(timedelta(seconds=59789, microseconds=214000)) -pykx.TimeAtom(pykx.q('16:36:29.214')) -``` - -Calling `.py()` on a `pykx.TimeAtom` will return a python `timedelta` object. - -```Python ->>> kx.TimeAtom(timedelta(seconds=59789, microseconds=214000)).py() -datetime.timedelta(seconds=59789, microseconds=214000) -``` - -## `pykx.TimeVector` - -**Python** - -A list of python `timedelta` types will be converted into a `pykx.TimeVector`. - -```Python ->>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]) -pykx.TimeVector(pykx.q('16:36:29.214 06:36:29.214')) -``` - -Calling `.py()` on a `pykx.TimeVector` will return a list of python `timedelta` objects. - -```Python ->>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]).py() -[datetime.timedelta(seconds=59789, microseconds=214000), datetime.timedelta(seconds=23789, microseconds=214000)] -``` - -**Numpy** - -Calling `.np()` on a `pykx.TimeVector` will return a numpy `ndarray` of python strings with `dtype` `timedelta64[ms]`. 
- -```Python ->>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]).np() -array([59789214, 23789214], dtype='timedelta64[ms]') -``` - -Converting a `ndarray` of `dtype` `timedelta64[ms]` will create a `pykx.TimeVector`. - -```Python ->>> kx.TimeVector(np.array([59789214, 23789214], dtype='timedelta64[ms]')) -pykx.TimeVector(pykx.q('16:36:29.214 06:36:29.214')) -``` -**Pandas** - -Calling `.pd()` on a `pykx.TimeVector` will return a pandas `Series` with `dtype` `timedelta64[ns]`. - -```Python ->>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]).pd() -0 0 days 16:36:29.214000 -1 0 days 06:36:29.214000 -dtype: timedelta64[ns] -``` - -**PyArrow** - -Calling `.pa()` on a `pykx.TimeVector` will return a pyarrow `DurationArray`. - -```Python ->>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]).pa() - -[ - 59789214, - 23789214 -] -``` - -## `pykx.Dictionary` - -**Python** - -A python `dict` type will be converted into a `pykx.Dictionary`. - -```Python ->>> kx.Dictionary({'foo': b'bar', 'baz': 3.5, 'z': 'prime'}) -pykx.Dictionary(pykx.q(' -foo| "bar" -baz| 3.5 -z | `prime -')) -``` - -Calling `.py()` on a `pykx.Dictionary` will return a python `dict` object. - -```Python ->>> kx.Dictionary({'foo': b'bar', 'baz': 3.5, 'z': 'prime'}).py() -{'foo': b'bar', 'baz': 3.5, 'z': 'prime'} -``` - -## `pykx.Table` - -**Python** - -Calling `.py()` on a `pykx.Table` will return a python `dict` object. - -```Python ->>> kx.q('([] a: 10?10; b: 10?10)').py() -{'a': [5, 6, 4, 1, 3, 3, 7, 8, 2, 1], 'b': [8, 1, 7, 2, 4, 5, 4, 2, 7, 8]} -``` - -**Numpy** - -Calling `.np()` on a `pykx.Table` will return a numpy `record` array of the rows of the table with each type converted to it closest analogous numpy type. 
- -```Python ->>> kx.q('([] a: 10?10; b: 10?10)').np() -rec.array([(9, 9), (9, 7), (2, 6), (5, 6), (4, 4), (2, 7), (5, 8), (8, 4), - (7, 4), (9, 6)], - dtype=[('a', '<i8'), ('b', '<i8')]) -``` - -**Pandas** - -Calling `.pd()` on a `pykx.Table` will return a pandas `DataFrame`. - -```Python ->>> kx.q('([] a: 10?10; b: 10?10)').pd() - a b -0 1 9 -1 0 7 -2 5 7 -3 1 1 -4 0 9 -5 0 1 -6 1 0 -7 7 8 -8 6 8 -9 3 3 -``` - -Converting a `pandas` `DataFrame` object will result in a `pykx.Table` object. - -```Python ->>> kx.Table(pd.DataFrame({'a': [x for x in range(10)], 'b': [float(x) for x in range(10)]})) -pykx.Table(pykx.q(' -a b ---- -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -')) -``` - -**PyArrow** - -Calling `.pa()` on a `pykx.Table` will return a pyarrow `Table`. - -```Python ->>> kx.q('([] a: 10?10; b: 10?10)').pa() -pyarrow.Table -a: int64 -b: int64 ----- -a: [[0,7,3,3,6,8,2,3,8,9]] -b: [[5,7,5,6,7,0,2,1,8,1]] -``` - -Converting a `pyarrow` `Table` object will result in a `pykx.Table` object. - -```Python ->>> kx.Table(pa.Table.from_arrays([[1, 2, 3, 4], [5, 6, 7, 8]], names=['a', 'b'])) -pykx.Table(pykx.q(' -a b ---- -1 5 -2 6 -3 7 -4 8 -')) -``` diff --git a/docs/api/wrappers.md b/docs/api/wrappers.md deleted file mode 100644 index e550328..0000000 --- a/docs/api/wrappers.md +++ /dev/null @@ -1,3 +0,0 @@ -# Q Type Wrappers - -::: pykx.wrappers diff --git a/docs/api/write.md b/docs/api/write.md deleted file mode 100644 index 6ce8449..0000000 --- a/docs/api/write.md +++ /dev/null @@ -1,3 +0,0 @@ -# Write - -::: pykx.write diff --git a/docs/beta-features/compress-encypt.md b/docs/beta-features/compress-encypt.md new file mode 100644 index 0000000..f4af86a --- /dev/null +++ b/docs/beta-features/compress-encypt.md @@ -0,0 +1,177 @@ +# Compression and Encryption + +!!! Warning + + This module is a Beta Feature and is subject to change.
To enable this functionality for testing please follow the configuration instructions [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'` + +## Introduction + +With the volumes of sensitive data being produced within real-time applications today the ability to securely store this data and the ability to quickly access it can be challenging. PyKX provides users with a number of utilities, in the form of class objects, for the management of how data is compressed and encrypted when being persisted. + +### Compression + +The compression of data to disk is supported via PyKX allowing you to reduce disk space required for your persisted historical data. PyKX provides a variety of compression options allowing users to compress/decompress data using the following algorithms: + +- [`gzip`](https://en.wikipedia.org/wiki/Gzip) +- [`snappy`](https://en.wikipedia.org/wiki/Snappy_(compression)) +- [`zstd`](https://en.wikipedia.org/wiki/Zstd) +- [`LZ4HC`](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) + +In addition to this data can be compressed to KX's own qIPC format. For full information on KX file compression within kdb+/q see [here](https://code.kx.com/q/kb/file-compression/) + +### Encryption + +Data At Rest Encryption (DARE) is supported by PyKX with an explicit requirement on at least OpenSSL v1.0.2. To find out which version of OpenSSL you have available to you via PyKX you can find this using the following: + +```python +>>> import pykx as kx +>>> kx.ssl_info() +pykx.Dictionary(pykx.q(' +SSLEAY_VERSION | OpenSSL 1.1.1q 5 Jul 2022 +SSL_CERT_FILE | /usr/local/anaconda3/ssl/server-crt.pem +SSL_CA_CERT_FILE | /usr/local/anaconda3/ssl/cacert.pem +SSL_CA_CERT_PATH | /usr/local/anaconda3/ssl +SSL_KEY_FILE | /usr/local/anaconda3/ssl/server-key.pem +SSL_CIPHER_LIST | ECDBS-ECASD-CHACHA94-REAL305:ECDHE-RSM-CHACHA20-OOTH1305:.. 
+SSL_VERIFY_CLIENT| NO +SSL_VERIFY_SERVER| YES +')) +``` + +The encryption provided by this functionality specifically is Transparent Disk Encryption (TDE). TDE protects data at rest by encrypting database files on the hard drive and as a result on backup media. Encrypting your data with PyKX will be fully transparent to queries requiring no change to the logic used when querying data but will result in a time-penalty. + +To use this functionality a user must have a password protected master key available, ideally with a unique password of high-entropy. More information on the generation of a master key and a password is available [here](https://code.kx.com/q/kb/dare/#configuration). + +## Functional walkthrough + +This walkthrough will demonstrate the following steps: + +- Create compression objects to be used in global and per-partition data persistence. +- Persist a variety of Database partitions setting various compression configurations. +- Set the Python session to have globally configured encryption and compression settings. + +### Generating compression objects + +PyKX provides users with the ability to initialise compression and encryption class objects which can be used to set global configuration or by individual function operations. These respectively are supported via the `kx.Compress` and `kx.Encrypt` classes. For this section we will deal only with Compression. + +As mentioned in the introduction, compression within PyKX is supported using a variety of algorithms; the full list of algorithms is available as part of the `kx.CompressionAlgorithm` enumeration. + +```python +>>> import pykx as kx +>>> list(kx.CompressionAlgorithm) +[<CompressionAlgorithm.none: 0>, <CompressionAlgorithm.ipc: 1>, <CompressionAlgorithm.gzip: 2>, <CompressionAlgorithm.snappy: 3>, <CompressionAlgorithm.lz4hc: 4>, <CompressionAlgorithm.zstd: 5>] +``` + +Further details can be found through the `help` command: + +```python +>>> help(kx.CompressionAlgorithm) +``` + +Once you are familiar with the options available to you it's time to initialize your first compression class.
In this case, generate a compression object which uses the `gzip` algorithm at compression level 8. + +```python +>>> import pykx as kx +>>> compress = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=8) +``` + +This object will be used in the remaining sections of the walkthrough in both a local (one-shot) and global context. + +### Persisting Database partitions with various configurations + +Not all data is created equally; in time-series applications such as algorithmic trading it is often the case that older data is less valuable than newer data. As a result of this it is often the case when backfilling historical data that you may want to compress older datasets more aggressively. The compression logic provided by PyKX allows users to persist different partitions within a historical database to different levels. + +1. Create a database with the most recent data uncompressed + + ```python + >>> import pykx as kx + >>> from datetime import date + >>> N = 10000 + >>> db = kx.DB(path='/tmp/db') + >>> qtable = kx.Table( + ... data={ + ... 'x': kx.random.random(N, 1.0), + ... 'x1': 5 * kx.random.random(N, 1.0), + ... 'x2': kx.random.random(N, ['a', 'b', 'c']) + ... } + ... ) + >>> db.create(qtable, 'tab', date(2020, 1, 1)) + ``` + +2. Add a new partition using gzip compression + + ```python + >>> gzip = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=4) + >>> qtable = kx.Table( + ... data={ + ... 'x': kx.random.random(N, 1.0), + ... 'x1': 5 * kx.random.random(N, 1.0), + ... 'x2': kx.random.random(N, ['a', 'b', 'c']) + ... } + ... ) + >>> db.create(qtable, 'tab', date(2020, 1, 2), compress=gzip) + ``` + +3. Add a final partition using `lz4hc` compression + + ```python + >>> lz4hc = kx.Compress(algo=kx.CompressionAlgorithm.lz4hc, level=10) + >>> qtable = kx.Table( + ... data={ + ... 'x': kx.random.random(N, 1.0), + ... 'x1': 5 * kx.random.random(N, 1.0), + ... 'x2': kx.random.random(N, ['a', 'b', 'c']) + ... } + ...
) + >>> db.create(qtable, 'tab', date(2020, 1, 3), compress=lz4hc) + ``` + +Presently you can look at information about the persistence characteristics of your data using `kx.q('-21!')`, for example: + +```python +>>> kx.q('-21!`:/tmp/db/2020.01.01/tab/x') +pykx.Dictionary(pykx.q('')) +>>> kx.q('-21!`:/tmp/db/2020.01.02/tab/x') +pykx.Dictionary(pykx.q(' +compressedLength | 5467 +uncompressedLength| 8016 +algorithm | 2i +logicalBlockSize | 17i +zipLevel | 4i +')) +>>> kx.q('-21!`:/tmp/db/2020.01.03/tab/x') +pykx.Dictionary(pykx.q(' +compressedLength | 6374 +uncompressedLength| 8016 +algorithm | 4i +logicalBlockSize | 17i +zipLevel | 10i +')) +``` + +### Globally initialise compression and encryption + +Global initialisation of compression and encryption allows all data that is persisted from within a process to be compressed. This can be useful when completing large batch operations on data where being specific about per partition/per file operations isn't necessary. In the below section we will deal with compression and encryption separately. + +The compression settings that are used by PyKX are globally readable via `kx.q.z.zd`; when unset this value will return a PyKX Identity value as follows: + +```python +>>> kx.q.z.zd +pykx.Identity(pykx.q('::')) +``` + +To set the process to use gzip globally this can be done using `global_init` on the generated `kx.Compress` object.
+ +```python +>>> compress = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=9) +>>> compress.global_init() +>>> kx.q.z.zd +pykx.LongVector(pykx.q('17 2 9')) +``` + +Globally initialising encryption is completed through the loading of the user's encryption key into the process as follows: + +```python +>>> encrypt = kx.Encrypt(path='/path/to/my.key', password='PassWorD') +>>> encrypt.global_init() +``` diff --git a/docs/beta-features/db-management.md b/docs/beta-features/db-management.md index 5d0fdcf..1102bf4 100644 --- a/docs/beta-features/db-management.md +++ b/docs/beta-features/db-management.md @@ -23,7 +23,7 @@ This walkthrough will demonstrate the following steps: 1. Creating a copy of a column to the database 1. Applying a Python function to a column of the database 1. Updating the data type of a column -1. Adding a new table to the most recent partition of the database. +1. Adding a new table to the most recent partition of the database, setting compression for the partition. All integrations with the `Database Management` functionality are facilitated through use of the `pykx.DB` class. To follow along with the example outlined below you can use the [companion notebook](../examples/db-management.ipynb). This uses a more complex table but runs the same commands. For full information on the functions available you can reference the [API section](../api/db.md). @@ -195,7 +195,7 @@ To convert the data type of a column, you can use the `set_column_type` method. ### Adding a new table to the database -Now that you have successfully set up one table, you may want to add a second table named `quotes`. In this example, the `quotes` table only contains data for `2020.01.03`. We follow the same method as before and create the `quotes` table using the `create` method +Now that you have successfully set up one table, you may want to add a second table named `quotes`, additionally setting the persisted data to be compressed.
In this example, the `quotes` table only contains data for `2020.01.03`. We follow the same method as before and create the `quotes` table using the `create` method. ```python >>> quotes = kx.Table(data={ @@ -205,7 +205,8 @@ Now that you have successfully set up one table, you may want to add a second ta ... 'low': kx.random.random(N, 10.0), ... 'close': kx.random.random(N, 10.0) ... }) ->>> db.create(quotes, 'quotes', date(2020, 1, 3), by_field = 'sym', sym_enum = 'symcol') +>>> compress = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=5) +>>> db.create(quotes, 'quotes', date(2020, 1, 3), by_field = 'sym', sym_enum = 'symcol', compress=compress) Writing Database Partition 2020-01-03 to table quotes ``` diff --git a/docs/blogs.md b/docs/blogs.md new file mode 100644 index 0000000..4ee878e --- /dev/null +++ b/docs/blogs.md @@ -0,0 +1,50 @@ +# Blogs, Articles and Videos + +KX, Partners and members of the public regularly post articles, blogs and videos relating to their usage of PyKX and how it can be used as part of solutions to real-world problems. The intention of this page is to centralise these blogs and articles and will be kept up to date regularly. + +!!! note "Want to contribute?" + + If you would like to contribute content to this site, feel free to raise a pull request [here](https://github.com/KxSystems/pykx/pull). We would love to hear from you. 
+ +_Last updated:_ 8th March 2024 + +## Blogs + +| Title | Description | Author | Publication Date | +|-------|-------------|--------|------------------| +| [All Roads Lead to PyKX](https://www.habla.dev/blog/2023/07/31/all-roads-lead-to-pykx.html) | A fictional story outlining how a Python first CTO could begin to see the power of q/kdb+ via PyKX | Jesús López-González | 31st July 2023 | +| [All Roads Lead to Kdb: The Technical Counterpart](https://www.habla.dev/blog/2023/09/15/all-roads-lead-to-kdb-the-technical-counterpart) | An in-depth use-case outlining how a user can solve problems relating to traffic management using PyKX and kdb+/q | Oscar Nydza Nicpoñ, Marcos Vázquez Martín | 15th September 2023 | +| [PyKX Boosts Trade Analytics](https://www.treliant.com/knowledge-center/pykx-boosts-trade-analytics/) | An introduction to the fundamental features and functionality of PyKX | Paul Douglas, Paul Walsh, and Thomas Smyth | June 26th 2023 | +| [PyKX Highlights 2023](https://kx.com/blog/pykx-highlights-2023/) | A breakdown of new features and functionality added from January 2023 to version 2.1.1 in October 2023. | Rian Ó Cuinneagáin | 25th October 2023 | +| [Build and Manage Databases using PyKX](https://kx.com/blog/how-to-build-and-manage-databases-using-pykx/) | A breakdown of how PyKX can be used to generate and maintain kdb+ databases using newly released functionality | Conor McCarthy | 24th January 2024 | + + +## Articles + +| Title | Description | Publication Date | +|-------|-------------|------------------| +| [KX Brings the Power and Performance of kdb+ to Python Developers with PyKX](https://www.datanami.com/this-just-in/kx-brings-the-power-and-performance-of-kdb-to-python-developers-with-pykx/) | Market announcement of PyKX being made open source. | 7th June 2023 | + + + +## Videos + +### Accelerating Application Development with PyKX + +Jack Kiernan outlines the fundamentals of PyKX. 
+ + + +### Accelerating Python Workflows using PyKX in kdb + +Nathan Crone and Andrew Morrison outline the basic building blocks of PyKX and how using q with PyKX allows users to accelerate analytic development in Finance. + + + + +### High Performance, Real-time Event Processing with PyKX + +Mohammad Noor and Oliver Stewart outline how Citadel make use of PyKX to accelerate real-time event processing using kdb+ and reduce the barrier to entry for those unfamiliar with q/kdb+. + + + diff --git a/docs/changelog.md b/docs/changelog.md deleted file mode 100644 index 42b90d7..0000000 --- a/docs/changelog.md +++ /dev/null @@ -1,409 +0,0 @@ -# Changelog - -## PyKX 1.6.3 - -### Fixes and Improvements - -- Fixed Pandas API use of ndim functionality which should return `2` when interacting with tables following the expected Pandas behaviour. -- Fixed an error when using the Pandas API to update a column with a `Symbols`, `Characters`, and `Generic Lists`. -- Prevent attempting to pass wrapped Python functions over IPC. -- Support IPC payloads over 4GiB. - -## PyKX 1.6.2 - -### Additions - -- Added `to_local_folder` kwarg to `install_into_QHOME` to enable use of `pykx.q` without write access to `QHOME`. -- Added [an example](examples/threaded_execution/README.md) that shows how to use `EmbeddedQ` in a multithreaded context where the threads need to modify global state. -- Added [PYKX_NO_SIGINT](user-guide/advanced/environment_variables.md) environment variable. - -### Fixes and Improvements - -- Fixed and issue causing a crash when closing `QConnection` instances on Windows. -- Updated q 4.0 libraries to 2023.08.11. Note: Mac ARM release remains on 2022.09.30. -- Fix [Jupyter Magic](getting-started/q_magic_command.md) in local mode. -- Fix error when binding with [FFI](https://github.com/KxSystems/ffi) in `QINIT`. -- Fix issue calling `peach` with `PYKX_RELEASE_GIL` set to true when calling a Python function.
- -## PyKX 1.6.1 - -### Additions - -- Added `sorted`, `grouped`, `parted`, and `unique`. As methods off of `Tables` and `Vectors`. -- Added `PyKXReimport` class to allow subprocesses to reimport `PyKX` safely. - - Also includes `.pykx.safeReimport` in `pykx.q` to allows this behaviour when running under q as well. -- Added environment variables to specify a path to `libpython` in the case `pykx.q` cannot find it. - -### Fixes and Improvements - -- Fixed memory leaks within the various `QConnection` subclasses. -- Added deprecation warning around the discontinuing of support for Python 3.7. -- Fixed bug in Jupyter Notebook magic command. -- Fixed a bug causing `np.ndarray`'s to not work within `ufuncs`. -- Fixed a memory leak within all `QConnection` subclasses. Fixed for both `PyKX` as a client and as a server. -- Updated insights libraries to 4.0.2 -- Fixed `pykx.q` functionality when run on Windows. -- Fixed an issue where reimporting `PyKX` when run under q would cause a segmentation fault. -- Updated the warning message for the insights core libraries failing to load to make it more clear that no error has occured. - -## PyKX 1.6.0 - -### Additions - -- Added `merge_asof` to the Pandas like API. - - See [here](https://code.kx.com/pykx/user-guide/advanced/Pandas_API.html#tablemerge_asof) for details of supported keyword arguments and limitations. -- Added `set_index` to the Pandas like API. - - See [here](https://code.kx.com/pykx/user-guide/advanced/Pandas_API.html##setting-indexes) for details of supported keyword arguments and limitations. -- Added a set of basic computation methods operating on tabular data to the Pandas like API. See [here](https://code.kx.com/pykx/user-guide/advanced/Pandas_API.html#computations) for available methods and examples. -- `pykx.util.debug_environment` added to help with import errors. -- q vector type promotion in licensed mode. -- Added `.pykx.toraw` to `pykx.q` to enable raw conversions (e.g. 
`kx.toq(x, raw=True)`) -- Added support for Python `3.11`. - - Support for pyarrow in this python version is currently in Beta. -- Added the ability to use `kx.RawQConnection` as a Python based `q` server using `kx.RawQConnection(port=x, as_server=True)`. - - More documentation around using this functionality can be found [here](examples/server/server.html). - -### Fixes and Improvements - -- Improved error on Windows if `msvcr100.dll` is not found -- Updated q libraries to 2023.04.17 -- Fixed an issue that caused `q` functions that shared a name with python key words to be inaccessible using the context interface. - - It is now possible to access any `q` function that uses a python keyword as its name by adding an underscore to the name (e.g. `except` can now be accessed using `q.except_`). -- Fixed an issue with `.pykx.get` and `.pykx.getattr` not raising errors correctly. -- Fixed an issue where `deserializing` data would sometimes not error correctly. -- Users can now add new column(s) to an in-memory table using assignment when using the Pandas like API. - - ```python - >>> import os - >>> os.environ['PYKX_ENABLE_PANDAS_API'] = 'true' - >>> import pykx as kx - >>> import numpy as np - >>> tab = kx.q('([]100?1f;100?1f)') - >>> tab['x2'] = np.arange(0, 100) - >>> tab - pykx.Table(pykx.q(' - x x1 x2 - ------------------------- - 0.1485357 0.1780839 0 - 0.4857547 0.3017723 1 - 0.7123602 0.785033 2 - 0.3839461 0.5347096 3 - 0.3407215 0.7111716 4 - 0.05400102 0.411597 5 - .. - ')) - ``` - -## PyKX 1.5.3 - -### Additions - -- Added support for Pandas `Float64Index`. -- Wheels for ARM64 based Macs are now available for download. - -## PyKX 1.5.2 - -### Additions - -- Added support for ARM 64 Linux. - -## PyKX 1.5.1 - -### Fixes and Improvements - -- Fixed an issue with `pykx.q` that caused errors to not be raised properly under q. -- Fixed an issue when using `.pykx.get` and `.pykx.getattr` that caused multiple calls to be made. 
- -## PyKX 1.5.0 - -### Additions - -- Added wrappers around various `q` [system commands](https://code.kx.com/q/basics/syscmds/). -- Added `merge` method to tables when using the `Pandas API`. -- Added `mean`/`median`/`mode` functions to tables when using the `Pandas API`. -- Added various functions around type conversions on tables when using the `Pandas API`. - -### Fixes and Improvements - -- Fix to allow GUIDs to be sent over IPC. -- Fix an issue related to IPC connection using compression. -- Improved the logic behind loading `pykx.q` under a `q` process allowing it to run on MacOS and Linux in any environment that `EmbedPy` works in. -- Fix an issue that cause the default handler for `SIGINT` to be overwritten. -- `pykx.toq.from_callable` returns a `pykx.Composition` rather than `pykx.Lambda`. When executed returns an unwrapped q object. -- Fixed conversion of Pandas Timestamp objects. -- Fixed an issue around the `PyKX` `q` magic command failing to load properly. -- Fixed a bug around conversions of `Pandas` tables with no column names. -- Fixed an issue around `.pykx.qeval` not returning unwrapped results in certain scenarios. - -## PyKX 1.4.2 - -### Fixes and Improvements - -- Fixed an issue that would cause `EmbeddedQ` to fail to load. - -## PyKX 1.4.1 - -### Fixes and Improvements - -- Added constructors for `Table` and `KeyedTable` objects to allow creation of these objects from dictionaries and list like objects. -- Fixed a memory leak around calling wrapped `Foreign` objects in `pykx.q`. -- Fixed an issue around the `tls` keyword argument when creating `QConnection` instances, as well as a bug in the unlicensed behaviour of `SecureQConnection`'s. - -## PyKX 1.4.0 - -### Additions - -- Addition of a utility function `kx.ssl_info()` to retrieve the SSL configuration when running in unlicensed mode (returns the same info as kx.q('-26!0') with a license). 
-- Addition of a utility function `kx.schema.builder` to allow for the generation of `pykx.Table` and `pykx.KeyedTable` types with a defined schema and zero rows, this provides an alternative to writing q code to create an empty table. -- Added helper functions for inserting and upserting to `k.Table` instances. These functions provide new keyword arguments to run a test insert against the table or to enforce that the schema of the new row matches the existing table. -- Added environment variable `PYKX_NOQCE=1` to skip the loading of q Cloud Edition in order to speed up the import of PyKX. -- Added environment variable `PYKX_LOAD_PYARROW_UNSAFE=1` to import PyAarrow without the "subprocess safety net" which is here to prevent some hard crashes (but is slower than a simple import). -- Addition of method `file_execute` to `kx.QConnection` objects which allows the execution of a local `.q` script on a server instance as outlined [here](user-guide/advanced/ipc.md#file_execution). -- Added `kx.RawQConnection` which extends `kx.AsyncQConnection` with extra functions that allow a user to directly poll the send and receive selectors. -- Added environment variable `PYKX_RELEASE_GIL=1` to drop the [`Python GIL`](https://wiki.python.org/moin/GlobalInterpreterLock) on calls into embedded q. -- Added environment variable `PYKX_Q_LOCK=1` to enable a Mutex Lock around calls into q, setting this environment variable to a number greater than 0 will set the max length in time to block before raising an error, a value of '-1' will block indefinitely and will not error, any other value will cause an error to be raised immediately if the lock cannot be acquired. -- Added `insert` and `upsert` methods to `Table` and `KeyedTable` objects. - -### Fixes and Improvements - -- Fixed `has_nulls` and `has_infs` properties for subclasses of `k.Collection`. -- Improved error output of `kx.QConnection` objects when an error is raised within the context interface. 
-- Fixed `.py()` conversion of nested `k.Dictionary` objects and keyed `k.Dictionary` objects. -- Fixed unclear error message when querying a `QConnection` instance that has been closed. -- Added support for conversions of non C contiguous numpy arrays. -- Fixed conversion of null `GUIDAtom`'s to and from numpy types. -- Improved performance of converting `q` enums to pandas Categoricals. - -### Beta Features - -- Added support for a Pandas like API around `Table` and `KeyedTable` instances, documentation for the specific functionality can be found [here](user-guide/advanced/Pandas_API.ipynb). -- Added `.pykx.setdefault` to `pykx.q` which allows the default conversion type to be set without using environment variables. - -## PyKX 1.3.2 - -### Features and Fixes - -- Fixed support for using TLS with `SyncQConnection` instances. - -## PyKX 1.3.1 - -### Features and Fixes - -- Added environment variable `PYKX_Q_LIB_LOCATION` to specify a path to load the PyKX q libraries from. - - Required files in this directory - - If you are using the kdb+/q Insights core libraries they all must be present within this folder. - - The `read.q`, `write.q`, and `csvutil.q` libraries that are bundled with PyKX. - - A `q.k` that matches the version of `q` you are loading. - - There must also be a subfolder (`l64` / `m64` / `w64`) based on the platform you are using. - - Within this subfolder a copy of these files must also be present. - - `libq.(so / dylib)` / `q.dll`. - - `libe.(so / dylib)` / `e.dll`. - - If using the Insights core libraries their respective shared objects must also be present here. -- Updated core q libraries - - PyKX now supports M1 Macs - - OpenSSLv3 support -- Added ability to specify maximum length for IPC error messages. The default is 256 characters and this can be changed by setting the `PYKX_MAX_ERROR_LENGTH` environment variable. 
- -## PyKX 1.3.0 - -### Features and Fixes - -- Support for converting `datetime.datetime` objects with timezone information into `pykx.TimestampAtom`s and `pykx.TimestampVector`s. -- Added a magic command to run cells of q code in a Jupyter Notebook. The addition of `%%q` at the start of a Jupyter Notebook cell will allow a user to execute q code locally similarly to loading a q file. -- Added `no_ctx` key word argument to `pykx.QConnection` instances to disable sending extra queries to/from q to manage the context interface. -- Improvements to SQL interface for PyKX including the addition of support for prepared statements, execution of these statements and retrieval of inputs see [here](api/query.md#pykx.query.SQL) for more information. -- Fix to memory leak seen when converting Pandas Dataframes to q tables. -- Removed unnecessary copy when sending `q` objects over IPC. - -### Beta Features - -- EmbedPy replacement functionality `pykx.q` updated significantly to provide parity with embedPy from a syntax perspective. Documentation of the interface [here](api/pykx_under_q.md) provides API usage. Note that initialisation requires the first version of Python to be retrieved on a users `PATH` to have PyKX installed. Additional flexibility with respect to installation location is expected in `1.4.0` please provide any feedback to `pykx@kx.com` - -## PyKX 1.2.2 - -### Features and Fixes - -- Fixed an issue causing the timeout argument for `QConnection` instances to not work work properly. - -## PyKX 1.2.1 - -### Features and Fixes - -- Added support for OpenSSLv3 for IPC connections created when in 'licensed' mode. -- Updated conversion functionality for timestamps to support conversions within Pandas 1.5.0 - -## PyKX 1.2.0 - -### Features and Fixes - -- Support for converting any python type to a `q` Foreign object has been added. -- Support for converting Pandas categorical types into `pykx.EnumVector` type objects. 
-- Support for q querying against Pandas/PyArrow tables through internal conversion to q representation and subsequent query. `kx.q.qsql.select()` -- Support for casting Python objects prior to converting into K objects. (e.g. `kx.IntAtom(3.14, cast=True)` or `kx.toq("3.14", ktype=kx.FloatAtom, cast=True)`). -- Support usage of numpy [`__array_ufunc__`'s](https://numpy.org/doc/stable/reference/ufuncs.html) directly on `pykx.Vector` types. -- Support usage of numpy `__array_function__`'s directly on `pykx.Vector` types (Note: these will return a numpy ndarray object not an analogous `pykx.K` object). -- Improved performance of `pykx.SymbolVector` conversion into native Python type (e.g. `.py()` conversion for `pykx.SymbolVector`'s). -- Improved performance and memory usage of various comparison operators between `K` types. -- Improved performance of various `pykx.toq` conversions. -- `pykx.Vector` types will now automatically enlist atomic types instead of erroring. -- Fixed conversions of numpy float types into `pykx.FloatAtom` and `pykx.RealAtom` types. -- Fixed conversion of `None` Python objects into analogous null `K` types if a `ktype` is specified. -- Added `event_loop` parameter to `pykx.AsyncQConnection` that takes a running event loop as a parameter and allows the event loop to manage `pykx.QFuture` objects. - -### Beta Features - -- Added extra functionality to `pykx.q` related to the calling and use of python foreign objects directly within a `q` process. -- Support for [NEP-49](https://numpy.org/neps/nep-0049.html), which allows numpy arrays to be converted into `q` Vectors without copying the underlying data. This behaviour is opt-in and you can do so by setting the environment variable `PYKX_ALLOCATOR` to 1, "1" or True or by adding the flag `--pykxalloc` to the `QARGS` environment variable. Note: This feature also requires a python version of at least 3.8. 
-- Support the ability to trigger early garbage collection of objects in the `q` memory space by adding `--pykxgc` to the QARGS environment variable, or by setting the `PYKX_GC` environment variable to 1, "1" or True. - -## PyKX 1.1.1 - -### Features & Fixes - -- Added ability to skip symlinking `$QHOME` to `PyKX`'s local `$QHOME` by setting the environment variable `IGNORE_QHOME`. - -## PyKX 1.1.0 - -### Dependencies - -- The dependency on the system library `libcurl` has been made optional for Linux. If it is missing on Linux, a warning will be emitted instead of an error being raised, and the KX Insights Core library `kurl` will not be fully loaded. Windows and macOS are unaffected, as they don't support the KX Insights Core features to begin with. - -### Features & Fixes - -- Splayed and partitioned tables no longer emit warnings when instantiated. -- Added `pykx.Q.sql`, which is a wrapper around [KXI Core SQL](https://code.kx.com/insights/core/sql.html#sql-language-support). -- `.pykx.pyexec` and `.pykx.pyeval` no longer segfault when called with a character atom. -- Updated several `pykx.toq` tests so that they would not randomly fail. -- Fixed error when pickling `pykx.util.BlockManager` in certain esoteric situations. -- Fixed `pandas.MultiIndex` objects created by PyKX having `pykx.SymbolAtom` objects within them - now they have `str` objects instead, as they normally would. -- Upgraded the included KX Insights Core libraries to version 3.0.0. -- Added `pykx.toq.from_datetime_date`, which converts `datetime.date` objects into any q temporal atom that can represent a date (defaulting to a date atom). -- Fixed error when user specifies `-s` or `-q` in `$QARGS`. -- Fixed recursion error when accessing a non-existent attribute of `pykx.q` while in unlicensed mode. Now an attribute error is raised instead. -- Fixed build error introduced by new rules enforced by new versions of setuptools. -- Added `pykx.Anymap`. -- Fixed support for `kx.lic` licenses. 
-- The KXIC libraries are now loaded after q has been fully initialized, rather than during the initialization. This significantly reduces the time it takes to import PyKX. -- PyKX now uses a single location for `$QHOME`: its `lib` directory within the installed package. The top-level contents of the `$QHOME` directory (prior to PyKX updating the env var when embedded q is initialized) will be symlinked into PyKX's `lib` directory, along with the content of any subdirectories under `lib` (e.g. `l64`, `m64`, `w64`). This enables loading scripts and libraries located in the original `$QHOME` directory during q initialization. -- Improved performance (both execution speed and memory usage) of calling `np.array` on `pykx.Vector` instances. The best practice is still to use the `np` method instead of calling `np.array` on the `pykx.Vector` instance. -- `pykx.Vector` is now a subclass of `collections.abc.Sequence`. -- `pykx.Mapping` is not a subclass of `collections.abc.Mapping`. -- Split `pykx.QConnection` into `pykx.SyncQConnection` and `pykx.AsyncQConnection` and added support for asynchronous IPC with `q` using `async`/`await`. Refer to [the `pykx.AsyncQConnection` docs](api/ipc.md#pykx.ipc.AsyncQConnection) for more details. -- Pandas dataframes containing Pandas extension arrays not originally created as Numpy arrays would result in errors when attempting to convert to q. For example a Dataframe with index of type `pandas.MultiIndex.from_arrays` would result in an error in conversion. -- Improved performance of converting `pykx.SymbolVector` to `numpy.array` of strings, and also the conversion back from a `numpy.array` of `strings` to a `q` `SymbolVector`. -- Improved performance of converting `numpy.array`'s of `dtype`s `datetime64`/`timedelta64 ` to the various `pykx.TemporalTypes`. - -## PyKX 1.0.1 - -### Deprecations & Removals - -- The `sync` parameter for `pykx.QConnection` and `pykx.QConnection.__call__` has been renamed to the less confusing name `wait`. 
The `sync` parameter remains, but its usage will result in a `DeprecationWarning` being emitted. The `sync` parameter will be removed in a future version. - -### Features & Fixes -- Updated to stable classifier (`Development Status :: 5 - Production/Stable`) in project metadata. Despite this update being done in version 1.0.1, version 1.0.0 is still the first stable release of PyKX. -- PyKX now provides source distributions (`sdist`). It can be downloaded from PyPI using `pip download --no-binary=:all: --no-deps pykx`. As noted in [the installation docs](getting-started/installing.md#supported-environments), installations built from the source will only receive support on a best-effort basis. -- Fixed Pandas NaT conversion to q types. Now `pykx.toq(pandas.NaT, ktype=ktype)` produces a null temporal atom for any given `ktype` (e.g. `pykx.TimeAtom`). -- Added [a doc page for limitations of embedded q](user-guide/advanced/limitations.md). -- Added a test to ensure large vectors are correctly handled (5 GiB). -- Always use synchronous queries internally, i.e. fix `QConnection(sync=False)`. -- Disabled the context interface over IPC. This is a temporary measure that will be reversed once q function objects are updated to run in the environment they were defined in by default. -- Reduced the time it takes to import PyKX. There are plans to reduce it further, as `import pykx` remains fairly slow. -- Updated to [KXI Core 2.1](https://code.kx.com/insights/core/release-notes/2.1.0.html) & rename `qce` -> `kxic`. -- Misc test updates. -- Misc doc updates. - -## PyKX 1.0.0 - -### Migration Notes - -To switch from Pykdb to PyKX, you will need to update the name of the dependency from `pykdb` to `pykx` in your `pyproject.toml`/`requirements.txt`/`setup.cfg`/etc. When Pykdb was renamed to PyKX, its version number was reset. The first public release of PyKX has the version number 1.0.0, and will employ [semantic versioning](https://semver.org/). 
- -Pay close attention to the renames listed below, as well as the removals. Many things have been moved to the top-level, or otherwise reorganized. A common idiom with Pykdb was the following: - -```python -from pykdb import q, k -``` - -It is recommended that the following be used instead: - -```python -import pykx as kx -``` - -This way the many attributes at the top-level can be easily accessed without any loss of context, for example: - -```python -kx.q # Can be called to execute q code -kx.K # Base type for objects in q; can be used to convert a Python object into a q type -kx.SymbolAtom # Type for symbol atoms; can be used to convert a `str` or `bytes` into a symbol atom -kx.QContext # Represents a q context via the PyKX context interface -kx.QConnection # Can be called to connect to a q process via q IPC -kx.PyKXException # Base exception type for exceptions specific to PyKX and q -kx.QError # Exception type for errors that occur in q -kx.LicenseException # Exception type raised when features that require a license are used without -kx.QHOME # Path from which to load q files, set by $QHOME environment variable -kx.QARGS # List of arguments provided to the embedded q instance at startup, set by $QARGS environment variable -# etc. -``` - -You can no longer rely on the [context](api/ctx.md) being reset to the global context after each call into embedded q, however IPC calls are unaffected. - -### Renames -- Pykdb has been renamed to PyKX. `Pykdb` -> `PyKX`; `PYKDB` -> `PYKX`; `pykdb` -> `pykx`. -- The `adapt` module has been renamed to `toq`, and it can be called directly. Instead of `pykdb.adapt.adapt(x)` one should write `pykx.toq(x)`. -- The `k` module has been renamed to `wrappers`. All wrapper classes can be accessed from the top-level, i.e. `pykx.K`, `pykx.SymbolAtom`, etc. -- The "module interface" (`pykdb.module_interface`) has been renamed to the "context interface" (`pykx.ctx`). All `pykx.Q` instances (i.e. 
`pykx.q` and all `pykx.QConnection` instances) have a `ctx` attribute, which is the global `QContext` for that `pykx.Q` instance. Usually, one need not directly access the global context. Instead, one can access its subcontexts directly e.g. `q.dbmaint` instead of `q.ctx.dbmaint`. -- `KdbError` (and its subclasses) have been renamed to `QError` -- `pykdb.ctx.KdbContext` has been renamed to `pykx.ctx.QContext`, and is available from the top-level, i.e. `pykx.QContext`. -- The `Connection` class in the IPC module has been renamed to `QConnection`, and is now available at the top-level, i.e. `pykx.QConnection`. -- The q type wrapper `DynamicLoad` has been renamed to `Foreign` (`pykdb.k.DynamicLoad` -> `pykx.Foreign`). - -### Deprecations & Removals -- The `pykdb.q.ipc` attribute has been removed. The IPC module can be accessed directly instead at `pykx.ipc`, but generally one will only need to access the `QConnection` class, which can be accessed at the top-level: `pykx.QConnection`. -- The `pykdb.q.K` attribute has been removed. Instead, `K` types can be used as constructors for that type by leveraging the `toq` module. For example, instead of `pykdb.q.K(x)` one should write `pykx.K(x)`. Instead of `pykx.q.K(x, k_type=pykx.k.SymbolAtom)` one should write `pykx.SymbolAtom(x)` or `pykx.toq(x, ktype=pykx.SymbolAtom)`. -- Most `KdbError`/`QError` subclasses have been removed, as identifying them is error prone, and we are unable to provide helpful error messages for most of them. -- The `pykx.kdb` singleton class has been removed. - -### Dependencies -- More Numpy, Pandas, and PyArrow versions are supported. Current `pandas~=1.0`, `numpy~=1.20,<1.22`, and `pyarrow>=3.0.0` are supported. PyArrow remains an optional dependency. -- A dependency on `find-libpython~=0.2` was added. This is only used when running PyKX under a q process (see details in the section below about new alpha features). -- A dependency on the system library `libcurl` was added for Linux. 
This dependency will be made optional in a future release. - -### Features & Fixes -- The `pykx.Q` class has been added as the base class for `pykx.EmbeddedQ` (the class for `pykx.q`) and `pykx.QConnection`. -- The `pykx.EmbeddedQ` process now persists its [context](api/ctx.md) between calls. -- The console now works over IPC. -- The query module now works over IPC. Because `K` objects hold no reference to the `q` instance that created them (be it local or over IPC), `K` tables no longer have `select`/`exec`/`update`/`delete` methods with themselves projected in as the first argument. That is to say, instead of writing `t.select(...)`, write `q.qsql.select(t, ...)`, where `q` is either `pykx.q` or an instance of `pykx.QConnection`, and `t` was obtained from `q`. -- The context interface now works over IPC. -- Nulls and infinities are now handled as nulls and infinities, rather than as their underlying values. `pykx.Atom.is_null`, `pykx.Atom.is_inf`, `pykx.Collection.has_nulls`, and `pykx.Collection.has_infs` have been added. Numpy, Pandas, and PyArrow handles integral nulls with masked arrays, and they handle temporal nulls with `NaT`. `NaN` continues to be used for real/float nulls. The general Python representation (from `.py()`) uses `K` objects for nulls and infinities. -- Calling `bool` on `pykx.K` objects now either raises a `TypeError`, or return the unambiguously correct result. For ambiguous cases such as `pykx.Collection` instances, use `.any()`, `.all()`, or a length check instead. -- Assignment to q reserved words or the q context now raises a `pykx.PyKXException`. -- `pykx.toq.from_list` (previously `pykdb.adapt.adapt_list`) now works in unlicensed mode. -- `q.query` and `q.sql` are now placeholders (set to `None`). The query interface can be accessed from `q.qsql`. -- Ternary `pow` now raises `TypeError` for `RealNumericVector` and `RealNumericAtom`. -- `QContext` objects are now context handlers, e.g. 
`with pykx.q.dbmaint: # operate in .dbmaint within this block`. This context handler supports arbitrary nesting. -- `__getitem__` now raises a `pykx.LicenseException` when used in unlicensed mode. Previously it worked for a few select types only. If running in unlicensed mode, one should perform all q indexing in the connected q process, and all Python indexing after converting the `K` object to a Python/Numpy/Pandas/PyArrow object. -- `pykx.QConnection` (previously `pykdb.ipc.Connection`) objects now have an informative/idiomatic repr. -- Calls to `pykx.q` now support up to 8 arguments beyond the required query at position 0, similar to calling `pykx.QConnection` instances. These arguments are applied to the result of the query. -- Embedded q is now used to count the number of rows a table has. -- All dynamic linking to `libq` and `libe` has been replaced by dynamic loading. As a result, the modules previously known as `adapt` and `adapt_unlicensed` have been unified under `pykx.toq`. -- PyKX now attempts to initialize embedded q when `pykx` is imported, rather than when `pykx.q` is first accessed. As a result, the error-prone practice of supplying the `pykx.kdb` singleton class with arguments for embedded q is now impossible. -- Arguments for embedded q can now be supplied via the environment variable `$QARGS` in the form of command-line arguments. For example, `QARGS='--unlicensed'` causes PyKX to enter unlicensed mode when it is started, and `QARGS='-o 8'` causes embedded q to use an offset from UTC of 8 hours. These could be combined as `QARGS='--unlicensed -o 8'`. -- Added the `--licensed` startup flag (to be provided via the `$QARGS` environment variable), which can be used to raise a `pykx.PyKXException` (rather than emitting a warning) if PyKX fails to start in licensed mode (likely because of a missing/invalid q license). -- PyKX Linux wheels are now [PEP 600](https://peps.python.org/pep-0600/) compliant, built to the `manylinux_2_17` standard. 
-- Misc other bug fixes. -- Misc doc improvements. - -### Performance Improvements - -- Converting nested lists from q to Python is much faster. -- Internally, PyKX now calls q functions with arguments directly instead of creating a `pykx.Function` instance then calling it. This results in modest performance benefits in some cases. -- The context interface no longer loads every element of a context when the context is first accessed, thereby removing the computation spike, which could be particularly intense for large q contexts. - -### New Alpha Features - -!!! danger "Alpha features are subject to change" - - Alpha features are not stable will be subject to changes without notice. Use at your own risk. - -- q can now load PyKX by loading the q file `pykx.q`. `pykx.q` can be copied into `$QHOME` by running `pykx.install_into_QHOME()`. When loaded into q, it will define the `.pykx` namespace, which notably has `.pykx.exec` and `.pykx.pyeval`. This allows for Python code to be run within q libraries and applications without some of the limitations of embedded q such as the lack of the q main loop, or the lack of timers. When q loads `pykx.q`, it attempts to source the currently active Python environment by running `python`, then fetching the environment details from it. 
diff --git a/docs/examples/compress_and_encrypt/archive.zip b/docs/examples/compress_and_encrypt/archive.zip index b6d2989..2bea4be 100644 Binary files a/docs/examples/compress_and_encrypt/archive.zip and b/docs/examples/compress_and_encrypt/archive.zip differ diff --git a/docs/examples/compress_and_encrypt/compress_and_encrypt.py b/docs/examples/compress_and_encrypt/compress_and_encrypt.py index 84727b7..6928d7a 100644 --- a/docs/examples/compress_and_encrypt/compress_and_encrypt.py +++ b/docs/examples/compress_and_encrypt/compress_and_encrypt.py @@ -1,11 +1,10 @@ from enum import Enum import json -from math import log2 from pathlib import Path from textwrap import dedent -from typing import Dict, Optional, Union +from typing import Dict, Union -import pykx +import pykx as kx from pykx import q @@ -18,81 +17,27 @@ class CompressionAlgorithm(Enum): lz4hc = 4 -def load_master_key(key_path: Path, password: Union[str, bytes]) -> None: - """Loads the master key into the q process. - - Must be run prior to encrypting anything using the q process. - """ - load_master_key = q('-36!') - load_master_key([key_path, bytes(password, 'utf-8')]) - - -def compress(source: Union[Path, pykx.K], +def compress(source: Union[Path, kx.K], target: Path, - block_size: int = 2**17, - algorithm: CompressionAlgorithm = CompressionAlgorithm.none, - compression_level: Optional[int] = None, - encrypt: bool = False ) -> Dict[str, int]: """Compresses (and/or encrypts) a K object written to disk or in-memory Parameters: source: What will be compressed/encrypted. Either a path to a q object on disk or a - `pykx.K` object. + `kx.K` object. target: The path the compressed/encrypted data will be written to. - block_size: The size of the compressed blocks. Must be a power of `2` (i.e. `2 ** 17` for - 128 kB blocks). Minimum varies by platform, but generally a block size between - `2 ** 12` and `2 ** 20` is advisable. - algorithm: The compression algorithm to be used. 
- compression_level: How compressed the data should be. Varies by selected algorithm. The - valid values for each algorithm are shown below: - - algorithm | compression level - --------- | ----------------- - `none` | `0` - `ipc` | `0` - `gzip` | `0`-`9` - `snappy` | `0` - `lz4hc` | `1`-`12` - - Defaults to the maximum compression level for the selected algorithm. - - encrypt: Whether the data should be encrypted. The master key must be loaded. Returns: Info about the compressed data. """ - if isinstance(source, pykx.K): + if isinstance(source, kx.K): _compress = q('{y set x}') else: - _compress = lambda x, y: q('-19!', (x, *y)) # noqa: E731 - - compression_stats = q('-21!') - - if block_size & (block_size - 1): - raise ValueError(f'block_size must be a power of 2, not {block_size}') + _compress = lambda x, y: q("{y set x}", x, [y, *q.z.zd.py()]) # noqa: E731 - compression_range = { - CompressionAlgorithm.none: range(0, 1), - CompressionAlgorithm.ipc: range(0, 1), - CompressionAlgorithm.gzip: range(0, 10), - CompressionAlgorithm.snappy: range(0, 1), - CompressionAlgorithm.lz4hc: range(1, 13), - }[algorithm] + _compress(source, target) - if compression_level is None: - compression_level = compression_range.stop - 1 - elif compression_level not in compression_range: - raise ValueError( - f'Invalid compression level {compression_level} for {algorithm} ' - f'algorithm. 
Valid range is {compression_range}') - - return compression_stats(_compress(source, [ - target, - int(log2(block_size)), - algorithm.value + (16 if encrypt else 0), - compression_level - ])).py() + return q('-21!', target).py() def setup(): @@ -116,24 +61,24 @@ def setup(): def demo(): print('Writing in-memory trades table with gzip:', end=' ') - print(json.dumps(compress( # Using json for pretty printing + kx.Compress(kx.CompressionAlgorithm.gzip).global_init() + print(json.dumps(compress( q('trades'), - Path('./trades_compressed_gzip'), - algorithm=CompressionAlgorithm.gzip + Path('./trades_compressed_gzip') ), indent=4), end='\n\n') print('Writing in-memory trades table with snappy:', end=' ') + kx.Compress(kx.CompressionAlgorithm.snappy).global_init() print(json.dumps(compress( # Using json for pretty printing q('trades'), Path('./trades_compressed_snappy'), - algorithm=CompressionAlgorithm.snappy ), indent=4), end='\n\n') print('Writing in-memory trades table with lz4hc:', end=' ') + kx.Compress(algo=kx.CompressionAlgorithm.lz4hc, level=5).global_init() print(json.dumps(compress( # Using json for pretty printing q('trades'), - Path('./trades_compressed_lz4hc'), - algorithm=CompressionAlgorithm.lz4hc + Path('./trades_compressed_lz4hc') ), indent=4), end='\n\n') print('Writing on-disk trades table with lz4hc:', end=' ') @@ -142,7 +87,6 @@ def demo(): print(json.dumps(compress( # Using json for pretty printing source, Path('./trades_ondisk_compressed_lz4hc'), - algorithm=CompressionAlgorithm.lz4hc ), indent=4), end='\n\n') # WARNING: Do not use this key for anything in production! This is @@ -150,14 +94,12 @@ def demo(): # https://code.kx.com/q/kb/dare/#configuration for information about # generating a key. 
print('Loading master key\n') - load_master_key(Path(__file__).parent/'demokey.key', 'demokeypass') + kx.Encrypt(Path(__file__).parent/'demokey.key', 'demokeypass').load_key() print('Writing in-memory trades table with lz4hc and encryption:', end=' ') print(json.dumps(compress( # Using json for pretty printing q('trades'), - Path('./trades_encrypted_compressed_lz4hc'), - algorithm=CompressionAlgorithm.lz4hc, - encrypt=True + Path('./trades_encrypted_compressed_lz4hc') ), indent=4), end='\n\n') diff --git a/docs/examples/subsciber/archive.zip b/docs/examples/subsciber/archive.zip deleted file mode 100644 index 8507173..0000000 Binary files a/docs/examples/subsciber/archive.zip and /dev/null differ diff --git a/docs/examples/subsciber/readme.md b/docs/examples/subsciber/readme.md deleted file mode 100644 index 9b4c282..0000000 --- a/docs/examples/subsciber/readme.md +++ /dev/null @@ -1,89 +0,0 @@ -# PyKX Subscribing to a `q` Process - -The purpose of this example is to provide a quickstart for setting up a python process using `PyKX` to subscribe to a running q process. - -To follow along with this example please feel free to download this zip archive that contains a copy of the python script and this writeup. - -## Quickstart - -This example creates a python subscriber to a q process, that appends data received to the end of a table. - -Here we have: - -1. A q process running on port 5001 -2. A Python process subscribing to the q process - -### Start the required q processes - -```q -// run q -$ q -p 5001 -q) -``` - -### Start the pykx subscriber - -```bash -// run the subscriber which will automatically connect -$ python subscriber.py -``` - -### Outcome - -What should be observed on invocation of the above is that the q process should have the variable `py_server` set to the handle of the python process once the python process connects. Once this variable is set you can send rows of the table to the python process and they will be appended as they are recieved. 
- -```q -// run q -$ q -p 5001 -q) -``` - -q process is started. - -```bash -// run the subscriber which will automatically connect -$ python subscriber.py -===== Initital Table ===== -a b ---- -4 8 -9 1 -2 9 -7 5 -0 4 -1 6 -9 6 -2 1 -1 8 -8 5 -===== Initital Table ===== - -``` - -Python process is started with a table, and it connects to the q server and sets the `py_server` variable. - -```q -q)py_server[1 2] - -``` - -Send a new table row (1, 2) to the python process from q. - -```python -Recieved new table row from q: 1 2 -a b ---- -4 8 -9 1 -2 9 -7 5 -0 4 -1 6 -9 6 -2 1 -1 8 -8 5 -1 2 -``` - -The new row has been appended to the table. diff --git a/docs/examples/subsciber/subscriber.py b/docs/examples/subsciber/subscriber.py deleted file mode 100644 index 5ab019a..0000000 --- a/docs/examples/subsciber/subscriber.py +++ /dev/null @@ -1,44 +0,0 @@ -import pykx as kx - -import asyncio - - -table = kx.q('([] a: 10?10; b: 10?10)') - - -def assert_result(res): - # assert message from q process has the correct schema to be appended to the table - return type(res) is kx.LongVector and len(res) == 2 - - -async def main_loop(q): - global table - iters = 200 # only run a limited number of iterations for the example - while True: - await asyncio.sleep(0.5) # allows other async tasks to run along side - result = q.poll_recv() # this will return None if no message is available to be read - if assert_result(result): - print(f'Recieved new table row from q: {result}') - table = kx.q.upsert(table, result) - print(table) - result = None - iters -= 1 - if iters < 0: - break - - -async def main(): - global table - async with kx.RawQConnection(port=5001) as q: - print('===== Initital Table =====') - print(table) - print('===== Initital Table =====') - # Set the variable py_server on the q process pointing towards this processes IPC connection - # We use neg to ensure the messages are sent async so no reply is expected from this process - await q('py_server: neg .z.w') - - await 
main_loop(q) - - -if __name__ == '__main__': - asyncio.run(main()) diff --git a/docs/examples/subscriber/archive.zip b/docs/examples/subscriber/archive.zip index 0e8323e..655bb7d 100644 Binary files a/docs/examples/subscriber/archive.zip and b/docs/examples/subscriber/archive.zip differ diff --git a/docs/examples/subscriber/readme.md b/docs/examples/subscriber/readme.md index f6cc736..b7f31dd 100644 --- a/docs/examples/subscriber/readme.md +++ b/docs/examples/subscriber/readme.md @@ -32,7 +32,7 @@ $ python subscriber_async.py ### Outcome -What should be observed on invocation of the above is that the q process should have the variable `py_server` set to the handle of the python process once the python process connects. Once this variable is set you can send rows of the table to the python process and they will be appended as they are recieved. +What should be observed on invocation of the above is that the q process should have the variable `py_server` set to the handle of the python process once the python process connects. Once this variable is set you can send rows of the table to the python process and they will be appended as they are received. ```q // run q @@ -45,7 +45,7 @@ q process is started. 
```bash // run the subscriber which will automatically connect $ python subscriber.py -===== Initital Table ===== +===== Initial Table ===== a b --- 4 8 @@ -58,7 +58,7 @@ a b 2 1 1 8 8 5 -===== Initital Table ===== +===== Initial Table ===== ``` diff --git a/docs/examples/subscriber/subscriber.py b/docs/examples/subscriber/subscriber.py index 5ab019a..e187d34 100644 --- a/docs/examples/subscriber/subscriber.py +++ b/docs/examples/subscriber/subscriber.py @@ -30,9 +30,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001) as q: - print('===== Initital Table =====') + print('===== Initial Table =====') print(table) - print('===== Initital Table =====') + print('===== Initial Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/docs/examples/subscriber/subscriber_async.py b/docs/examples/subscriber/subscriber_async.py index 4db8388..30b628d 100644 --- a/docs/examples/subscriber/subscriber_async.py +++ b/docs/examples/subscriber/subscriber_async.py @@ -25,9 +25,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) as q: - print('===== Initital Table =====') + print('===== Initial Table =====') print(table) - print('===== Initital Table =====') + print('===== Initial Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/docs/examples/threaded_execution/README.md b/docs/examples/threaded_execution/README.md deleted file mode 100644 index 871c0e5..0000000 --- a/docs/examples/threaded_execution/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# PyKX Calling into q from multiple 
threads - -The purpose of this example is to provide a quickstart for setting up a python process using `PyKX` -to call into EmbeddedQ from multiple threads. - -To follow along with this example please feel free to download this -zip archive that contains a copy of the python script and this -writeup. - -## Quickstart - -This example creates a python process that calls into `q` from multiple threads. When using the -included `pykxthreading` library these threads will be able to modify state when calling into `kx.q`. -The base `EmbeddedQ` object within PyKX normally only allows the main thread to make these state -changes. - - -### Start the PyKX threaded execution example - -```bash -$ python threaded_execution.py -``` - -### Outcome - -In this simple example the output of `kx.q.til(...)` will be output to the console, where each thread -is given a different number of elements to print. - -``` -$ python threaded_execution.py -0 1 -0 1 2 -0 1 2 3 4 -0 1 2 3 4 5 6 -0 1 2 3 -0 1 2 3 4 5 6 7 -0 1 2 3 4 5 -0 1 2 3 4 5 6 7 8 -0 1 2 3 4 5 6 7 8 9 -0 1 2 3 4 5 6 7 8 9 10 11 -0 1 2 3 4 5 6 7 8 9 10 -``` - -### Important Note on usage - -Since the `pykxthreading` library creates a background thread to run the calls into `EmbeddedQ`, the -background thread must be shutdown when finished. The easiest way to ensure this is done is by using -a `try` - `finally` block around the entrypoint to your script. This will ensure that even in the -event of an error the background thread will still be shutdown correctly so python can exit. - -``` -def main(): - ... 
- - -if __name__ == '__main__': - try: - main() - finally: - # Must shutdown the background thread to properly exit - shutdown_q() -``` diff --git a/docs/examples/threaded_execution/archive.zip b/docs/examples/threaded_execution/archive.zip index 07efa2b..6e58739 100644 Binary files a/docs/examples/threaded_execution/archive.zip and b/docs/examples/threaded_execution/archive.zip differ diff --git a/docs/examples/threaded_execution/asyncio_threading.py b/docs/examples/threaded_execution/asyncio_threading.py index b1931ba..9d96f06 100644 --- a/docs/examples/threaded_execution/asyncio_threading.py +++ b/docs/examples/threaded_execution/asyncio_threading.py @@ -31,9 +31,9 @@ async def main(): calls = 1000 conns = [await kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) for _ in range(N)] # noqa main_q_con = kx.SyncQConnection(port=5001) - print('===== Initital Table =====') + print('===== Initial Table =====') print(kx.q('table')) - print('===== Initital Table =====') + print('===== Initial Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process [await conns[i](f'py_server{i}: neg .z.w') for i in range(N)] diff --git a/docs/examples/threaded_execution/pykxthreading/__init__.py b/docs/examples/threaded_execution/pykxthreading/__init__.py deleted file mode 100644 index 9d2bc3f..0000000 --- a/docs/examples/threaded_execution/pykxthreading/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from .pykxthreading import close as shutdown_q, q - -import pykx - - -pykx.q = q - -__all__ = sorted([ - 'shutdown_q', - 'pykx', - 'q' -]) - - -def __dir__(): - return __all__ diff --git a/docs/examples/threaded_execution/pykxthreading/pykxthreading.py b/docs/examples/threaded_execution/pykxthreading/pykxthreading.py deleted file mode 100644 index 3a20460..0000000 --- a/docs/examples/threaded_execution/pykxthreading/pykxthreading.py +++ 
/dev/null @@ -1,50 +0,0 @@ -import threading -from queue import Queue - - -def q_thread(q_queue): - import os - os.environ['PYKX_NO_SIGINT'] = '1' - import pykx as kx - e_q = kx.EmbeddedQ() - while True: - call = q_queue.get() - if callable(call): - break - fut = call[0] - try: - res = e_q(call[1], *call[2], **call[3]) - fut.set_result(res) - except BaseException as e: - fut.set_exception(e) - - -q_queue = Queue() -th = threading.Thread(target=q_thread, args=(q_queue,)) -th.start() - - -import pykx as kx - - -class ThreadedQ(kx.Q): - def __init__(self, q_queue): - object.__setattr__(self, 'q_queue', q_queue) - super().__init__() - - def __call__(self, query, *args, **kwargs): - fut = kx.EmbeddedQFuture() - self.q_queue.put((fut, query, args, kwargs)) - return fut._await() - - _call = __call__ - - -q = ThreadedQ(q_queue) - - -def close(): - global th - global q_queue - q_queue.put(lambda x: 1) - th.join() diff --git a/docs/examples/threaded_execution/threaded_execution.md b/docs/examples/threaded_execution/threaded_execution.md deleted file mode 100644 index 871c0e5..0000000 --- a/docs/examples/threaded_execution/threaded_execution.md +++ /dev/null @@ -1,62 +0,0 @@ -# PyKX Calling into q from multiple threads - -The purpose of this example is to provide a quickstart for setting up a python process using `PyKX` -to call into EmbeddedQ from multiple threads. - -To follow along with this example please feel free to download this -zip archive that contains a copy of the python script and this -writeup. - -## Quickstart - -This example creates a python process that calls into `q` from multiple threads. When using the -included `pykxthreading` library these threads will be able to modify state when calling into `kx.q`. -The base `EmbeddedQ` object within PyKX normally only allows the main thread to make these state -changes. 
- - -### Start the PyKX threaded execution example - -```bash -$ python threaded_execution.py -``` - -### Outcome - -In this simple example the output of `kx.q.til(...)` will be output to the console, where each thread -is given a different number of elements to print. - -``` -$ python threaded_execution.py -0 1 -0 1 2 -0 1 2 3 4 -0 1 2 3 4 5 6 -0 1 2 3 -0 1 2 3 4 5 6 7 -0 1 2 3 4 5 -0 1 2 3 4 5 6 7 8 -0 1 2 3 4 5 6 7 8 9 -0 1 2 3 4 5 6 7 8 9 10 11 -0 1 2 3 4 5 6 7 8 9 10 -``` - -### Important Note on usage - -Since the `pykxthreading` library creates a background thread to run the calls into `EmbeddedQ`, the -background thread must be shutdown when finished. The easiest way to ensure this is done is by using -a `try` - `finally` block around the entrypoint to your script. This will ensure that even in the -event of an error the background thread will still be shutdown correctly so python can exit. - -``` -def main(): - ... - - -if __name__ == '__main__': - try: - main() - finally: - # Must shutdown the background thread to properly exit - shutdown_q() -``` diff --git a/docs/examples/threaded_execution/threaded_execution.py b/docs/examples/threaded_execution/threaded_execution.py deleted file mode 100644 index a6e419c..0000000 --- a/docs/examples/threaded_execution/threaded_execution.py +++ /dev/null @@ -1,17 +0,0 @@ -import threading - -from pykxthreading import pykx as kx, shutdown_q - - -def main(): - a = [threading.Thread(target=lambda x: print(kx.q.til(x)), args=(x,)) for x in range(2, 13)] - [x.start() for x in a] - [x.join() for x in a] - - -if __name__ == '__main__': - try: - main() - finally: - # Must shutdown the background thread to properly exit - shutdown_q() diff --git a/docs/examples/threaded_execution/threading.md b/docs/examples/threaded_execution/threading.md index 031706c..14f051c 100644 --- a/docs/examples/threaded_execution/threading.md +++ b/docs/examples/threaded_execution/threading.md @@ -10,7 +10,7 @@ writeup. 
## Quickstart This example creates a python process that creates multiple tasks/threads that subscribe to a `q` -process over IPC and upon recieving a new row upsert it to a local table. There are 2 scripts +process over IPC and upon receiving a new row upsert it to a local table. There are 2 scripts included: `asyncio_threading.py` and `threads.py`, the first uses asyncio tasks running on seperate threads and the second example uses the python `threading` library directly to spawn threads. @@ -27,11 +27,11 @@ $ python threads.py ### Outcome The inital table will be printed upon starting the program, once all the threads/tasks have -upserted all of the rows they have recieved to the table the final table will be printed. +upserted all of the rows they have received to the table the final table will be printed. ``` $ python asyncio_threading.py -===== Initital Table ===== +===== Initial Table ===== a b --- 4 8 @@ -44,7 +44,7 @@ a b 2 1 1 8 8 5 -===== Initital Table ===== +===== Initial Table ===== a b ----- 4 8 diff --git a/docs/examples/threaded_execution/threads.py b/docs/examples/threaded_execution/threads.py index 374382e..f93689e 100644 --- a/docs/examples/threaded_execution/threads.py +++ b/docs/examples/threaded_execution/threads.py @@ -33,9 +33,9 @@ async def main(): calls = 1000 conns = [await kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) for _ in range(N)] # noqa main_q_con = kx.SyncQConnection(port=5001) - print('===== Initital Table =====') + print('===== Initial Table =====') print(kx.q('table')) - print('===== Initital Table =====') + print('===== Initial Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process [await conns[i](f'py_server{i}: neg .z.w') for i in range(N)] diff --git a/docs/extras/faq.md b/docs/extras/faq.md deleted file mode 100644 index 57e4ddd..0000000 --- 
a/docs/extras/faq.md +++ /dev/null @@ -1,32 +0,0 @@ -# FAQ - -## How to work around the `'cores` licensing error? - -``` ->>> import pykx as kx -:228: PyKXWarning: Failed to initialize embedded q; falling back to unlicensed mode, which has limited functionality. Refer to https://code.kx.com/pykx/user-guide/advanced/modes.html for more information. Captured output from initialization attempt: - '2022.09.15T10:32:13.419 licence error: cores -``` - -This error indicates your license is limited to a given number of cores but PyKX tried to use more cores than the license allows. - -- On Linux you can use `taskset` to limit the number of cores used by the python process and likewise PyKX and EmbeddedQ: -``` -# Example to limit python to the 4 first cores on a 8 cores CPU -$ taskset -c 0-3 python -``` - -- You can also do this in python before importing PyKX (Linux only): -``` ->>> import os ->>> os.sched_setaffinity(0, [0, 1, 2, 3]) ->>> import pykx as kx ->>> kx.q('til 10') -pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9')) -``` - -- On Windows you can use the `start` command with its `/affinity` argument (see: `> help start`): -``` -> start /affinity f python -``` -(above, 0xf = 00001111b, so the python process will only use the four cores for which the mask bits are equal to 1) diff --git a/docs/getting-started/PyKX Introduction Notebook.ipynb b/docs/getting-started/PyKX Introduction Notebook.ipynb deleted file mode 100644 index 1623b7e..0000000 --- a/docs/getting-started/PyKX Introduction Notebook.ipynb +++ /dev/null @@ -1,1096 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# PyKX Introduction Notebook\n", - "\n", - "The purpose of this notebook is to provide an introduction to the capabilities and functionality made available to you with PyKX.\n", - "\n", - "To follow along please download this notebook using the following 'link.'\n", - "\n", - "This Notebook is broken into the following sections\n", - "\n", - "1. 
[How to import PyKX](#How-to-import-Pykx)\n", - "1. [The basic data structures of PyKX](#The-basic-data-structures-of-PyKX)\n", - "1. [Accessing and creating PyKX objects](#Accessing-and-creating-PyKX-objects)\n", - "1. [Running analytics on objects in PyKX](#Running-analytics-on-objects-in-PyKX)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Welcome to PyKX!\n", - "\n", - "PyKX is a Python library built and maintained for interfacing seamlessly with the worlds fastest time-series database technology kdb+ and it's underlying vector programming language q.\n", - "\n", - "It's aim is to provide you and all Python data-engineers and data-scientist with an interface to efficiently apply analytics on large volumes of on-disk and in-memory data, in a fraction of the time of competitor libraries.\n", - "\n", - "## How to import PyKX\n", - "\n", - "To access PyKX and it's functions import it in your Python code as follows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": ["hide_code"] - }, - "outputs": [], - "source": [ - "import os\n", - "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - }, - "outputs": [], - "source": [ - "import pykx as kx\n", - "kx.q.system.console_size = [10, 80]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The shortening of the import name to `kx` is done for readability of code that uses PyKX and is the intended standard for the library. As such we recommend that you always use `import pykx as kx` when using the library.\n", - "\n", - "Below we load additional libraries used through this notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The basic data structures of PyKX\n", - "\n", - "Central to your interaction with PyKX are the various data types that are supported by the library, fundamentally PyKX is built atop a fully featured functional programming language `q` which provides small footprint data structures that can be used in analytic calculations and the creation of highly performant databases. The types we show below are generated from Python equivalent types but as you will see through this notebook \n", - "\n", - "In this section we will describe the basic elements which you will come in contact with as you traverse the library and explain why/how they are different.\n", - "\n", - "### PyKX Atomic Types\n", - "\n", - "In PyKX an atom denotes a single irreducible value of a specific type, for example you may come across `pykx.FloatAtom` or `pykx.DateAtom` objects generated as follows which may have been generated as follows from an equivalent Pythonic representation. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.FloatAtom(1.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import date\n", - "kx.DateAtom(date(2020, 1, 1))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PyKX Vector Types\n", - "\n", - "Similar to atoms, vectors are a data structure composed of a collection of multiple elements of a single specified type. 
These objects in PyKX along with lists described below form the basis for the majority of the other important data structures that you will encounter including dictionaries and tables.\n", - "\n", - "Typed vector objects provide significant benefits when it comes to the applications of analytics over Python lists for example. Similar to Numpy, PyKX gains from the underlying speed of it's analytic engine when operating on these strictly typed objects.\n", - "\n", - "Vector type objects are always 1-D and as such are/can be indexed along a single axis.\n", - "\n", - "In the following example we are creating PyKX vectors from common Python equivalent `numpy` and `pandas` objects." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.IntVector(np.array([1, 2, 3, 4], dtype=np.int32))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.toq(pd.Series([1, 2, 3, 4]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PyKX Lists\n", - "\n", - "A `List` in PyKX can loosely be described as an untyped vector object. Unlike vectors which are optimised for the performance of analytics, lists are more commonly used for storing reference information or matrix data.\n", - "\n", - "Unlike vector objects which are by definition 1-D in shape, lists can be ragged N-Dimensional objects. This makes them useful for the storage of some complex data structures but limits their performance when dealing with data-access/data modification tasks." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.List([[1, 2, 3], [1.0, 1.1, 1.2], ['a', 'b', 'c']])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PyKX Dictionaries\n", - "\n", - "A dictionary in PyKX is defined as a mapping between a direct key-value mapping, the list of keys and values to which they are associated must have the same count. While it can be considered as a key-value pair, it is physically stored as a pair of lists." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.Dictionary({'x': [1, 2, 3], 'x1': np.array([1, 2, 3])}))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PyKX Tables\n", - "\n", - "Tables in PyKX are a first-class typed entity which live in memory. They can be fundamentally described as a collection of named columns implemented as a dictionary. This mapping construct means that tables in PyKX are column-oriented which makes analytic operations on specified columns much faster than would be the case for a relational database equivalent.\n", - "\n", - "Tables in PyKX come in many forms but the key table types are as follows\n", - "\n", - "- `pykx.Table` \n", - "- `pykx.KeyedTable`\n", - "- `pykx.SplayedTable`\n", - "- `pykx.PartitionedTable`\n", - "\n", - "In this section we will deal only with the first two of these which constitute specifically the in-memory data table types. 
As will be discussed in later sections `Splayed` and `Partitioned` tables are memory-mapped on-disk data structures, these are derivations of the `pykx.Table` and `pykx.KeyedTable` type objects.\n", - "\n", - "#### `pykx.Table`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.Table([[1, 2, 'a'], [2, 3, 'b'], [3, 4, 'c']], columns = ['col1', 'col2', 'col3']))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.Table(data = {'col1': [1, 2, 3], 'col2': [2 , 3, 4], 'col3': ['a', 'b', 'c']}))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `pykx.KeyedTable`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.Table(data = {'x': [1, 2, 3], 'x1': [2, 3, 4], 'x2': ['a', 'b', 'c']}).set_index(['x'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Other Data Types\n", - "\n", - "The above types outline the majority of the important type structures in PyKX but there are many others which you will encounter as you use the library, below we have outlined some of the important ones that you will run into through the rest of this notebook.\n", - "\n", - "#### `pykx.Lambda`\n", - "\n", - "A `pykx.Lambda` is the most basic kind of function within PyKX. They take between 0 and 8 parameters and are the building blocks for most analytics written by users when interacting with data from PyKX." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pykx_lambda = kx.q('{x+y}')\n", - "type(pykx_lambda)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pykx_lambda(1, 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `pykx.Projection`\n", - "\n", - "Similar to [functools.partial](https://docs.python.org/3/library/functools.html#functools.partial), functions in PyKX can have some of their parameters fixed in advance, resulting in a new function, which is called a projection. When this projection is called, the fixed parameters are no longer required, and cannot be provided.\n", - "\n", - "If the original function had `n` total parameters, and it had `m` provided, the result would be a function (projection) that requires a user to input `n-m` parameters." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "projection = kx.q('{x+y}')(1)\n", - "projection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "projection(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## Accessing and creating PyKX objects\n", - "\n", - "Now that we have seen some of the PyKX object types that you will encounter, practically speaking how will they be created in real-world scenarios?\n", - "\n", - "### Creating PyKX objects from Pythonic data types\n", - "\n", - "One of the most common ways that PyKX data is generated is through conversions from equivalent Pythonic data types. 
PyKX natively supports conversions to and from the following common Python data formats.\n", - "\n", - "- Python\n", - "- Numpy\n", - "- Pandas\n", - "- PyArrow\n", - "\n", - "In each of the above cases generation of PyKX objects is facilitated through the use of the `kx.toq` PyKX function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pydict = {'a': [1, 2, 3], 'b': ['a', 'b', 'c'], 'c': 2}\n", - "kx.toq(pydict)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nparray = np.array([1, 2, 3, 4], dtype = np.int32)\n", - "kx.toq(nparray)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pdframe = pd.DataFrame(data = {'a':[1, 2, 3], 'b': ['a', 'b', 'c']})\n", - "kx.toq(pdframe)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Random data generation\n", - "\n", - "PyKX provides users with a module for the creation of random data of user specified PyKX types or their equivalent Python types. 
The creation of random data is useful in prototyping analytics and is used extensively within our documentation when creating test examples.\n", - "\n", - "As a first example you can generate a list of 1,000,000 random floating point values between 0 and 1 as follows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.random.random(1000000, 1.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If instead you wish to choose values randomly from a list, this can be facilitated by using the list as the second argument to your function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.random.random(5, [kx.LongAtom(1), ['a', 'b', 'c'], np.array([1.1, 1.2, 1.3])])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Random data does not only come in 1-Dimensional forms however and modifications to the first argument to be a list allow you to create multi-Dimensional PyKX Lists. The below examples are additionally using a PyKX trick where nulls/infinities can be used to generate random data across the full allowable range" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.random.random([2, 5], kx.GUIDAtom.null)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.random.random([2, 3, 4], kx.IntAtom.inf)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, users can set the seed for the random data generation explicitly allowing users to have consistency over the generated objects. 
This can be completed globally or for individual function calls" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.random.seed(10)\n", - "kx.random.random(10, 2.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.random.random(10, 2.0, seed = 10)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Running q code to generate data\n", - "\n", - "As mentioned in the introduction PyKX provides an entrypoint to the vector programming language q, as such users of PyKX can execute q code directly via PyKX within a Python session. This is facilitated through use of calls to `kx.q`.\n", - "\n", - "Create some q data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('0 1 2 3 4')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('([idx:desc til 5]col1:til 5;col2:5?1f;col3:5?`2)')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Apply arguments to a user specified function `x+y`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('{x+y}', kx.LongAtom(1), kx.LongAtom(2))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read data from a CSV file\n", - "\n", - "A lot of data that you run into for data analysis tasks comes in the form of CSV files, PyKX similar to Pandas provides a CSV reader called via `kx.q.read.csv`, in the following cell we will create a CSV to be read in using PyKX" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import csv\n", - "\n", - "with open('pykx.csv', 'w', newline='') as file:\n", - " writer = csv.writer(file)\n", - " field = [\"name\", \"age\", \"height\", 
\"country\"]\n", - " \n", - " writer.writerow(field)\n", - " writer.writerow([\"Oladele Damilola\", \"40\", \"180.0\", \"Nigeria\"])\n", - " writer.writerow([\"Alina Hricko\", \"23\", \"179.2\", \"Ukraine\"])\n", - " writer.writerow([\"Isabel Walter\", \"50\", \"179.5\", \"United Kingdom\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.read.csv('pykx.csv', types = {'age': kx.LongAtom, 'country': kx.SymbolAtom})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.remove('pykx.csv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Querying external Processes via IPC\n", - "\n", - "One of the most common usage patterns in organisations with access to data in kdb+/q you will encounter is to query this data from an external server process infrastructure. In the example below we assume that you have q installed in addition to PyKX, see [here](https://kx.com/kdb-insights-personal-edition-license-download/) to install q alongside the license access for PyKX.\n", - "\n", - "First we set up a q/kdb+ server setting it on port 5050 and populating it with some data in the form of a table `tab`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import subprocess\n", - "import time\n", - "\n", - "try:\n", - " proc = subprocess.Popen(\n", - " ('q', '-p', '5000'),\n", - " stdin=subprocess.PIPE,\n", - " stdout=subprocess.DEVNULL,\n", - " stderr=subprocess.DEVNULL,\n", - " )\n", - " time.sleep(2)\n", - "except:\n", - " raise kx.QError('Unable to create q process on port 5000')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once a q process is available you can establish a connection to it for synchronous query execution as follows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "conn = kx.SyncQConnection(port = 5000)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can now run q commands against the q server" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn('tab:([]col1:100?`a`b`c;col2:100?1f;col3:100?0Ng)')\n", - "conn('select from tab where col1=`a')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or use the PyKX query API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn.qsql.select('tab', where=['col1=`a', 'col2<0.3'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or use PyKX's context interface to run SQL server side if it's available to you" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn('\\l s.k_')\n", - "conn.sql('SELECT * FROM tab where col2>=0.5')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally the q server used for this demonstration can be shut down" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "proc.stdin.close()\n", - "proc.kill()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running analytics on objects in PyKX\n", - "\n", - "Like many Python libraries including Numpy and Pandas PyKX provides a number of ways that it's data can be used with analytics defined internal to the library and which you have self generated.\n", - "\n", - "### Using in-built methods on PyKX Vectors\n", - "\n", - "When you are interacting with PyKX Vectors you may wish to gain insights into these objects through the application of basic analytics such as calculation of the 
`mean`/`median`/`mode` of the vector" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "q_vector = kx.random.random(1000, 10.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "q_vector.mean()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "q_vector.max()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The above is useful for basic analysis but will not be sufficient for more bespoke analytics on these vectors, to allow you more control over the analytics run you can also use the `apply` method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def bespoke_function(x, y):\n", - " return x*y\n", - "\n", - "q_vector.apply(bespoke_function, 5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using in-built methods on PyKX Tables\n", - "\n", - "In addition to the vector processing capabilities of PyKX your ability to operate on Tabular structures is also important. 
Highlighted in greater depth within the Pandas-Like API documentation [here](../user-guide/advanced/Pandas_API.ipynb) these methods allow you to apply functions and gain insights into your data in a way that is familiar.\n", - "\n", - "In the below example you will use combinations of the most commonly used elements of this Table API operating on the following table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "N = 1000000\n", - "example_table = kx.Table(data = {\n", - " 'sym' : kx.random.random(N, ['a', 'b', 'c']),\n", - " 'col1' : kx.random.random(N, 10.0),\n", - " 'col2' : kx.random.random(N, 20)\n", - " }\n", - ")\n", - "example_table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can search for and filter data within your tables using `loc` similarly to how this is achieved by Pandas as follows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "example_table.loc[example_table['sym'] == 'a']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This behavior also is incorporated when retrieving data from a table through the `__get__` method as you can see here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "example_table[example_table['sym'] == 'b']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can additionally set the index columns of the table, when dealing with PyKX tables this converts the table from a `pykx.Table` object to a `pykx.KeyedTable` object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "example_table.set_index('sym')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Additional to basic data manipulation such as index setting you also get access to analytic 
capabilities such as the application of basic data manipulation operations such as `mean` and `median` as demonstrated here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('mean:')\n", - "print(example_table.mean(numeric_only = True))\n", - "\n", - "print('median:')\n", - "print(example_table.median(numeric_only = True))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can make use of the `groupby` method which groups the PyKX tabular data which can then be used for analytic application.\n", - "\n", - "In your first example let's start by grouping the dataset based on the `sym` column and then calculating the `mean` for each column based on their `sym`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "example_table.groupby('sym').mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As an extension to the above groupby you can now consider a more complex example which is making use of `numpy` to run some calculations on the PyKX data, you will see later that this can be simplified further in this specific use-case" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def apply_func(x):\n", - " nparray = x.np()\n", - " return np.sqrt(nparray).mean()\n", - "\n", - "example_table.groupby('sym').apply(apply_func)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Time-series specific joining of data can be completed using `merge_asof` joins. 
In this example a number of tables with temporal information namely a `trades` and `quotes` table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trades = kx.Table(data={\n", - " \"time\": [\n", - " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.030\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.041\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.049\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.072\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.075\")\n", - " ],\n", - " \"ticker\": [\n", - " \"GOOG\",\n", - " \"MSFT\",\n", - " \"MSFT\",\n", - " \"MSFT\",\n", - " \"GOOG\",\n", - " \"AAPL\",\n", - " \"GOOG\",\n", - " \"MSFT\"\n", - " ],\n", - " \"bid\": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],\n", - " \"ask\": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]\n", - "})\n", - "quotes = kx.Table(data={\n", - " \"time\": [\n", - " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.038\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.048\")\n", - " ],\n", - " \"ticker\": [\"MSFT\", \"MSFT\", \"GOOG\", \"GOOG\", \"AAPL\"],\n", - " \"price\": [51.95, 51.95, 720.77, 720.92, 98.0],\n", - " \"quantity\": [75, 155, 100, 100, 100]\n", - "})\n", - "\n", - "print('trades:')\n", - "display(trades)\n", - "print('quotes:')\n", - "display(quotes)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "When applying the asof join you can additionally used named arguments to ensure that it is possible to make a distinction between the tables that the columns originate. 
In this case suffixing with `_trades` and `_quotes`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trades.merge_asof(quotes, on='time', suffixes=('_trades', '_quotes'))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using PyKX/q native functions\n", - "\n", - "While use of the Pandas-Like API and methods provided off PyKX Vectors provides an effective method of applying analytics on PyKX data the most efficient and performant way you can run analytics on your data is through the use of the PyKX/q primitives which are available through the `kx.q` module.\n", - "\n", - "These include functionality for the calculation of moving averages, application of asof/window joins, column reversal etc. A full list of the available functions and some examples of their usage can be found [here](../api/pykx-execution/q.md).\n", - "\n", - "Here are a few examples of usage of how you can use these functions, broken into sections for convenience\n", - "\n", - "#### Mathematical functions\n", - "\n", - "##### mavg\n", - "\n", - "Calculate a series of average values across a list using a rolling window" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.mavg(10, kx.random.random(10000, 2.0))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### cor\n", - "\n", - "Calculate the correlation between two lists" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.cor([1, 2, 3], [2, 3, 4])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.cor(kx.random.random(100, 1.0), kx.random.random(100, 1.0))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### prds\n", - "\n", - "Calculate the cumulative product across a supplied list" - ] - 
}, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.prds([1, 2, 3, 4, 5])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Iteration functions\n", - "\n", - "##### each\n", - "\n", - "Supplied both as a standalone primitive and as a method for PyKX Lambdas `each` allows you to pass individual elements of a PyKX object to a function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.each(kx.q('{prd x}'), kx.random.random([5, 5], 10.0, seed=10))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('{prd x}').each(kx.random.random([5, 5], 10.0, seed=10))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Table functions\n", - "\n", - "##### meta\n", - "\n", - "Retrieval of metadata information about a table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qtab = kx.Table(data = {\n", - " 'x' : kx.random.random(1000, ['a', 'b', 'c']).grouped(),\n", - " 'y' : kx.random.random(1000, 1.0),\n", - " 'z' : kx.random.random(1000, kx.TimestampAtom.inf)\n", - "})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.meta(qtab)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### xasc\n", - "\n", - "Sort the contents of a specified column in ascending order" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.xasc('z', qtab)" - ] - } - ], - "metadata": { - "file_extension": ".py()", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": 
".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - }, - "mimetype": "text/x-python", - "name": "python", - "npconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": 3 - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/getting-started/PyKX Introduction Notebook.ipynb b/docs/getting-started/PyKX Introduction Notebook.ipynb new file mode 120000 index 0000000..c4d7f9f --- /dev/null +++ b/docs/getting-started/PyKX Introduction Notebook.ipynb @@ -0,0 +1 @@ +../../examples/notebooks/interface_overview.ipynb \ No newline at end of file diff --git a/docs/getting-started/installing.md b/docs/getting-started/installing.md index effd713..ce7e0db 100644 --- a/docs/getting-started/installing.md +++ b/docs/getting-started/installing.md @@ -12,7 +12,7 @@ Installation of PyKX is available in using three methods !!! Note Python Support - PyKX is only officially supported on Python versions 3.8-3.11, Python 3.7 has reached end of life and is no longer actively supported, please consider upgrading + PyKX is only officially supported on Python versions 3.8-3.12, Python 3.7 has reached end of life and is no longer actively supported, please consider upgrading === "Installing PyKX from PyPI" Ensure you have a recent version of `pip`: @@ -135,7 +135,7 @@ To provide environment specific flexibility there are two methods by which users #### Using a supplied license file directly -1. Visit https://kx.com/kdb-insights-personal-edition-license-download/ or https://kx.com/kdb-insights-commercial-evaluation-license-download/ and fill in the attached form following the instructions provided. +1. 
Visit [here](https://kx.com/kdb-insights-personal-edition-license-download/) for a personal edition or [here](https://kx.com/kdb-insights-commercial-evaluation-license-download/) for a commercial evaluation license and fill in the attached form following the instructions provided. 2. On receipt of an email from KX providing access to your license download the license file and save to a secure location on your computer. 3. Set an environment variable on your computer pointing to the folder containing the license file (instructions for setting environment variables on PyKX supported operating systems can be found [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/). * Variable Name: `QLIC` @@ -143,7 +143,7 @@ To provide environment specific flexibility there are two methods by which users #### Using the base64 encoded license content -1. Visit https://kx.com/kdb-insights-personal-edition-license-download/ or https://kx.com/kdb-insights-commercial-evaluation-license-download/ and fill in the attached form following the instructions provided. +1. Visit [here](https://kx.com/kdb-insights-personal-edition-license-download/) for a personal edition or [here](https://kx.com/kdb-insights-commercial-evaluation-license-download/) for a commercial evaluation license and fill in the attached form following the instructions provided. 2. On receipt of an email from KX providing access to your license copy the base64 encoded contents of your license provided in plain-text within the email 3. Set an environment variable `KDB_LICENSE_B64` on your computer pointing with the value copied in step 2 (instructions for setting environment variables on PyKX supported operating systems can be found [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/). 
* Variable Name: `KDB_LICENSE_B64` @@ -155,9 +155,9 @@ If looking to make use of a `k4.lic` you can do so by setting the base64 encoded KX only officially supports versions of PyKX built by KX, i.e. versions of PyKX installed from wheel files. Support for user-built installations of PyKX (e.g. built from the source distribution) is only provided on a best-effort basis. Currently, PyKX provides wheels for the following environments: -- Linux (`manylinux_2_17_x86_64`, `linux-arm64`) with CPython 3.8-3.11 -- macOS (`macosx_10_10_x86_64`, `macosx_10_10_arm`) with CPython 3.8-3.11 -- Windows (`win_amd64`) with CPython 3.8-3.11 +- Linux (`manylinux_2_17_x86_64`, `linux-arm64`) with CPython 3.8-3.12 +- macOS (`macosx_10_10_x86_64`, `macosx_10_10_arm`) with CPython 3.8-3.12 +- Windows (`win_amd64`) with CPython 3.8-3.12 ## Dependencies @@ -167,9 +167,10 @@ KX only officially supports versions of PyKX built by KX, i.e. versions of PyKX PyKX depends on the following third-party Python packages: -- `numpy~=1.20; python_version=='3.7'` -- `numpy~=1.22; python_version<'3.11', python_version>'3.7'` -- `numpy~=1.23.2; python_version>='3.11'` +- `numpy~=1.20, <2.0; python_version=='3.7'` +- `numpy~=1.22, <2.0; python_version<'3.11', python_version>'3.7'` +- `numpy~=1.23, <2.0; python_version=='3.11'` +- `numpy~=1.26, <2.0; python_version=='3.12'` - `pandas>=1.2, < 2.2.0` - `pytz>=2022.1` - `toml~=0.10.2` @@ -193,7 +194,7 @@ The following provides a breakdown of how these libraries are used within PyKX !!! Warning - Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception. `pyarrow` is supported Python 3.8-3.10 but remains in Beta for Python 3.11. 
+ Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception. `pyarrow` is supported on Python 3.8-3.10 but remains in Beta for Python 3.11-3.12. The following provides a breakdown of how these libraries are used within PyKX @@ -209,6 +210,14 @@ The following provides a breakdown of how these libraries are used within PyKX To run q or PyKX on Windows, `msvcr100.dll` must be installed. It is included in the [Microsoft Visual C++ 2010 Redistributable](https://www.microsoft.com/en-ca/download/details.aspx?id=26999). +Alternatively, all required Windows dependencies can be installed by running the `w64_install.ps1` script supplied at the root of the PyKX GitHub repository [here](https://github.com/KxSystems/pykx), using PowerShell as follows: + +```PowerShell +git clone https://github.com/kxsystems/pykx +cd pykx +.\w64_install.ps1 +``` + ## Next steps - [Quickstart guide](quickstart.md) diff --git a/docs/getting-started/interface_overview.ipynb b/docs/getting-started/interface_overview.ipynb deleted file mode 100644 index 493e0ad..0000000 --- a/docs/getting-started/interface_overview.ipynb +++ /dev/null @@ -1,1757 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Interface Overview\n", - "The purpose of this notebook is to provide a demonstration of the capabilities of PyKX for users who are familiar with q.\n", - "\n", - "To follow along please download this notebook using the following 'link.'\n", - "\n", - "This demonstration will outline the following\n", - "\n", - "1. [Initializing the library](#initializing-the-library)\n", - "2. [Generating q objects](#creating-q-objects-from-python-objects)\n", - "3. [Converting q to Python](#converting-q-to-python)\n", - "4. [Interacting with q objects](#k-object-properties-and-methods)\n", - "5.
[Context Interface](#context-interface)\n", - "6. [Querying Interface](#querying-interface)\n", - "7. [IPC communication](#ipc-communication)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Initializing the library" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Non-PyKX Requirements\n", - "\n", - "For the purpose of this demonstration the following Python libraries/modules are required" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import shutil\n", - "import sys\n", - "from tempfile import mkdtemp\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import pyarrow as pa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initialization\n", - "\n", - "Once installed via pip, PyKX can be started by importing the module. This will initialize embedded q within the Python process if a valid q license is found (e.g. in `$QHOME` or `$QLIC`), or fall back to the unlicensed version if no such license is found. This notebook will use the licensed version of PyKX. To force the usage of the unlicensed version (and silence the warning that is raised when the fallback to the unlicensed version is employed) you can add `--unlicensed` to the environment variable `$QARGS`. `$QARGS` can be set to a string of arguments which will be used to initialize the embedded q instance, as if you had used those arguments to start q from the command line." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import warnings\n", - "warnings.filterwarnings('ignore') # Do not copy, as we are skipping symlinking pyKX to QHOME the core insights libraries will not be copied over and will raise warnings\n", - "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME \n", - "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n", - "import pykx as kx\n", - "kx.q.system.console_size = [10, 80]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Evaluating q code using embedded q" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('1+1')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('1 2 3 4f')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('([]2?1f;2?0Ng;2?0b)')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('`a`b`c!(til 10;`a`b`c;5?\"abc\")')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating q objects from Python objects\n", - "\n", - "One of the strengths of the PyKX interface is the flexibility in the representations of objects that can be converted from a native Python representation to a q equivalent.\n", - "\n", - "By default data formatted in Python using the following libraries can be converted to a q equivalent representation.\n", - "\n", - "* python native types\n", - "* numpy\n", - "* pandas\n", - "* pyarrow\n", - "\n", - "These are all facilitated through use of the `K` method of the base `q` class shown before as follows\n", - "\n", - "#### Atomic Structures" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pyAtomic = 1.5\n", - "npAtomic = np.float64(1.5)\n", - "pdAtomic = pd.Series([1.5])\n", - "paAtomic = pa.array([1.5])\n", - "\n", - "print(kx.K(pyAtomic))\n", - "# print(kx.K(npAtomic))\n", - "# print(kx.K(pdAtomic))\n", - "# print(kx.K(paAtomic))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Array/Series Structures" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pyArray = [1, 2.5, \"abc\", b'defg']\n", - "npArray = np.array([1, 2.5, \"abc\", b'defg'], dtype = object)\n", - "pdSeries = pd.Series([pyArray])\n", - "paArray = pa.array([1, 2, 3])\n", - "\n", - "print(kx.K(pyArray))\n", - "# print(kx.K(npArray))\n", - "# print(kx.K(pdSeries))\n", - "# print(kx.K(paArray))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Tabular data\n", - "Round trip support for tabular data is presently supported for Pandas Dataframes and PyArrow tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pdtable = pd.DataFrame({'col1': [1, 2],\n", - " 'col2': [2., 3.],\n", - " 'col3': ['Hello', 'World']})\n", - "patable = pa.Table.from_pandas(pdtable)\n", - "\n", - "display(kx.K(pdtable))\n", - "# display(kx.K(patable))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Converting q to Python\n", - "All K objects support one or more of the following methods: `py()`, `np()`, `pd()` or `pa()`\n", - "\n", - "These methods provide an interface to the K object such that they can be converted to an analogous Python, Numpy, Pandas or PyArrow object respectively. \n", - "\n", - "Whether the view is a copy or not varies:\n", - "\n", - "1. The 'py' property always provides a copy.\n", - "2. 
The 'np' property does not copy unless the data cannot be interpreted by Numpy properly without changing it. For example, all temporal types in Numpy take 64 bits per item, so the 32 bit q temporal types must be copied to be represented as Numpy 'datetime64'/'timedelta64' elements. In cases where copying is unacceptable, the raw keyword argument can be set to true as demonstrated below.\n", - "3. The 'pd' property leverages the 'np' property to create Pandas objects, as such the same restrictions apply to it.\n", - "4. The 'pa' property leverages the 'pd' property to create PyArrow objects, as such the same restrictions apply to it.\n", - "\n", - "### Atomic Conversions\n", - "Define q items for conversion" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qbool = kx.q('0b')\n", - "qguid = kx.q('\"G\"$\"00000000-0000-0000-0000-000000000001\"')\n", - "qreal = kx.q('1.5e')\n", - "qlong = kx.q('1234')\n", - "qsymb = kx.q('`test')\n", - "qchar = kx.q('\"x\"')\n", - "qtime = kx.q('00:00:01')\n", - "qtstamp = kx.q('rand 0p')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Convert the above items to a variety of the Python types. 
Change the method used to experiment as necessary" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(qbool.py())\n", - "print(qguid.pd())\n", - "print(qreal.np())\n", - "print(qlong.pa())\n", - "print(qsymb.py())\n", - "print(qchar.np())\n", - "print(qtime.pd())\n", - "print(qtstamp.np())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Vector Conversions\n", - "Define q items for conversion" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qbool = kx.q('2?0b')\n", - "qguid = kx.q('2?0Ng')\n", - "qreal = kx.q('2?5e')\n", - "qlong = kx.q('2?100')\n", - "qsymb = kx.q('2?`4')\n", - "qchar = kx.q('\"testing\"')\n", - "qtime = kx.q('2?0t')\n", - "qtstamp = kx.q('2?0p')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Convert the above items to a variety of the Python types. Change the method used to experiment as necessary" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(qbool.py())\n", - "print(qguid.pd())\n", - "print(qreal.np())\n", - "print(qlong.pa())\n", - "print(qsymb.py())\n", - "print(qchar.np())\n", - "print(qtime.pd())\n", - "print(qtstamp.np())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dictionary conversions\n", - "Conversions between q dictionaries and Python are only supported for the `py()` method, numpy, pandas and pyarrow do not have appropriate equivalent representations and as such are not supported." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qdict=kx.q('`x`y`z!(10?10e;10?0Ng;4?`2)')\n", - "qdict.py()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Table conversions\n", - "Conversions between q keyed and unkeyed tables to an appropriate Python representation are supported for the `py()`, `np()`, `pd()` and `pa()` methods.\n", - "\n", - "Round trip conversions `q -> Python -> q` are however only supported for Pandas and PyArrow. Conversions from Numpy records are still to be completed and the most natural representation for a table in native python is a dictionary as such the conversion from python to q returns a q dictionary rather than a table\n", - "\n", - "Define a q table containing all q data types for conversion" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('N:5')\n", - "kx.q('gen_data:{@[;0;string]x#/:prd[x]?/:(`6;`6;0Ng;.Q.a),(\"xpdmnuvtbhijef\"$\\:0)}') # noqa\n", - "kx.q('dset_1D:gen_data[enlist N]')\n", - "kx.q('gen_names:{\"dset_\",/:x,/:string til count y}')\n", - "\n", - "qtab = kx.q('flip (`$gen_names[\"tab\";dset_1D])!N#\\'dset_1D') " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Convert the above table to a pandas dataframe and pyarrow table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(qtab.pd())\n", - "display(qtab.pa())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## K Object Properties and Methods\n", - "\n", - "### Miscellaneous Methods\n", - "\n", - "All K objects support the following methods/properties: \n", - "\n", - "| Method/Property | Description |\n", - "|:----------------|:------------|\n", - "| `t` | Return the q numeric datatype |\n", - "| 
`is_atom` | Is the item a q atomic type? |" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "str(kx.q('([] til 3; `a`b`c)'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "repr(kx.q('\"this is a char vector\"'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('`atom').is_atom" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('`not`atom').is_atom" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.q('([]10?1f;10?1f)').t)\n", - "print(kx.q('`a`b`c!1 2 3').t)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# q list\n", - "qlist = kx.q('(1 2 3;1;\"abc\")')\n", - "list(qlist)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note the difference between this and the conversion of the same `qlist` to a true Python representation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qlist.py()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Numerical comparisons/functions\n", - "Various q datatypes vectors/atoms/tables can also interact with native Python mathematical comparisons and functions, the following provides an outline of a subset of the comparisons/functions that are supported:\n", - "\n", - "| Function | Description |\n", - "|:---------|:------------|\n", - "| `abs` | Absolute value of a number |\n", - "| `<` | Less than |\n", - "| `>=` | Greater than or equal to |\n", - "| `+` | Addition |\n", - "| `-` | Subtraction |\n", - "| `/` | Division |\n", - "| `*` | Multiplication |\n", - "| `**` | Power |\n", - "| `%` | Modulo | \n", - 
"\n", - "#### Define q/Python atoms and lists for comparisons" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qlong = kx.q('-5')\n", - "pylong = 5\n", - "qlist = kx.q('-3+til 5')\n", - "pylist = [1, 2, 3, 4, 5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Apply a number of the above comparisons/functions to python/q objects in combination" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(abs(qlong))\n", - "print(abs(qlist))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(qlong>pylong)\n", - "print(pylist>qlist)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(qlong*pylong)\n", - "print(pylist*qlist)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The `raw` q -> Python conversion keyword argument" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "All of the interfaces to the K objects support the `raw` keyword argument. When the `raw` keyword argument is set to `True` the interface forgoes some of the features when converting the object in exchange for greater efficiency." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tab = kx.q('([]10?1f;10?1f;10?0p;10?0Ng)')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tab.pd()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tab.pd(raw=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qvec = kx.q('10?0t')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qvec.np()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qvec.np(raw=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Editing K objects\n", - "One of the expected aspects of interacting with Python objects natively is being able to index, slice, compare and modify the objects when it is reasonable to do so.\n", - "\n", - "The following sections show the interaction of a user with a q vector and table\n", - "\n", - "#### Vectors" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "v = kx.q('12?100')\n", - "print(v)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get the element at index 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "v[2]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Retrieve a slice containing elements 3-5" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "v[3:6]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Compare all vector elements to 50" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "v < 50" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Tables\n", - "\n", - "This only applies to in-memory tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tab = kx.q('([]4?5;4?`2;4?0p;4?0Ng)')\n", - "tab.pd()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tab['x1']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tab['x2'].py()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Splayed and Partitioned Tables\n", - "\n", - "Splayed and Partitioned tables are at present only partially supported. Users will be able to query the data and access information around the columns through the `keys` method but will not be able to retrieve the values contained within the data or convert to an analogous Python representation. 
These will raise a `NotImplementedError`.\n", - "\n", - "Research on this is still pending and any changes to support these conversions will be include an update here\n", - "\n", - "#### Splayed Tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tmp_dir = mkdtemp()\n", - "orig_dir = os.getcwd()\n", - "os.chdir(tmp_dir)\n", - "kx.q('`:db/t/ set ([] a:til 3; b:\"xyz\"; c:-3?0Ng)')\n", - "kx.q(r'\\l db')\n", - "t_splayed = kx.q('t')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "List the columns that are represented in the splayed table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "list(t_splayed.keys())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Query the Splayed table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('?[`t;enlist(=;`a;1);0b;()]')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Attempt to evaluate the values method on the table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " t_splayed.values()\n", - "except NotImplementedError:\n", - " print('NotImplementedError was raised', file=sys.stderr)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.chdir(orig_dir)\n", - "shutil.rmtree(tmp_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Partitioned Tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tmp_dir = mkdtemp()\n", - "orig_dir = os.getcwd()\n", - "os.chdir(tmp_dir)\n", - "kx.q('`:db/2020.01/t/ set ([] a:til 3; b:\"xyz\"; c:-3?0Ng)')\n", - "kx.q('`:db/2020.02/t/ set ([] a:1+til 3; b:\"cat\"; 
c:-3?0Ng)')\n", - "kx.q('`:db/2020.03/t/ set ([] a:2+til 3; b:\"bat\"; c:-3?0Ng)')\n", - "kx.q(r'\\l db')\n", - "t_partitioned = kx.q('t')\n", - "t_partitioned" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "List partitioned table columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "list(t_partitioned.keys())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Query partitioned table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('?[`t;enlist(=;`a;1);0b;enlist[`c]!enlist`c]')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Attempt to convert partitioned table to a pandas dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " t_partitioned.pd()\n", - "except NotImplementedError:\n", - " pass" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.chdir(orig_dir)\n", - "shutil.rmtree(tmp_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### q Functions\n", - "\n", - "All functions defined in q can be called from PyKX via function objects. These function calls can take Python or q objects as input arguments. It is required that each argument being supplied to the function be convertible to a q representation using `kx.K(arg)`.\n", - "\n", - "Arguments can be provided either positionally, or as keyword arguments when the q function has named parameters." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "f = kx.q('{x*y+z}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "f(12, 2, 1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "f(12, 2, 1).py()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "g = kx.q('{[arg1;arg2] deltas sum each arg1 cross til arg2}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "g(arg2=7, arg1=kx.q('3?45')).np()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tok = kx.q(\"$'\")\n", - "print(repr(tok))\n", - "print(str(tok))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tok(kx.q('\"B\"'), kx.q('\" \",.Q.an')).np()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Context Interface\n", - "\n", - "The context interface provides a convenient way to interact with q contexts and namespaces using either the embedded q instance `pykx.q` or an IPC connection made with `pykx.QConnection`.\n", - "\n", - "Accessing an attribute which is not defined via the context interface, but which corresponds to a script (i.e. a `.q` or `.k` file), will cause it to be loaded automatically. Scripts are search for if they are:\n", - "1. In the same directory as the process running PyKX\n", - "2. In `QHOME`\n", - "\n", - "Other paths can be searched for by appending them to `kx.q.paths`. 
Alternatively, you can manually load a script with `kx.q.ctx._register`.\n", - "\n", - "Functions which are registered via the context interface are automatically added as callable members of their `QContext`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Builtin namespaces\n", - "\n", - "As a result of the infrastructure outlined above there are a number of namespaces which are automatically added as extensions to the q base class on loading. This includes the `.q`, `.z`, `.Q` and `.j` namespaces contained within `kx.q.k`, the following provides some example invocations of each.\n", - "\n", - "A number of the functions contained within the .z namespace are not callable, including but not limited to the following:\n", - "\n", - "- .z.ts\n", - "- .z.ex\n", - "- .z.ey\n", - "\n", - "Run `dir(kx.q.z)` to see what is available in the `.z` namespace.\n", - "\n", - "#### .q functionality\n", - "All the functions a user would expect to be exposed from q are callable as python methods off the q base class, the following provides a limited number of example invocations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.q.til(10))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.q.max([100, 2, 3, -4]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.q.mavg(4, kx.q.til(10)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.q.tables())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "s = kx.q('([]a:1 2;b:2 3;c:5 7)')\n", - "s" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t = kx.q('([]a:1 2 3;b:2 3 7;c:10 20 
30;d:\"ABC\")').pd()\n", - "t" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.uj(s,t)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `.Q` namespace\n", - "The functions within the `.Q` namespace are also exposed as an extension.\n", - "\n", - "**Note**: While all functions within the `.Q` namespace are available, compared to the `.q`/`.z` namespaces these functions can be complicated to implement within the constraints of the PyKX interface for example `.Q.dpft` can be implemented but requires some thought" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.Q" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.Q.an" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.Q.btoa(b'Hello World!')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t = kx.q('([]a:3 4 5;b:\"abc\";c:(2;3.4 3.2;\"ab\"))')\n", - "kx.q.each(kx.q.Q.ty, t['a','b','c'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `.j` namespace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "json = b'{\"x\":1, \"y\":\"test\"}'\n", - "qdict = kx.q.j.k(json)\n", - "print(qdict)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.j.j(qdict).py()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### User defined extensions\n", - "As alluded to above users can add their own extension modules to PyKX by placing a relevant `.q`/`.k` to their `$QHOME`. 
The following shows the addition of an extension to complete a specific query and set some data which we would like to be available.\n", - "\n", - "#### Extension Example\n", - "The following example we will create (and later delete) the file '$QHOME/demo_extension.q'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "demo_extension_source = '''\n", - "\\d .demo_extension\n", - "N:100\n", - "test_data:([]N?`a`b`c;N?1f;N?10;N?0b)\n", - "test_function:{[data]\n", - " analytic_keys :`max_x1`avg_x2`med_x3;\n", - " analytic_calcs:(\n", - " (max;`x1);\n", - " (avg;`x2);\n", - " (med;`x3));\n", - " ?[data;\n", - " ();\n", - " k!k:enlist `x;\n", - " analytic_keys!analytic_calcs\n", - " ]\n", - " }\n", - "'''\n", - "demo_extension_filename = kx.qhome/'demo_extension.q'\n", - "with open(demo_extension_filename, 'w') as f:\n", - " f.write(demo_extension_source)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.demo_extension.test_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.demo_extension.test_function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.demo_extension.test_function(kx.q.demo_extension.test_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.remove(demo_extension_filename)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "--- " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Querying Interface\n", - "\n", - "One of the core purposes of this module is to provide users who are unfamiliar with q with a Pythonic approaches to interacting with q objects.\n", - "\n", - "One of the ways this is intended to be achieved is to provide Pythonic wrappers around 
common q tasks in a way that feels familiar to a Python developer but is still efficient/flexible.\n", - "\n", - "The querying interface is an example of this. It provides a wrapper around the q functional select syntax to facilitate the querying of persisted and local data while also allowing Python objects to be used as inputs where it is relevant.\n", - "\n", - "### help is provided\n", - "Users can use the Python `help` function to display the docstring associated with each of the functions within the `query` module" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# help(kx.q.qsql)\n", - "# help(kx.q.qsql.select)\n", - "# help(kx.q.qsql.exec)\n", - "# help(kx.q.qsql.update)\n", - "# help(kx.q.qsql.delete)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Select functionality\n", - "The select functionality is provided both as an individually callable function or as a method off all tabular data.\n", - "\n", - "Generate a table and assign the Python object as a named entity within the q memory space." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qtab = kx.q('([]col1:100?`a`b`c;col2:100?1f;col3:100?5)')\n", - "kx.q['qtab'] = qtab" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Retrieve the entirety of the table using an empty select" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(qtab)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Retrieve the entire table using the module function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(qtab)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Retrieve the entire table based on a named reference\n", - "\n", - "This is important because it provides a method of querying partitioned/splayed tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select('qtab')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The where keyword**\n", - "\n", - "Where clauses can be provided as a named keyword and are expected to be formatted as an individual string or a list of strings as in the following examples.\n", - "\n", - "By default no where conditions are applied to a select query" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# kx.q.qsql.select(qtab, where='col1=`a')\n", - "kx.q.qsql.select(qtab, where=['col3<0.5', 'col2>0.7'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The columns keyword**\n", - "\n", - "The columns keyword is used to apply analytics to specific columns of the data or to select and rename columns within the dataset.\n", - "\n", - "By default if a user does not provide this information it is 
assumed that all columns are to be returned without modification.\n", - "\n", - "The columns keyword is expected to be a dictionary mapping the name that the new table will display for the column to the logic with which this data is modified." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(qtab, columns={'col1': 'col1','newname': 'col2'})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(qtab, columns={'max_col2': 'max col2'}, where='col1=`a')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The by keyword**\n", - "\n", - "The by keyword is used to apply analytics to group data based on common characteristics.\n", - "\n", - "By default if a user does not provide this information it is assumed that no grouping ins applied.\n", - "\n", - "The by keyword is expected to be a dictionary mapping the name to be applied to the by clause of the grouping to the column of the original table which is being used for the grouping." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(\n", - " qtab,\n", - " columns={'minCol2': 'min col2', 'medCol3': 'med col3'},\n", - " by={'groupCol1': 'col1'},\n", - " where=['col3<0.5', 'col2>0.7']\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete functionality\n", - "The delete functionality is provided both as an individually callable function or as a method off all tabular data. \n", - "\n", - "The following provides a outline of how this can be invoked in both cases.\n", - "\n", - "**Note**: By default the delete functionality **does not** modify the underlying representation of the table. This is possible under limited circumstances as is outline in a later section below." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.delete(qtab)\n", - "kx.q.qsql.delete('qtab')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The columns keyword**\n", - "\n", - "The columns keyword is used to denote the columns that are to be deleted from a table.\n", - "\n", - "By default if a user does not provide this information it is assumed that all columns are to be deleted.\n", - "\n", - "The columns keyword is expected to be a string or list of strings denoting the columns to be deleted.\n", - "\n", - "**Note**: The columns and where clause can not be used in the same function call, this is not supported by the underlying functional delete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# kx.q.qsql.delete(qtab, columns = 'col3')\n", - "kx.q.qsql.delete(qtab, columns = ['col1','col2'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The where keyword**\n", - "\n", - "The where keyword is used to filter rows of the data to be deleted.\n", - "\n", - "By default if no where condition is supplied it is assumed that all rows of the dataset are to be deleted.\n", - "\n", - "The where keyword is expected when not default to be a string on which to apply the filtering\n", - "\n", - "**Note**: The columns and where clause can not be used in the same function call, this is not supported by the underlying functional delete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.delete(qtab, where='col1 in `a`b')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The modify keyword**\n", - "\n", - "The modify keyword is used when the user intends for the underlying representation of a named entity within the q memory space to be modified. 
This is only applicable when calling the function via the `kx.q.qsql.delete` representation of the function.\n", - "\n", - "By default the underlying representation is not modified with `modify=False` in order to change the underlying representation a user must set `modify=True`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.delete('qtab', where = 'col1=`c', modify=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('qtab')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Update and exec functionality\n", - "\n", - "Both the q functional update and exec functionality are supported by this interface. For brevity they are not shown in the same detail as the previous examples" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# kx.q.qsql.exec(qtab, 'col1')\n", - "# kx.q.qsql.exec(qtab, columns='col2', by='col1')\n", - "kx.q.qsql.exec(qtab, columns={'avgCol3': 'avg col3'}, by='col1')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# kx.q.qsql.update({'avg_col2':'avg col2'}, by={'col1': 'col1'})\n", - "# kx.q.qsql.update({'col3':100}, where='col1=`a')\n", - "kx.q.qsql.update('qtab', {'col2': 4.2}, 'col1=`b', modify=True)\n", - "kx.q['qtab']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## IPC Communication\n", - "\n", - "This module also provides users with the ability to retrieve data from remote q processes. This is supported in the absence and presence of a valid q license.\n", - "\n", - "More documentation including exhaustive lists of the functionality available can be found in the [`IPC`](../api/ipc.html) documentation." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Establishing a Connection\n", - "Connections to external q processes are established using the `pykx.QConnection` class. On initialization the instance of this class will establish a connection to the specified q process using the provided connection information (e.g. `host`, `port`, `username`, `password`, etc.). Refer to the PyKX IPC module documentation for more details about this interface, or run `help(pykx.QConnection)`.\n", - "\n", - "### IPC Example\n", - "The following is a basic example of this functionality a more complex subscriber/publisher example is provided in `examples/ipc/`\n", - "\n", - "This example will work in the presence or absence of a valid q license \n", - "\n", - "#### Create the external q process\n", - "To run this example, the Python code in the following cell will do the equivalent to executing the following in a terminal:\n", - "\n", - "```\n", - "$ q -p 5000\n", - "q)tab:([]100?`a`b`c;100?1f;100?0Ng)\n", - "q).z.ps:{[x]0N!(`.z.ps;x);value x}\n", - "q).z.pg:{[x]0N!(`.z.pg;x);value x}\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import subprocess\n", - "import time\n", - "proc = subprocess.Popen(\n", - " ('q', '-p', '5000'),\n", - " stdin=subprocess.PIPE,\n", - " stdout=subprocess.DEVNULL,\n", - " stderr=subprocess.DEVNULL,\n", - ")\n", - "proc.stdin.write(b'tab:([]100?`a`b`c;100?1f;100?0Ng)\\n')\n", - "proc.stdin.write(b'.z.ps:{[x]0N!(`.z.ps;x);value x}\\n')\n", - "proc.stdin.write(b'.z.pg:{[x]0N!(`.z.pg;x);value x}\\n')\n", - "proc.stdin.flush()\n", - "time.sleep(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Open a connection to this process" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Normally a `with` block would be used for proper context management, but for the 
sake of this example the connection will be accessed and closed directly\n", - "conn = kx.QConnection('localhost', 5000)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Make a simple synchronous request" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qvec = conn('2+til 2')\n", - "qvec" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Make a simple asynchronous request" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn('setVec::10?1f', wait=False)\n", - "setVec = conn('setVec')\n", - "setVec" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Run a defined function server side with provided arguments" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pytab = pd.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6]})\n", - "conn('{[table;column;rows]rows#column#table}', pytab, ['col1'], 1).pd()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn('{[table;column]newtab::table column}', pytab, 'col1', wait=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn('newtab').np()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Disconnect from the q process" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn.close()\n", - "# This happens automatically when you leave a `with` block that is managing a connection, or when a connection is garbage-collected." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Shutdown the q process we were connected to for the IPC demo\n", - "proc.stdin.close()\n", - "proc.kill()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - } - ], - "metadata": { - "file_extension": ".py()", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.5" - }, - "mimetype": "text/x-python", - "name": "python", - "npconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": 3 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/getting-started/q_magic_command.ipynb b/docs/getting-started/q_magic_command.ipynb index aa2c079..b0ec902 100644 --- a/docs/getting-started/q_magic_command.ipynb +++ b/docs/getting-started/q_magic_command.ipynb @@ -133,6 +133,8 @@ "--nolarge: Disable messages over 2GB being sent / received\n", "--tls: Use a tls connection\n", "--unix: Use a unix connection\n", + "--reconnection_attempts: An int object denoting how many\n", + " reconnection attempts to make\n", "--noctx: Disable the context interface\n", "```\n", "\n", diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index aebc930..58c2634 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -361,7 +361,7 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt 3 0.452041 4 4 0.019615 0 ``` - + * Convert PyKX objects to PyArrow ```python diff --git a/docs/release-notes/changelog.md b/docs/release-notes/changelog.md index 85b044b..a5b8343 100644 --- a/docs/release-notes/changelog.md +++ 
b/docs/release-notes/changelog.md @@ -8,6 +8,318 @@ Currently PyKX is not compatible with Pandas 2.2.0 or above as it introduced breaking changes which cause data to be cast to the incorrect type. +## PyKX 2.4.0 + +#### Release Date + +2024-03-20 + +### Additions + +- Support for q/kdb+ `4.1` documentation [here](https://code.kx.com/q/releases/ChangesIn4.1/) added as an opt-in capability, this functionality is enabled through setting `PYKX_4_1_ENABLED` environment variable. + + ```python + >>> import os + >>> os.environ['PYKX_4_1_ENABLED'] = 'True' + >>> import pykx as kx + >>> kx.q.z.K + pykx.FloatAtom(pykx.q('4.1')) + ``` + +- Added support for Python `3.12`. + - Support for PyArrow in this python version is currently in Beta. +- Added conversion of NumPy arrays of type `datetime64[s]`, `datetime64[ms]`, `datetime64[us]` to `kx.TimestampVector` +- Added [Table.sort_values()](../user-guide/advanced/Pandas_API.ipynb#tablesort_values), [Table.nsmallest()](../user-guide/advanced/Pandas_API.ipynb#tablensmallest) and [Table.nlargest()](../user-guide/advanced/Pandas_API.ipynb#tablenlargest) to the Pandas like API for sorting tables. +- `Table.rename()` now supports non-numerical index columns and improved the quality of errors thrown. +- Added the `reconnection_attempts` key word argument to `SyncQConnection`, `SecureQConnection`, and `AsyncQConnection` IPC classes. This argument allows IPC connection to be automatically re-established when it is lost and a server has reinitialized. + + ```python + >>> import pykx as kx + >>> conn = kx.SyncQConnection(port = 5050, reconnection_attempts=4) + >>> conn('1+1') # Following this call the server on port 5050 was closed for 2 seconds + pykx.LongVector(pykx.q('2')) + >>> conn('1+2') + WARNING: Connection lost attempting to reconnect. + Failed to reconnect, trying again in 0.5 seconds. + Failed to reconnect, trying again in 1.0 seconds. + Connection successfully reestablished. 
+ pykx.LongAtom(pykx.q('3')) + ``` + +- Added `--reconnection_attempts` option to Jupyter `%%q` magic making use of the above IPC logic changes. +- Addition of environment variable/configuration value `PYKX_QDEBUG` which allows debugging backtrace to be displayed for all calls into q instead of requiring a user to specify debugging is enabled per-call. This additionally works for remote IPC calls and utilisation of Jupyter magic commands. + + === "Behavior prior to change" + + ```python + >>> import pykx as kx + >>> kx.q('{x+1}', 'e') + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/embedded_q.py", line 230, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: type + >>> kx.q('{x+1}', 'e', debug=True) + backtrace: + [2] {x+1} + ^ + [1] (.Q.trp) + + [0] {[pykxquery] .Q.trp[value; pykxquery; {if[y~();:(::)];2@"backtrace: + ^ + ",.Q.sbt y;'x}]} + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/embedded_q.py", line 230, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: type + ``` + + === "Behavior post change" + + ```python + >>> import os + >>> os.environ['PYKX_QDEBUG'] = 'True' + >>> import pykx as kx + >>> kx.q('{x+1}', 'e') + backtrace: + [2] {x+1} + ^ + [1] (.Q.trp) + + [0] {[pykxquery] .Q.trp[value; pykxquery; {if[y~();:(::)];2@"backtrace: + ^ + ",.Q.sbt y;'x}]} + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/embedded_q.py", line 230, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File 
"pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: type + ``` + +### Fixes and Improvements + +- Added instructions for script to install Windows dependencies. +- Resolved segfaults on Windows when PyKX calls Python functions under q. + + === "Behavior prior to change" + + ```python + >>> import pykx as kx + >>> kx.q('{[f;x] f x}', sum, kx.q('4 4#til 16')) + + Sorry, this application or an associated library has encountered a fatal error and will exit. + If known, please email the steps to reproduce this error to tech@kx.com + with a copy of the kdb+ startup banner and the info printed below. + Thank you. + Fault address 0000000066110980 + ``` + + === "Behavior post change" + + ```python + >>> import pykx as kx + >>> kx.q('{[f;x] f x}', sum, kx.q('4 4#til 16')) + + pykx.LongVector(pykx.q('24 28 32 36')) + ``` + +- Updated kdb Insights Core libraries to 4.0.8, see [here](https://code.kx.com/insights/1.8/core/release-notes/latest.html#408) for more information. +- Updated `libq` 4.0 version to 2024.03.04 for all supported OS's. +- Fix issue where use of valid C backed q `code` APIs could result in segmentation faults when called. + + === "Behavior prior to change" + + ```python + >>> import pykx as kx + >>> isf = kx.q('.pykx.util.isf') + >>> isf + pykx.Foreign(pykx.q('code')) + >>> isf(True) + Sorry, this application or an associated library has encountered a fatal error and will exit. + If known, please email the steps to reproduce this error to tech@kx.com + with a copy of the kdb+ startup banner and the info printed below. + Thank you. + SIGSEGV: Fault address 0x85 + ``` + + === "Behavior post change" + + ```python + >>> import pykx as kx + >>> isf = kx.q('.pykx.util.isf') + >>> isf + pykx.Foreign(pykx.q('code')) + >>> isf(True) + pykx.BooleanAtom(pykx.q('0b')) + ``` + +- Fixed error since 2.2.1 in unlicensed mode when converting `TimestampVector` containing nulls to Python. 
+ + === "Behavior prior to change" + + ```python + >>> conn('enlist 0Np').py() + Traceback (most recent call last): + File "", line 1, in + File "/home/rocuinneagain/.local/lib/python3.10/site-packages/pykx/wrappers.py", line 2443, in py + converted_vector[i]=q('0Np') + File "/home/rocuinneagain/.local/lib/python3.10/site-packages/pykx/embedded_q.py", line 216, in __call__ + raise LicenseException("run q code via 'pykx.q'") + pykx.exceptions.LicenseException: A valid q license must be in a known location (e.g. `$QLIC`) to run q code via 'pykx.q'. + ``` + + === "Behavior post change" + + ```python + >>> conn('enlist 0Np').py() + [pykx.TimestampAtom(pykx.q('0Np'))] + ``` + +- Each call to the PyKX query API interned 3 new unique symbols. This has now been removed. +- When using `pykx.schema.builder` users could not make use of `pykx.*Vector` objects for defining column types. This could result in confusion due to support for these types in other areas of the library (type casting etc). + + === "Behavior prior to change" + + ```python + >>> pykx.schema.builder({'x': pykx.LongVector, 'x1': pykx.LongAtom}) + Exception: Error: raised for column x error + ``` + + === "Behavior post change" + + ```python + >>> pykx.schema.builder({'x': pykx.LongVector, 'x1': pykx.LongAtom}) + pykx.Table(pykx.q(' + x x1 + ---- + ')) + ``` + +- Application of `astype` conversions could error if attempting to convert the column of a dataset to its current type. This could be raised if using `astype` explicitly or when used internally by PyKX, such as when defining the expected type when reading a CSV file. +- PyKX database table listing now uses `kx.q.Q.pt` instead of `kx.q.tables()` when presenting the available tables to users. This more accurately reflects the tables that can be interacted with by users within the process. 
+ + === "Behavior prior to change" + + ```python + >>> tab = kx.Table(data = {'sym': ['a', 'b', 'c'], 'num': [1, 2, 3]}) + >>> tab.astype({'sym': kx.SymbolAtom}) + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/embedded_q.py", line 229, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: type + ``` + + === "Behavior post change" + + ```python + pykx.Table(pykx.q(' + sym num + ------- + a 1 + b 2 + c 3 + ')) + ``` + +- Fix to ensure that, if set, `PYKX_Q_LIB_LOCATION` is used as the value for `QHOME` when initializing PyKX. This ensures all symlinking happens in the expected location and that `\l` loading of files behaves correctly. +- Renamed `labels` parameter in `Table.rename()` to `mapper` to match Pandas. Added deprecation warning to `labels`. +- Fixed bug where keys were being enlisted when `Table.rename()` was called. + + === "Behavior prior to change" + + ```python + >>> tab = kx.KeyedTable(data=kx.q('([] Policy: 1 2 3)')) + >>> tab.rename(index={0:'a'}) + idx Policy + -------------- + ,`a 1 + ,1 2 + ,2 3 + ``` + + === "Behavior post change" + + ```python + >>> tab = kx.KeyedTable(data=kx.q('([] Policy: 1 2 3)')) + >>> tab.rename(index={0:'a'}) + idx Policy + -------------- + `a 1 + 1 2 + 2 3 + ``` + +- Deprecation of `type` column in `dtypes` output as it is a reserved keyword. Use new `datatypes` column instead. +- Query API `merge` method no longer attempts to automatically key/unkey input tables when `q_join=True`. Users must pass correctly formed inputs. 
+ + === "Behavior prior to change" + + ```python + >>> import pykx as kx + >>> tab1 = kx.Table(data={'k': ['foo', 'bar', 'baz', 'foo'], 'v': [1, 2, 3, 5]}) + >>> tab2 = kx.Table(data={'k': ['foo', 'bar', 'baz', 'foo'], 'v': [5, 6, 7, 8]}) + >>> tab2_keyed = tab2.set_index('k') + >>> tab1.merge(tab2, how='left', q_join=True) + ``` + + === "Behavior post change" + + ```python + >>> tab1.merge(tab2_keyed, how='left', q_join=True) + ``` + + +### Beta Features + +- Addition of `Compress` and `Encrypt` classes to allow users to set global configuration and for usage within Database partition persistence. + + === "Standalone usage of compression/encryption" + + ```python + >>> import pykx as kx + >>> compress = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=8) + >>> kx.q.z.zd + pykx.Identity(pykx.q('::')) + >>> compress.global_init() + pykx.LongVector(pykx.q('17 2 8')) + >>> encrypt = kx.Encrypt(path='/path/to/the.key', password='PassWord') + >>> encrypt.load_key() + ``` + + === "Usage of compression/encryption with Database" + + ```python + >>> import pykx as kx + >>> compress = kx.Compress(algo=kx.CompressionAlgorithm.lz4hc, level=10) + >>> db = kx.DB(path='/tmp/db') + >>> db.create(kx.q('([]10?1f;10?1f)'), 'tab', kx.q('2020.03m'), compress=compress) + >>> kx.q('-21!`:/tmp/db/2020.03/tab/x') + pykx.Dictionary(pykx.q(' + compressedLength | 140 + uncompressedLength| 96 + algorithm | 4i + logicalBlockSize | 17i + zipLevel | 10i + ')) + ``` + +- On Windows from version 2.3.0 PyKX would raise the following warning message at startup about incompatibility between Threading feature and Windows. This is now only raised when `PYKX_THREADING` is set. + + ```python + C:\Users\username\AppData\Roaming\Python\Python311\site-packages\pykx\config.py:220: UserWarning: PYKX_THREADING is only supported on Linux / MacOS, it has been disabled. 
+ warn('PYKX_THREADING is only supported on Linux / MacOS, it has been disabled.') + ``` + ## PyKX 2.3.2 #### Release Date @@ -17,6 +329,7 @@ ### Fixes and Improvements - Update of PyKX 4.0 linux shared object to version 2024.02.09, this update is to facilitate deployments on more secure linux/linux-arm environments. +- Update `Table.rename()` to skip over columns not in table instead of throwing error to match `pandas`. ## PyKX 2.3.1 @@ -58,7 +371,7 @@ ```python >>> def func(n=2): ... return n - ... + ... >>> kx.q['func']= func >>> kx.q('func', '') pykx.SymbolAtom(pykx.q('`')) @@ -108,7 +421,7 @@ >>> pd.Series([1, pd.NA, 3], dtype=pd.Int64Dtype()).to_numpy() array([ 1., nan, 3.]) - >>> kx.K(pd.Series([1, pd.NA, 3], dtype=pd.Int64Dtype())) + >>> kx.K(pd.Series([1, pd.NA, 3], dtype=pd.Int64Dtype())) pykx.FloatVector(pykx.q('1 -9.223372e+18 3')) ``` @@ -158,13 +471,13 @@ >>> qtab = kx.Table(data={'x': kx.random.random(N, 1.0, seed=10)}) >>> qtab pykx.Table(pykx.q(' - x + x ----------- - 0.0891041 - 0.8345194 - 0.3621949 - 0.999934 - 0.3837986 + 0.0891041 + 0.8345194 + 0.3621949 + 0.999934 + 0.3837986 .. ')) >>> kx.q.qsql.select(qtab, where = ['x>0.5'], inplace=True) @@ -294,7 +607,7 @@ - Deprecation of internally used environment variable `UNDER_PYTHON` which has been replaced by `PYKX_UNDER_PYTHON` to align with other internally used environment variables. - Fix `Unknown default conversion type` error when `PYKX_DEFAULT_CONVERSION` is set to `k` - Numpy dependency for Python 3.11 corrected to `numpy~=1.23.2` -- `pykx.q.qsql.select` and `pykx.q.qsql.exec` statements no longer use `get` calls for table retrieval unnecessarily when operating locally or via IPC. +- `pykx.q.qsql.select` and `pykx.q.qsql.exec` statements no longer use `get` calls for table retrieval unnecessarily when operating locally or via IPC. 
- Null integral values in table keys will no longer convert the underlying vectors to floats when converting from a `pykx.KeyedTable` to `pandas.DataFrame` === "Behaviour prior to change" @@ -313,7 +626,7 @@ ```python >>> kx.q('`col1 xkey ([] col1: (1j; 2j; 0Nj); col2:(1j; 2j; 0Nj); col3:`a`b`c)').pd() col2 col3 - col1 + col1 1 1 a 2 2 b -- -- c @@ -326,7 +639,7 @@ ```python >>> kx.q('`col1`col2 xkey ([] col1: (1j; 2j; 0Nj); col2:(1j; 2j; 0Nj); col3:`a`b`c)').pd() col3 - col1 col2 + col1 col2 1 1 a 2 2 b -9223372036854775808 -9223372036854775808 c @@ -374,14 +687,14 @@ ``` - Added consistent conversion of `datetime.time` objects - + === "Behavior prior to change" ```q q).pykx.pyexec"from datetime import time" q).pykx.eval["time(11, 34, 56)"]` foreign - ``` + ``` ```python >>> kx.toq(time(11, 34, 56)) @@ -398,7 +711,7 @@ q).pykx.pyexec"from datetime import time" q).pykx.eval["time(11, 34, 56)"]` 0D11:34:56.000000000 - ``` + ``` ```python >>> kx.toq(time(11, 34, 56)) @@ -406,15 +719,15 @@ ``` - Fixed null value for `TimestampVector` returning `NoneType` instead of `pykx.wrappers.TimestampAtom` for `.py()` method - + === "Before Null Change" - + ```python >>> for x in kx.q('0Np,.z.p').py(): ... print(type (x)) - ``` + ``` === "After Null Change" @@ -455,28 +768,28 @@ ')) >>> tab.agg(['min', 'mean']) pykx.KeyedTable(pykx.q(' - function| x x1 + function| x x1 --------| ----------------- min | 0 0.009771725 - mean | 4.588 5.152194 + mean | 4.588 5.152194 ')) - >>> + >>> >>> group_tab = kx.Table(data={ ... 'x': kx.random.random(1000, ['a', 'b']), ... 
'y': kx.random.random(1000, 10.0)}) >>> group_tab.groupby('x').agg('mean') pykx.KeyedTable(pykx.q(' - x| y + x| y -| -------- a| 5.239048 b| 4.885599 ')) >>> group_tab.groupby('x').agg(mode) pykx.KeyedTable(pykx.q(' - x| y + x| y -| -------- a| 1.870281 - b| 4.46898 + b| 4.46898 ')) ``` @@ -592,7 +905,7 @@ ``` - Added feature to extract individual elements of both `TimestampAtom` and `TimestampVector` in a pythonic way including: - + * `date` - DateAtom / DateVector * `time` - TimeAtom / TimeVector * `year` - IntAtom / IntVector @@ -604,7 +917,7 @@ ```python >>> timestamp_atom = kx.q('2023.10.25D16:42:01.292070013') - + >>> timestamp_atom.time pykx.TimeAtom(pykx.q('16:42:01.292')) >>> timestamp_atom.date @@ -614,7 +927,7 @@ >>> timestamp_atom_2 = kx.q('2018.11.09D12:21:08.456123789') >>> timestamp_vector = kx.q('enlist', timestamp_atom, timestamp_atom_2) - + >>> timestamp_vector.time pykx.TimeVector(pykx.q('16:42:01.292 12:21:08.456')) >>> timestamp_vector.date @@ -714,7 +1027,7 @@ - Jupyter Notebook: - Removal of `FutureWarning` when displaying tables and dictionaries. - Revert issue causing results to be displayed as pointer references rather than Python objects in unlicensed mode. - - `%%q` magic now suppresses displaying of `::`. + - `%%q` magic now suppresses displaying of `::`. - `%%q` magic addition of `--display` option to have `display` be called on returned items in place of the default `print`. 
- `PyKXReimport` now additionally unsets/resets: `PYKX_SKIP_UNDERQ`, `PYKX_EXECUTABLE`, `PYKX_DIR` diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index ee56ed9..79ead84 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -271,7 +271,7 @@ "source": [ "### Table.dtypes\n", "\n", - "Get the pandas dtype of each column" + "Get the datatypes of the table columns" ] }, { @@ -375,7 +375,7 @@ { "cell_type": "code", "execution_count": null, - "id": "603d5534", + "id": "77ab64ab", "metadata": {}, "outputs": [], "source": [ @@ -960,7 +960,7 @@ }, { "cell_type": "markdown", - "id": "05765a04", + "id": "05124590", "metadata": {}, "source": [ "### Table.sample()\n", @@ -1064,9 +1064,7 @@ { "cell_type": "markdown", "id": "82b501a6", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "### Table.select_dtypes()\n", "\n", @@ -1222,7 +1220,310 @@ }, { "cell_type": "markdown", - "id": "29b0e773", + "id": "32d2194b-fe6e-4789-9437-fa8cec5f9287", + "metadata": {}, + "source": [ + "## Sorting" + ] + }, + { + "cell_type": "markdown", + "id": "38d04a7b-603d-4ecb-afb0-c7999b6d23ec", + "metadata": {}, + "source": [ + "### Table.sort_values()\n", + "\n", + "```\n", + "Table.sort_values(by, ascending=True)\n", + "```\n", + "\n", + "Sort Table objects based on the value of a selected column.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :---------: | :--: | :-------------------------------------------------------------------------------- | :-----: |\n", + "| by | str or list of str | The name of the column to sort by. | _required_ |\n", + "| ascending | bool | The order in which to sort the values, ascending is True and descending is False. 
| True |\n", + "\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------ |\n", + "| Table | The resulting table after the sort has been performed |" + ] + }, + { + "cell_type": "markdown", + "id": "b71e942a-1247-4931-9a0f-edd2fd97b185", + "metadata": {}, + "source": [ + "**Examples:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b8e2204-1e4e-4776-8f6a-22589ff66124", + "metadata": {}, + "outputs": [], + "source": [ + "tab = kx.Table(data={'column_a': [20, 3, 100],'column_b': [56, 15, 42], 'column_c': [45, 80, 8]})\n", + "tab" + ] + }, + { + "cell_type": "markdown", + "id": "9494343e-34d1-4303-8007-38afe9ee6ead", + "metadata": {}, + "source": [ + "Sort a Table by the second column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fce9c74a-ed0b-4d2f-92f4-2b9b42762d4b", + "metadata": {}, + "outputs": [], + "source": [ + "tab.sort_values(by='column_b')" + ] + }, + { + "cell_type": "markdown", + "id": "6ee86878-634f-4383-bb90-af361b785f59", + "metadata": {}, + "source": [ + "Sort a Table by the third column in descending order" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8edac0a-f6f0-4a70-ae51-7c8599ee4da9", + "metadata": {}, + "outputs": [], + "source": [ + "tab.sort_values(by='column_c', ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "2b61d8b5-52a1-4c05-9347-c205ba6934d7", + "metadata": {}, + "source": [ + "### Table.nsmallest()\n", + "```\n", + "Table.nsmallest(\n", + " n,\n", + " columns,\n", + " keep='first'\n", + ")\n", + "```\n", + "\n", + "Return the first n rows of a Table ordered by columns in ascending order\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :---------: | :--: | :----------------------------------------------------------------------------------| :-----------: |\n", + "| n | int | The number of 
rows to return | _required_ |\n", + "| columns | str or list of str | Column labels to order by | _required_ |\n", + "| keep | str | Can be 'first', 'last' or 'all'. Used in case of duplicate values | 'first' | \n", + "\n", + "**Returns**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------ |\n", + "| Table | The first n rows ordered by the given columns in ascending order |" + ] + }, + { + "cell_type": "markdown", + "id": "c2430479-e832-4c6a-8cc0-651dd6af57b4", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "Sample table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "768f4e97-79a4-4abb-bced-5fa99f87c4ca", + "metadata": {}, + "outputs": [], + "source": [ + "tab = kx.Table(data={'column_a': [2, 3, 2, 2, 1],'column_b': [56, 15, 42, 102, 32], 'column_c': [45, 80, 8, 61, 87]})\n", + "tab" + ] + }, + { + "cell_type": "markdown", + "id": "79600d41-ef99-478e-89e6-5e67eadb6ee7", + "metadata": {}, + "source": [ + "Get the row where the first column is the smallest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "287c6905-d508-441b-887b-b71233e1d133", + "metadata": {}, + "outputs": [], + "source": [ + "tab.nsmallest(n=1, columns='column_a')" + ] + }, + { + "cell_type": "markdown", + "id": "48f5485e-4353-4523-8cc8-8655b1b8a9c3", + "metadata": {}, + "source": [ + "Get the 4 rows where the first column is the smallest, then any equal values are sorted based on the second column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1772dd9b-389e-4da2-8994-245cfaa6d942", + "metadata": {}, + "outputs": [], + "source": [ + "tab.nsmallest(n=4,columns=['column_a', 'column_b'])" + ] + }, + { + "cell_type": "markdown", + "id": "7869e8c1-a303-466f-8afc-3ebdb59a379d", + "metadata": {}, + "source": [ + "Get the 2 rows with the smallest values for the first column and in case of duplicates, take the last entry in the table" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "425a2841-610f-4cb2-9703-105ea14ac900", + "metadata": {}, + "outputs": [], + "source": [ + "tab.nsmallest(n=2, columns=['column_a'], keep='last')" + ] + }, + { + "cell_type": "markdown", + "id": "64ee5a21-7234-40f1-b720-e176740f4fc4", + "metadata": {}, + "source": [ + "### Table.nlargest()\n", + "```\n", + "Table.nlargest(\n", + " n,\n", + " columns,\n", + " keep='first'\n", + ")\n", + "```\n", + "\n", + "Return the first n rows of a Table ordered by columns in descending order\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :---------: | :--: | :-------------------------------------------------------------------------------- | :-------: |\n", + "| n | int | The number of rows to return | _required_|\n", + "| columns | str or list of str | Column labels to order by | _required_|\n", + "| keep | str | Can be 'first', 'last' or 'all'. Used in case of duplicate values | 'first' | \n", + "\n", + "**Returns**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------ |\n", + "| Table | The first n rows ordered by the given columns in descending order|" + ] + }, + { + "cell_type": "markdown", + "id": "66b7c0a9-3d23-47c9-af79-8020c52d32e2", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "Sample table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fa56308-8ede-448c-9cb6-0c232aac0dee", + "metadata": {}, + "outputs": [], + "source": [ + "tab = kx.Table(data={'column_a': [2, 3, 2, 2, 1],'column_b': [102, 15, 42, 56, 32], 'column_c': [45, 80, 8, 61, 87]})\n", + "tab" + ] + }, + { + "cell_type": "markdown", + "id": "2d8a45f7-a91a-41d5-854b-4bdfb7f696ef", + "metadata": {}, + "source": [ + "Get the row with the largest value for the first column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"88fa3ff8-4e31-4006-aec2-c697390e2b29", + "metadata": {}, + "outputs": [], + "source": [ + "tab.nlargest(n=1, columns='column_a')" + ] + }, + { + "cell_type": "markdown", + "id": "68da7ae5-e181-45dd-8fe4-ae078da131a6", + "metadata": {}, + "source": [ + "Get the 4 rows where the first column is the largest, then any equal values are sorted based on the third column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81647d24-282a-48ee-bf75-d08838211e94", + "metadata": {}, + "outputs": [], + "source": [ + "tab.nlargest(n=4,columns=['column_a', 'column_c'])" + ] + }, + { + "cell_type": "markdown", + "id": "d538d7f0-c9ff-42a0-9dd5-c95792637775", + "metadata": {}, + "source": [ + "Get the 2 rows with the smallest values for the first column and in case of duplicates, take all rows of the same value for that column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c898e01e-60ef-4763-9728-2e215962f393", + "metadata": {}, + "outputs": [], + "source": [ + "tab.nsmallest(n=2, columns=['column_a'], keep='all')" + ] + }, + { + "cell_type": "markdown", + "id": "ed1a193f-b02f-4af3-bdf2-acf46d374901", "metadata": {}, "source": [ "## Data Joins/Merging" @@ -1230,7 +1531,7 @@ }, { "cell_type": "markdown", - "id": "666a7621", + "id": "ef401426", "metadata": {}, "source": [ "### Table.merge()\n", @@ -1304,24 +1605,6 @@ "tab1.merge(tab2, left_on='lkey', right_on='rkey')" ] }, - { - "cell_type": "markdown", - "id": "e004bf64", - "metadata": {}, - "source": [ - "Merge tab1 and tab2 on the lkey and rkey columns using a native q inner join. The value columns have the default suffixes, \\_x and \\_y, appended." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07df7437", - "metadata": {}, - "outputs": [], - "source": [ - "tab1.merge(tab2, left_on='lkey', right_on='rkey', q_join=True)" - ] - }, { "cell_type": "markdown", "id": "7350d9db", @@ -1434,6 +1717,69 @@ "tab1.merge(tab2, how='cross')" ] }, + { + "cell_type": "markdown", + "id": "d552054e-883a-41ae-96b7-3e4394d6a0d9", + "metadata": {}, + "source": [ + "Merge tab1 and tab2_keyed using a left join with `q_join` set to `True`. Inputs/Outputs will match q [lj](https://code.kx.com/q/ref/lj/) behaviour." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d3d70c5-9ad9-45ee-b69f-d855c3f116af", + "metadata": {}, + "outputs": [], + "source": [ + "tab1 = kx.Table(data={'a': ['foo', 'bar', 'baz'], 'b': [1, 2, 3]})\n", + "tab2 = kx.Table(data={'a': ['foo', 'baz', 'baz'], 'c': [3, 4, 5]})\n", + "tab2_keyed = tab2.set_index(1)\n", + "tab1.merge(tab2_keyed, how='left', q_join=True)" + ] + }, + { + "cell_type": "markdown", + "id": "e4e4b882-1fd9-4069-93ae-18848301a5fc", + "metadata": {}, + "source": [ + "Inputs/Outputs will match q [ij](https://code.kx.com/q/ref/ij/) behaviour." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf32cdee-0b20-44f9-b0f5-db44be5e8d91", + "metadata": {}, + "outputs": [], + "source": [ + "tab3 = kx.Table(data={'a': ['foo', 'bar'], 'd': [6, 7]})\n", + "tab3_keyed = tab3.set_index(1)\n", + "tab1.merge(tab3_keyed, how='inner', q_join=True)" + ] + }, + { + "cell_type": "markdown", + "id": "5e619567-b73d-4821-976e-4b5f9bdddef4", + "metadata": {}, + "source": [ + "Merge using `q_join` set to `True`, and `how` set to `left`, will fail when `tab2` is not a keyed table." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a3e697-8ee8-47ee-9cf9-299e1ebfef61", + "metadata": {}, + "outputs": [], + "source": [ + "#Will error as Left Join requires a keyed column for the right dataset.\n", + "try:\n", + " tab1.merge(tab2, how='left', q_join=True)\n", + "except ValueError as e:\n", + " print(f'Caught Error: {e}')" + ] + }, { "cell_type": "markdown", "id": "7583c015", @@ -3210,23 +3556,24 @@ "### Table.rename()\n", "\n", "```\n", - "Table.rename(labels=None, index=None, columns=None, axis=None, copy=None, inplace=False, level=None, errors='ignore')\n", + "Table.rename(labels=None, index=None, columns=None, axis=None, copy=None, inplace=False, level=None, errors='ignore', mapper=None)\n", "```\n", "\n", "Rename columns in a table and return the resulting Table object.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", + "| Name | Type | Description | Default |\n", "| :------: | :----: | :------------------------------------------------------------------------------------------------------------------| :---:|\n", - "| labels | dict | A dictionary of either new index or column names to new names to be used in conjunction with the _axis_ parameter. | None |\n", + "| labels | dict | Deprecated. Please use `mapper` keyword. | None |\n", "| columns | dict | A dictionary of column name to new column name to use when renaming. | None |\n", - "| index | dict | A dictionary of index to new index name to use when renaming keyed tables. | None |\n", - "| axis | {0 or 'index', 1 or 'columns'} | Designating the axis to be renamed by the _labels_ dictionary. | None |\n", + "| index | dict | A dictionary of index to new index name to use when renaming single key column keyed tables. | None |\n", + "| axis | {0 or 'index', 1 or 'columns'} | Designating the axis to be renamed by the _mapper_ dictionary. | None |\n", "| copy | None | Not yet implemented. 
| None |\n", "| inplace | bool | Not yet implemented. | None |\n", "| level | None | Not yet implemented. | None |\n", "| errors | string | Not yet implemented. | None |\n", + "| mapper | dict | A dictionary of either new index or column names to new names to be used in conjunction with the _axis_ parameter. | None |\n", "\n", "**Returns:**\n", "\n", diff --git a/docs/user-guide/advanced/environment_variables.md b/docs/user-guide/advanced/environment_variables.md deleted file mode 100644 index 5abcff1..0000000 --- a/docs/user-guide/advanced/environment_variables.md +++ /dev/null @@ -1,54 +0,0 @@ -# Modifying PyKX using environment variables - -The following environment variables can be used to tune PyKX behavior at run time. These variables need to be set before attempting to import PyKX and will take effect for the duration of the execution of the PyKX process. - - -## General - -The following variables can be used to enable or disable advanced features of PyKX: - -| Variable | Values | Description | -|----------------------------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `IGNORE_QHOME` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts and dependencies to load. This variable instructs PyKX to ignore the existing q installation and not load from this directory. 
| -| `KEEP_LOCAL_TIMES` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local timezone. | -| `PYKX_ALLOCATOR` | `1` or `true` | When converting a Numpy array to q, PyKX implements a full data copy in order to translate the Numpy array to q representation in memory. When this is set PyKX implements [NEP-49](https://numpy.org/neps/nep-0049.html) which allows q to handle memory allocation of all Numpy arrays so they can be converted more efficiently to q. This avoids the need to resort to a copy where possible. | -| `PYKX_ENABLE_PANDAS_API` | `1` or `true` | Enable the Pandas API for `pykx.Table` objects | -| `PYKX_GC` | `1` or `true` | When PYKX_ALLOCATOR is enabled, PyKX can trigger q garbage collector when Numpy arrays allocated by PyKX are deallocated. This variable enables this behavior which will release q memory to the OS following deallocation of the numpy array at the cost of a small overhead. | -| `PYKX_LOAD_PYARROW_UNSAFE` | `1` or `true` | By default, PyKX uses a subprocess to import pyarrow as it can result in a crash when the version of pyarrow is incompatible. This variable will trigger a normal import of pyarrow and importing PyKX should be slightly faster. | -| `PYKX_MAX_ERROR_LENGTH` | size in characters | By default, PyKX reports IPC connection errors with a message buffer of size 256 characters. This allows the length of these error messages to be modified reducing the chance of excessive error messages polluting logs. | -| `PYKX_NOQCE` | `1` or `true` | On Linux, PyKX comes with q Cloud Edition features from Insights Core (https://code.kx.com/insights/1.2/core/). This variable allows a user to skip the loading of q Cloud Edition functionality, saving some time when importing PyKX but removing access to possibly supported additional functionality. 
| -| `PYKX_Q_LIB_LOCATION` | Path to a directory containing q libraries necessary for loading PyKX | See [here](https://code.kx.com/pykx/changelog.html#pykx-131) for detailed information. This allows a user to centralise the q libraries, `q.k`, `read.q`, `libq.so` etc to a managed location within their environment which is decentralised from the Python installation. This is required for some enterprise use-cases. | -| `PYKX_RELEASE_GIL` | `1` or `true` | When PYKX_RELEASE_GIL is enabled the Python Global Interpreter Lock will not be held when calling into q. | -| `PYKX_Q_LOCK` | `1` or `true` | When PYKX_Q_LOCK is enabled a reentrant lock is added around calls into q, this lock will stop multiple threads from calling into q at the same time. This allows embedded q to be threadsafe even when using PYKX_RELEASE_GIL. | -| `PYKX_DEBUG_INSIGHTS_LIBRARIES` | `1` or `true` | If the insights libraries failed to load this variable can be used to print out the full error output for debugging purposes. | -| `PYKX_NO_SIGINT` | `1` or `true` | If this environment variable is set the SIGINT handler will not be overwritten from the `q` default. | - -The variables below can be used to set the environment for q (embedded in PyKX, in licensed mode): - -| Variable | Values | Description | -|----------|----------|-------------| -| `QARGS` | See link | Command-line flags to pass to q, see [here](https://code.kx.com/q/basics/cmdline/) for more information. | -| `QHOME` | Path to the users q installation folder | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. | -| `QLIC` | Path to the folder where the q license should be found | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. | - - -## PyKX under q - -PyKX can be loaded and used from a q session (see [here](running_under_q.md) for more information). The following variables are specific to this mode of operation. 
- -| Variable | Values | Description | -|----------|--------|-------------| -| `PYKX_DEFAULT_CONVERSION` | `py`, `np`, `pd`, `pa` or `k` | Default conversion to apply when passing q objects to Python. Converting to Numpy (`np`) by default. | -| `SKIP_UNDERQ` | `1` or `true` | When importing PyKX from Python, PyKX will also load `pykx.q` under its embedded q. This variable skip this step. | -| `UNSET_PYKX_GLOBALS` | `1` or `true` | By default "PyKX under q" will load some utility functions into the global namespace (eg. `print`). This variable prevents this. | -| `PYKX_PYTHON_LIB_PATH` | File path | The path to use for loading libpython. | -| `PYKX_PYTHON_BASE_PATH` | File path | The path to use for the base directory of your Python installation. | -| `PYKX_PYTHON_HOME_PATH` | File path | The path to use for the base Python home directory (used to find site packages). | - -## q Cloud Edition features with Insights Core (Linux only) - -On Linux, the q Cloud Edition features, coming with Insights Core, can be used to read data from Cloud Storage (AWS S3, Google Cloud Storage, Azure Blob Storage). Credentials to access the Cloud Storage can be passed using specific environment variables. 
For more information, see the two following links: - -- https://code.kx.com/insights/core/objstor/main.html#environment-variables -- https://code.kx.com/insights/1.2/core/kurl/kurl.html#automatic-registration-using-credential-discovery - diff --git a/docs/user-guide/advanced/ipc.md b/docs/user-guide/advanced/ipc.md index 4be6a67..a9258e1 100644 --- a/docs/user-guide/advanced/ipc.md +++ b/docs/user-guide/advanced/ipc.md @@ -20,7 +20,7 @@ used to replace the functionality of [`qPython`](https://github.com/exxeleron/qP ```python # Licensed mode -with pykx.SyncQConnection('localhost', 5001) as q: +with kx.SyncQConnection('localhost', 5001) as q: result = q.til(10) print(result) print(result.py()) @@ -31,12 +31,12 @@ with pykx.SyncQConnection('localhost', 5001) as q: ```python # Unlicensed mode -with pykx.SyncQConnection('localhost', 5001) as q: +with kx.SyncQConnection('localhost', 5001) as q: result = q.til(10) print(result) print(result.py()) -pykx.LongVector._from_addr(0x7fcab6800b80) +kx.LongVector._from_addr(0x7fcab6800b80) [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ``` @@ -50,7 +50,7 @@ ensure that the connection instance is properly closed automatically when leavin Manually creating a `QConnection` ```python -q = pykx.SyncQConnection('localhost', 5001) # Directly instantiate a QConnection instance +q = kx.SyncQConnection('localhost', 5001) # Directly instantiate a QConnection instance q(...) # Make some queries q.close() # Must manually ensure it is closed when no longer needed ``` @@ -58,7 +58,7 @@ q.close() # Must manually ensure it is closed when no longer needed Using a context interface to create and manage the `QConnection` ```python -with pykx.SyncQConnection('localhost', 5001) as q: +with kx.SyncQConnection('localhost', 5001) as q: q(...) # Make some queries # QConnection is automatically closed here ``` @@ -84,7 +84,7 @@ The following call to the q function [`save`](../../api/pykx-execution/q.md#save because `q('save')` returns a regular [`pykx.Function`][] object. 
```python -with pykx.SyncQConnection('localhost', 5001) as q: +with kx.SyncQConnection('localhost', 5001) as q: q('save')('t') # Executes locally within Embedded q ``` @@ -95,7 +95,7 @@ its execution context using its symbol value, and so it is executed in the q ser [`save`](../../api/pykx-execution/q.md#save) is defined. ```python -with pykx.SyncQConnection('localhost', 5001) as q: +with kx.SyncQConnection('localhost', 5001) as q: q.save('t') # Executes in the q server over IPC ``` @@ -105,7 +105,7 @@ and so what is returned is the result of calling [`save`](../../api/pykx-executi rather than the [`save`](../../api/pykx-execution/q.md#save) function itself. ```python -with pykx.SyncQConnection('localhost', 5001) as q: +with kx.SyncQConnection('localhost', 5001) as q: q('save', 't') # Executes in the q server over IPC ``` @@ -121,7 +121,7 @@ you use the `event_loop` keyword argument to pass the event loop into the [`pykx This will allow the eventloop to properly manage the returned [`pykx.QFuture`][] objects. ```python -async with pykx.AsyncQConnection('localhost', 5001, event_loop=asyncio.get_event_loop()) as q: +async with kx.AsyncQConnection('localhost', 5001, event_loop=asyncio.get_event_loop()) as q: fut = q('til 10') # returns a QFuture that can later be awaited on, this future is attached to the event loop await fut # await the future object to get the result ``` @@ -131,7 +131,7 @@ If you are using an [`pykx.AsyncQConnection`][] to make q queries that respond i a dedicated [`pykx.AsyncQConnection`][] instance that is closed upon the result being received. 
```python -async with pykx.AsyncQConnection('localhost', 5001, event_loop=asyncio.get_event_loop()) as q: +async with kx.AsyncQConnection('localhost', 5001, event_loop=asyncio.get_event_loop()) as q: fut = q('query', wait=False, reuse=False) # query a q process that is going to return a deferred result await fut # await the future object to get the result ``` @@ -143,13 +143,29 @@ In addition to the ability to execute code remotely using explicit calls to the The following provide and example of the usage of this functionality on both a syncronous and asyncronous use-case. ```python -with pykx.SyncQConnection(port = 5000) as q: +with kx.SyncQConnection(port = 5000) as q: q.file_execute('/absolute/path/to/file.q') ret = q('.test.variable.set.in.file.q', return_all=True) ``` ```python -async with pykx.AsyncQConnection('localhost', 5001) as q: +async with kx.AsyncQConnection('localhost', 5001) as q: q.file_execute('../relative/path/to/file.q') ret = await q('.test.variable.set.in.file.q') ``` + +## Reconnecting to a kdb+ server + +When generating a client-server architecture it is often the case that for short periods of time your server may be inaccessible due to network issues or planned outages. At such times clients connected to these servers will need to reconnect, this may require them to manually 'close' their existing stale connection and reconnect using the same credentials to the now restarted server. From PyKX 2.4+ the ability to manually configure reconnection attempts for clients connecting to servers has been added via the addition of the `reconnection_attempts` keyword argument. 
The following example shows the output when attempting to make use of a connection which has been cancelled and is subsequently re-established: + +```python +>>> conn = kx.SyncQConnection(port=5050, reconnection_attempts=5) +>>> conn('1+1') # after this call the server on port 5050 is shut down for 2 seconds +pykx.LongAtom(pykx.q('2'))
- -## Getting started - -### Prerequisites - -To make use of PyKX running embedded within a q session a user must have the following set up - -1. The user has access to a running `q` environment, follow the q installation guide [here](https://code.kx.com/q/learn/install/) for more information. -2. The user is permissioned to run PyKX with access to a license containing the feature flags `insights.lib.pykx` and `insights.lib.embedq` For more information see [here](../../getting-started/installing.md). - -### Installation - -To facilitate the execution of Python code within a q session a user must first install the PyKX library and the q script used to drive this embedded feature into their `$QHOME` location. This can be done as follows. - -1. Install the PyKX library following the instructions [here](../../getting-started/installing.md). -2. Run the following command to install the `pykx.q` script: - - ```python - python -c "import pykx;pykx.install_into_QHOME()" - ``` - - If you previously had `embedPy` installed pass: - - ```python - python -c "import pykx;pykx.install_into_QHOME(overwrite_embedpy=True)" - ``` - - If you cannot edit files in `QHOME` you can copy the files to your local folder and load `pykx.q` from there: - - ```bash - python -c "import pykx;pykx.install_into_QHOME(to_local_folder=True)" - ``` - -### Initialisation - -Once installation has been completed a user should be in a position to initialise the library as follows - -```q -q)\l pykx.q -q).pykx -console | {pyexec"pykx.console.PyConsole().interact(banner='', exitmsg='')"} -getattr | code -get | {[f;x]r:wrap f x 0;$[count x:1_x;.[;x];]r}[code]enlist -setattr | {i.load[(`set_attr;3)][unwrap x;y;i.convertArg[i.toDefault z]`.]} -set | {i.load[(`set_global;2)][x; i.convertArg[i.toDefault y]`.]} -print | {$[type[x]in 104 105 112h;i.repr[0b] unwrap x;show x];} -repr | {$[type[x]in 104 105 112h;i.repr[1b] unwrap x;.Q.s x]} -import | {[f;x]r:wrap f x 0;$[count x:1_x;.[;x];]r}[code]enlist -.. 
-``` - -## Using the library - -Usage of the functionality provided by this library can range in complexity from the simple execution of Python code through to the generation of streaming applications containing machine learning models. The following documentation section outlines the use of this library under various use-case agnostic scenarios - -### Evaluating and Executing Python code - -#### Executing Python code - -This interface allows a user to execute Python code a variety of ways: - -1. Executing directly using the `.pykx.pyexec` function - - This is incredibly useful if there is a requirement to script execution of Python code within a library - - ```q - q).pykx.pyexec"import numpy as np" - q).pykx.pyexec"array = np.array([0, 1, 2, 3])" - q).pykx.pyexec"print(array)" - [0 1 2 3] - ``` - -2. Usage of the PyKX console functionality - - This is useful when interating within a q session and needing to prototype some functionality in Python - - ```q - q).pykx.console[] - >>> import numpy as np - >>> print(np.linspace(0, 10, 5)) - [ 0. 2.5 5. 7.5 10. ] - >>> quit() - q) - ``` - -3. Execution through use of a `p)` prompt - - Provided as a way to embed execution of Python code within a q script, additionally this provides backwards compatibility with PyKX. - - ```q - q)p)import numpy as np - q)p)print(np.arange(1, 10, 2)) - [1 3 5 7 9] - ``` - -4. Loading of a `.p` file - - This is provided as a method of executing the contents of a Python file in bulk. - - ```q - $ cat test.p - def func(x, y): - return(x+y) - $ q pykx.q - q)\l test.p - q).pykx.get[`func] - {[f;x].pykx.i.pykx[f;x]}[foreign]enlist - ``` - -#### Evaluating Python code - -The evaluation of Python code can be completed using PyKX by passing a string of Python code to a variety of functions. - -??? "Differences between evaluation and execution" - - Python evaluation (unlike Python execution) does not allow side effects. Any attempt at variable assignment or class definition will signal an error. 
To execute a string performing side effects, use `.pykx.pyexec` or `.p.e`. - - [Difference between eval and exec in Python]()https://stackoverflow.com/questions/2220699/whats-the-difference-between-eval-exec-and-compile) - -To evaluate Python code and return the result to `q`, use the function `.pykx.qeval`. - -```q -q).pykx.qeval"1+2" -3 -``` - -Similarly to evaluate Python code and return the result as a `foreign` object denoting the underlying Python object - -```q -q)show a:.pykx.pyeval"1+2" -foreign -q)print a -3 -``` - -Finally to return a hybrid representation which can be manipulated to return the q or Python representation you can run the following - -```q -q)show b:.pykx.eval"1+2" -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist -q)b` // Convert to a q object -3 -q)b`. // Convert to a Python foreign -foreign -``` - -## Interacting with PyKX objects - -### Foreign objects - -At the lowest level, Python objects are represented in q as foreign objects, which contain pointers to objects in the Python memory space. - -Foreign objects can be stored in variables just like any other q datatype, or as part of lists, dictionaries or tables. They will display as foreign when inspected in the q console or using the string (or .Q.s) representation. - -**Serialization:** Kdb+ cannot serialize foreign objects, nor send them over IPC: they live in the embedded Python memory space. To pass these objects over IPC, first convert them to q. - -### PyKX objects - -Foreign objects cannot be directly operated on in q. Instead, Python objects are typically represented as PyKX objects, which wrap the underlying foreign objects. This provides the ability to get and set attributes, index, call or convert the underlying foreign object to a q object. - -Use .pykx.wrap to create an PyKX object from a foreign object. 
- -```q -q)x -foreign -q)p:.pykx.wrap x -q)p /how an PyKX object looks -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist -``` - -More commonly, PyKX objects are retrieved directly from Python using one of the following functions: - -function | argument | example ----------------|--------------------------------------------------|----------------------- -`.pykx.import` | symbol: name of a Python module or package, optional second argument is the name of an object within the module or package | ``np:.pykx.import`numpy`` -`.pykx.get` | symbol: name of a Python variable in `__main__` | ``v:.pykx.get`varName`` -`.pykx.eval` | string: Python code to evaluate | `x:.pykx.eval"1+1"` - -**Side effects:** As with other Python evaluation functions and noted previously, `.pykx.eval` does not permit side effects. - -### Converting data - -Given `obj`, an PyKX object representing Python data, we can get the underlying data (as foreign or q) using - -```q -obj`. / get data as foreign -obj` / get data as q -``` - -For example: - -```q -q)x:.pykx.eval"(1,2,3)" -q)x -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist -q)x`. -foreign -q)x` -1 2 3 -``` - -### `None` and identity - -Python `None` maps to the q identity function `::` when converting from Python to q (and vice versa). - -There is one important exception to this. When calling Python functions, methods or classes with a single q data argument, passing `::` will result in the Python object being called with _no_ arguments, rather than a single argument of `None`. See the section below on _Zero-argument calls_ for how to explicitly call a Python callable with a single `None` argument. - -### Getting attributes and properties - -Given `obj`, an PyKX object representing a Python object, we can get an attribute or property directly using - -```q -obj`:attr / equivalent to obj.attr in Python -obj`:attr1.attr2 / equivalent to obj.attr1.attr2 in Python -``` - -These expressions return PyKX objects, allowing users to chain operations together. 
- -```q -obj[`:attr1]`:attr2 / equivalent to obj.attr1.attr2 in Python -``` - -e.g. - -```bash -$ cat class.p -class obj: - def __init__(self,x=0,y=0): - self.x = x - self.y = y -``` - -```q -q)\l class.p -q)obj:.pykx.eval"obj(2,3)" -q)obj[`:x]` -2 -q)obj[`:y]` -3 -``` - -### Setting attributes and properties - -Given `obj`, an PyKX object representing a Python object, we can set an attribute or property directly using - -```q -obj[:;`:attr;val] / equivalent to obj.attr=val in Python -``` - -e.g. - -```q -q)obj[`:x]` -2 -q)obj[`:y]` -3 -q)obj[:;`:x;10] -q)obj[:;`:y;20] -q)obj[`:x]` -10 -q)obj[`:y]` -20 -``` - -### Indexing - -Given `lst`, an PyKX object representing an indexable container object in Python, we can access the element at index `i` using - -```q -lst[@;i] / equivalent to lst[i] in Python -``` - -We can set the element at index `i` (to object `x`) using - -```q -lst[=;i;x] / equivalent to lst[i]=x in Python -``` - -These expressions return PyKX objects, e.g. - -```q -q)lst:.pykx.eval"[True,2,3.0,'four']" -q)lst[@;0]` -1b -q)lst[@;-1]` -`four -q)lst'[@;;`]2 1 0 3 -3f -2 -1b -`four -q)lst[=;0;0b]; -q)lst[=;-1;`last]; -q)lst` -0b -2 -3f -`last -``` - -### Getting methods - -Given `obj`, an PyKX object representing a Python object, we can access a method directly using - -```q -obj`:method / equivalent to obj.method in Python -``` - -Presently the calling of PyKX objects representing Python methods is only supported in such a manner that the return of evaluation is a PyKX object. - -For example - -```q -q)np:.pykx.import`numpy -q)np`:arange -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist -q)arange:np`:arange / callable returning PyKX object -q)arange 12 -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist -q)arange[12]` -0 1 2 3 4 5 6 7 8 9 10 11 -``` - -### PyKX function API - -Using the function API, PyKX objects can be called directly (returning PyKX objects) or declared callable returning q or `foreign` data. 
- -Users explicitly specify the return type as q or foreign, the default is as a PyKX object. - -Given `func`, an `PyKX` object representing a callable Python function or method, we can carry out the following operations: - -```q -func / func is callable by default (returning PyKX) -func arg / call func(arg) (returning PyKX) -func[<] / declare func callable (returning q) -func[<]arg / call func(arg) (returning q) -func[<;arg] / equivalent -func[>] / declare func callable (returning foreign) -func[>]arg / call func(arg) (returning foreign) -func[>;arg] / equivalent -``` - -**Chaining operations** Returning another PyKX object from a function or method call, allows users to chain together sequences of operations. We can also chain these operations together with calls to `.pykx.import`, `.pykx.get` and `.pykx.eval`. - - -### PyKX examples - -Some examples - -```bash -$ cat test.p # used for tests -class obj: - def __init__(self,x=0,y=0): - self.x = x # attribute - self.y = y # property (incrementing on get) - @property - def y(self): - a=self.__y - self.__y+=1 - return a - @y.setter - def y(self, y): - self.__y = y - def total(self): - return self.x + self.y -``` - -```q -q)\l test.p -q)obj:.pykx.get`obj / obj is the *class* not an instance of the class -q)o:obj[] / call obj with no arguments to get an instance -q)o[`:x]` -0 -q)o[;`]each 5#`:x -0 0 0 0 0 -q)o[:;`:x;10] -q)o[`:x]` -10 -q)o[`:y]` -1 -q)o[;`]each 5#`:y -3 5 7 9 11 -q)o[:;`:y;10] -q)o[;`]each 5#`:y -10 13 15 17 19 -q)tot:o[`:total;<] -q)tot[] -30 -q)tot[] -31 -``` - -```q -q)np:.pykx.import`numpy -q)v:np[`:arange;12] -q)v` -0 1 2 3 4 5 6 7 8 9 10 11 -q)v[`:mean;<][] -5.5 -q)rs:v[`:reshape;<] -q)rs[3;4] -0 1 2 3 -4 5 6 7 -8 9 10 11 -q)rs[2;6] -0 1 2 3 4 5 -6 7 8 9 10 11 -q)np[`:arange;12][`:reshape;3;4]` -0 1 2 3 -4 5 6 7 -8 9 10 11 -``` - -```q -q)stdout:.pykx.import[`sys]`:stdout.write -q)stdout `$"hello\n"; -hello -q)stderr:.pykx.import[`sys;`:stderr.write] -q)stderr `$"goodbye\n"; -goodbye -``` - -```q 
-q)oarg:.pykx.eval"10" -q)oarg` -10 -q)ofunc:.pykx.eval["lambda x:2+x";<] -q)ofunc[1]` -3 -q)ofunc oarg -12 -q)p)def add2(x,y):return x+y -q)add2:.pykx.get[`add2;<] -q)add2[1;oarg] -11 -``` - -### Function argument types - -One of the distinct differences that PyKX has over the previous incarnation of embedded interfacing with Python in q PyKX is support for a much wider variety of data type conversions between q and Python. - -In particular the following types are supported: - -1. Python native objects -2. Numpy objects -3. Pandas objects -4. PyArrow objects -5. PyKX objects - -By default when passing a q object to a callable function it will be converted to it's underlying Numpy equivalent representation. This will be the case for all types including tabular structures which are converted to numpy records. - -For example: - -```q -q)typeFunc:.pykx.eval"lambda x:print(type(x))" -q)typeFunc 1; - -q)typeFunc til 10; - -q)typeFunc ([]100?1f;100?1f); - -``` - -The default behaviour of the conversions which are undertaken when making function/method calls is controlled through the definition of `.pykx.i.defaultConv` - -```q -q).pykx.i.defaultConv -"np" -``` - -This can have one of the following values: - -| Python type | Value | -|-------------|-------| -| Python | "py" | -| Numpy | "np" | -| Pandas | "pd" | -| PyArrow | "pa" | -| PyKX | "k" | - -Taking the examples above for numpy we can update the default types across all function calls - -```q -q)typeFunc:.pykx.eval"lambda x:print(type(x))" -q).pykx.i.defaultConv:"py" -q)typeFunc 1; - -q)typeFunc til 10; - -q)typeFunc ([]100?1f;100?1f); - - -q).pykx.i.defaultConv:"pd" -q)typeFunc 1; - -q)typeFunc til 10; - -q)typeFunc ([]100?1f;100?1f); - - -q).pykx.i.defaultConv:"pa" -q)typeFunc 1; - -q)typeFunc til 10; - -q)typeFunc ([]100?1f;100?1f); - - -q).pykx.i.defaultConv:"k" -q)typeFunc 1; - -q)typeFunc til 10; - -q)typeFunc ([]100?1f;100?1f); - -``` - -Alternatively individual arguments to functions can be modified using 
the `.pykx.to*` functionality, for example in the following: - -```q -q)typeFunc:.pykx.eval"lambda x,y: [print(type(x)), print(type(y))]" -q)typeFunc[til 10;til 10]; // Simulate passing both arguments with defaults - - -q)typeFunc[til 10].pykx.topd til 10; // Pass in the second argument as Pandas series - - -q)typeFunc[.pykx.topa([]100?1f);til 10]; // Pass in first argument as PyArrow Table - - -q)typeFunc[.pykx.tok til 10;.pykx.tok ([]100?1f)]; // Pass in two PyKX objects - - -``` - -### Setting Python variables - -Variables can be set in Python `__main__` using `.pykx.set` - -```q -q).pykx.set[`var1;42] -q).pykx.qeval"var1" -42 -q).pykx.set[`var2;{x*2}] -q)qfunc:.pykx.get[`var2;<] -{[f;x].pykx.i.pykx[f;x]}[foreign]enlist -q)qfunc[3] -6 -``` - -## Function calls - - -Python allows for calling functions with - -- A variable number of arguments -- A mixture of positional and keyword arguments -- Implicit (default) arguments - -All of these features are available through the PyKX function-call interface. -Specifically: - -- Callable PyKX objects are variadic -- Default arguments are applied where no explicit arguments are given -- Individual keyword arguments are specified using the (infix) `pykw` operator -- A list of positional arguments can be passed using `pyarglist` (like Python \*args) -- A dictionary of keyword arguments can be passed using `pykwargs` (like Python \*\*kwargs) - -**Keyword arguments last** We can combine positional arguments, lists of positional arguments, keyword arguments and a dictionary of keyword arguments. However, _all_ keyword arguments must always follow _any_ positional arguments. The dictionary of keyword arguments (if given) must be specified last. - - -### Example function calls - -```q -q)p)import numpy as np -q)p)def func(a=1,b=2,c=3,d=4):return np.array([a,b,c,d,a*b*c*d]) -q)qfunc:.pykx.get[`func;<] / callable, returning q -``` - -Positional arguments are entered directly. 
-Function calling is variadic, so later arguments can be excluded. - -```q -q)qfunc[2;2;2;2] / all positional args specified -2 2 2 2 16 -q)qfunc[2;2] / first 2 positional args specified -2 2 3 4 48 -q)qfunc[] / no args specified -1 2 3 4 24 -q)qfunc[2;2;2;2;2] / error if too many args specified -'TypeError('func() takes from 0 to 4 positional arguments but 5 were given') - [0] qfunc[2;2;2;2;2] / error if too many args specified - ^ -``` - -Individual keyword arguments can be specified using the `pykw` operator (applied infix). -Any keyword arguments must follow positional arguments, but the order of keyword arguments does not matter. - -```q -q)qfunc[`d pykw 1;`c pykw 2;`b pykw 3;`a pykw 4] / all keyword args specified -4 3 2 1 24 -q)qfunc[1;2;`d pykw 3;`c pykw 4] / mix of positional and keyword args -1 2 4 3 24 -q)qfunc[`a pykw 2;`b pykw 2;2;2] / error if positional args after keyword args -'TypeError("func() got multiple values for argument 'a'") - [0] qfunc[`a pykw 1;pyarglist 2 2 2] - ^ -q)qfunc[`a pykw 2;`a pykw 2] / error if duplicate keyword args -'Expected only unique key names for keyword arguments in function call - [0] qfunc[`a pykw 2;`a pykw 2] - ^ -``` - -A list of positional arguments can be specified using `pyarglist` (similar to Python’s \*args). -Again, keyword arguments must follow positional arguments. 
- -```q -q)qfunc[pyarglist 1 1 1 1] / full positional list specified -1 1 1 1 1 -q)qfunc[pyarglist 1 1] / partial positional list specified -1 1 3 4 12 -q)qfunc[1;1;pyarglist 2 2] / mix of positional args and positional list -1 1 2 2 4 -q)qfunc[pyarglist 1 1;`d pykw 5] / mix of positional list and keyword args -1 1 3 5 15 -q)qfunc[pyarglist til 10] / error if too many args specified -'TypeError('func() takes from 0 to 4 positional arguments but 10 were given') - [0] qfunc[pyarglist til 10] / error if too many args specified - ^ -q)qfunc[`a pykw 1;pyarglist 2 2 2] / error if positional list after keyword args -'TypeError("func() got multiple values for argument 'a'") - [0] qfunc[`a pykw 1;pyarglist 2 2 2] - ^ -``` - - -A dictionary of keyword arguments can be specified using `pykwargs` (similar to Python’s \*\*kwargs). -If present, this argument must be the _last_ argument specified. - -```q -q)qfunc[pykwargs`d`c`b`a!1 2 3 4] / full keyword dict specified -4 3 2 1 24 -q)qfunc[2;2;pykwargs`d`c!3 3] / mix of positional args and keyword dict -2 2 3 3 36 -q)qfunc[`d pykw 1;`c pykw 2;pykwargs`a`b!3 4] / mix of keyword args and keyword dict -3 4 2 1 24 -q)qfunc[pykwargs`d`c!3 3;2;2] / error if keyword dict not last -'pykwargs last -q)qfunc[pykwargs`a`a!1 2] / error if duplicate keyword names -'dupnames -``` - -All 4 methods can be combined in a single function call, as long as the order follows the above rules. - -```q -q)qfunc[4;pyarglist enlist 3;`c pykw 2;pykwargs enlist[`d]!enlist 1] -4 3 2 1 24 -``` - -!!! warning "`pykw`, `pykwargs`, and `pyarglist`" - - Before defining functions containing `pykw`, `pykwargs`, or `pyarglist` within a script, the file `p.q` must be loaded explicitly. - Failure to do so will result in errors `'pykw`, `'pykwargs`, or `'pyarglist`. - -### Zero-argument calls - -In Python these two calls are _not_ equivalent: - -```python -func() #call with no arguments -func(None) #call with argument None -``` - -!!! 
warning "PyKX function called with `::` calls Python with no arguments" - - Although `::` in q corresponds to `None` in Python, if an PyKX function is called with `::` as its only argument, the corresponding Python function will be called with _no_ arguments. - -To call a Python function with `None` as its sole argument, retrieve `None` as a foreign object in q and pass that as the argument. - -```q -q)pynone:.pykx.eval"None" -q)pyfunc:.pykx.eval["print"] -q)pyfunc pynone; -None -``` - -Python | form | q ----------------|---------------------------|----------------------- -`func()` | call with no arguments | `func[]` or `func[::]` -`func(None)` | call with argument `None` | `func[.pykx.eval"None"]` - -!!! info "Q functions applied to empty argument lists" - - The _rank_ (number of arguments) of a q function is determined by its _signature_, - an optional list of arguments at the beginning of its definition. - If the signature is omitted, the default arguments are as many of - `x`, `y` and `z` as appear, and its rank is 1, 2, or 3. - - If it has no signature, and does not refer to `x`, `y`, or `z`, it has rank 1. - It is implicitly unary. - If it is then applied to an empty argument list, the value of `x` defaults to `(::)`. - - So `func[::]` is equivalent to `func[]` – and in Python to `func()`, not `func[None]`. - -### Printing or returning object representation - - -`.pykx.repr` returns the string representation of a Python object, either PyKX or foreign. This representation can be printed to stdout using `.pykx.print`. The usage of this function with a q object - -```q -q)x:.pykx.eval"{'a':1,'b':2}" -q).pykx.repr x -"{'a': 1, 'b': 2}" -q).pykx.print x -{'a': 1, 'b': 2} - -q).pykx.repr ([]5?1f;5?1f) -"x x1 \n-------------------\n0.3017723 0.3927524\n0.785033 0.5.. 
-q).pykx.print ([]5?1f;5?1f) -x x1 --------------------- -0.6137452 0.4931835 -0.5294808 0.5785203 -0.6916099 0.08388858 -0.2296615 0.1959907 -0.6919531 0.375638 -``` - -### Aliases in the root - - -For convenience, `pykx.q` defines `print` in the default namespace of q, as aliases for `.pykx.print`. To prevent the aliasing of this function please set either: - -1. `UNSET_PYKX_GLOBALS` as an environment variable. -2. `unsetPyKXGlobals` as a command line argument when initialising your q session. - -### Differences with embedPy 1.* - -1. EmbedPy did not discern any difference between handling string and symbol types when converting from q to Python. PyKX under q will convert symbols to Python `str` types while strings are converted to `bytes`. -2. EmbedPy provided functions to allow for the generation of closure and generator methods, these have been removed from support. -3. EmbedPy provided two helper functions `.p.pycallable` and `.p.qcallable` which allowed function returns to be set explicitly from a function, these have been removed. 
diff --git a/docs/user-guide/configuration.md b/docs/user-guide/configuration.md index 2852400..12f1cc3 100644 --- a/docs/user-guide/configuration.md +++ b/docs/user-guide/configuration.md @@ -73,22 +73,24 @@ The following variables can be used to enable or disable advanced features of Py | Option | Default | Values | Description | Status | |---------------------------------|---------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------| | `PYKX_BETA_FEATURES` | `False` | `1` or `true` | Enable all Beta features supplied with PyKX allowing users to test and prototype code slated for later releases. | | +| `PYKX_QDEBUG` | `False` | `1` or `true` | Enable retrieval of backtrace information on error being raised when executing q functions, this can alternatively be enabled by setting `debug=True` as a keyword in calls to `kx.q`. | | | `PYKX_IGNORE_QHOME` | `False` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. | | | `PYKX_KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. 
| | | `PYKX_ALLOCATOR` | `False` | `1` or `true` | When converting a Numpy array to q, PyKX implements a full data copy in order to translate the Numpy array to q representation in memory. When this is set PyKX implements [NEP-49](https://numpy.org/neps/nep-0049.html) which allows q to handle memory allocation of all Numpy arrays so they can be converted more efficiently to q. This avoids the need to resort to a copy where possible. | | | `PYKX_GC` | `False` | `1` or `true` | When PYKX_ALLOCATOR is enabled, PyKX can trigger q garbage collector when Numpy arrays allocated by PyKX are deallocated. This variable enables this behavior which will release q memory to the OS following deallocation of the Numpy array at the cost of a small overhead. | | | `PYKX_LOAD_PYARROW_UNSAFE` | `False` | `1` or `true` | By default, PyKX uses a subprocess to import pyarrow as it can result in a crash when the version of pyarrow is incompatible. This variable will trigger a normal import of pyarrow and importing PyKX should be slightly faster. | | | `PYKX_MAX_ERROR_LENGTH` | `256` | size in characters | By default, PyKX reports IPC connection errors with a message buffer of size 256 characters. This allows the length of these error messages to be modified reducing the chance of excessive error messages polluting logs. | | -| `PYKX_NOQCE` | `False` | `1` or `true` | On Linux, PyKX comes with q Cloud Edition features from Insights Core (https://code.kx.com/insights/1.2/core/). This variable allows a user to skip the loading of q Cloud Edition functionality, saving some time when importing PyKX but removing access to possibly supported additional functionality. | | +| `PYKX_NOQCE` | `False` | `1` or `true` | On Linux, PyKX comes with q Cloud Edition features from [Insights Core](https://code.kx.com/insights/core/). 
This variable allows a user to skip the loading of q Cloud Edition functionality, saving some time when importing PyKX but removing access to possibly supported additional functionality. | | | `PYKX_Q_LIB_LOCATION` | `UNSET` | Path to a directory containing q libraries necessary for loading PyKX | See [here](../release-notes/changelog.md#pykx-131) for detailed information. This allows a user to centralise the q libraries, `q.k`, `read.q`, `libq.so` etc to a managed location within their environment which is decentralised from the Python installation. This is required for some enterprise use-cases. | | | `PYKX_RELEASE_GIL` | `False` | `1` or `true` | When PYKX_RELEASE_GIL is enabled the Python Global Interpreter Lock will not be held when calling into q. | | | `PYKX_Q_LOCK` | `False` | `1` or `true` | When PYKX_Q_LOCK is enabled a re-entrant lock is added around calls into q, this lock will stop multiple threads from calling into q at the same time. This allows embedded q to be thread safe even when using PYKX_RELEASE_GIL. | | | `PYKX_DEBUG_INSIGHTS_LIBRARIES` | `False` | `1` or `true` | If the insights libraries failed to load this variable can be used to print out the full error output for debugging purposes. | | | `PYKX_UNLICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `unlicensed` mode at all times. | | | `PYKX_LICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `licensed` mode at all times. | | -| `PYKX_THREADING` | `False` | `1` or `true` | When importing PyKX start EmbeddedQ within a background thread. This allows calls into q from any thread to modify state, this environment variable is only supported for licensed users. | | +| `PYKX_THREADING` | `False` | `1` or `true` | When importing PyKX start EmbeddedQ within a background thread. This allows calls into q from any thread to modify state, this environment variable is only supported for licensed users. 
| | | `PYKX_SKIP_SIGNAL_OVERWRITE` | `False` | `1` or `true` | Skip overwriting of [signal](https://docs.python.org/3/library/signal.html) definitions by PyKX, these are presently overwritten by default to reset Pythonic default definitions with are reset by PyKX on initialisation in licensed modality. | | | `PYKX_NO_SIGNAL` | `False` | `1` or `true` | Skip overwriting of [signal](https://docs.python.org/3/library/signal.html) definitions by PyKX, these are presently overwritten by default to reset Pythonic default definitions with are reset by PyKX on initialisation in licensed modality. | | +| `PYKX_4_1_ENABLED` | `False` | `1` or `true` | Load version 4.1 of `libq` when starting `PyKX` in licensed mode, this environment variable does not work without a valid `q` license. | | | `PYKX_NO_SIGINT` | `False` | `1` or `true` | Avoid setting `signal.signal(signal.SIGINT)` once PyKX is loaded, these are presently set to the Python default values once PyKX is loaded to ensure that PyKX licensed modality does not block their use by Python. | `DEPRECATED`, please use `PYKX_NO_SIGNAL` | | `IGNORE_QHOME` | `True` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. | `DEPRECATED`, please use `PYKX_IGNORE_QHOME` | | `KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. 
| `DEPRECATED`, please use `PYKX_KEEP_LOCAL_TIMES` | @@ -144,5 +146,5 @@ PyKX can be loaded and used from a q session (see [here](../pykx-under-q/intro.m On Linux, the q Cloud Edition features, coming with Insights Core, can be used to read data from Cloud Storage (AWS S3, Google Cloud Storage, Azure Blob Storage). Credentials to access the Cloud Storage can be passed using specific environment variables. For more information, see the two following links: - https://code.kx.com/insights/core/objstor/main.html#environment-variables -- https://code.kx.com/insights/1.2/core/kurl/kurl.html#automatic-registration-using-credential-discovery +- https://code.kx.com/insights/core/kurl/kurl.html#automatic-registration-using-credential-discovery diff --git a/docs/user-guide/fundamentals/evaluating.md b/docs/user-guide/fundamentals/evaluating.md index 471f15d..ce31349 100644 --- a/docs/user-guide/fundamentals/evaluating.md +++ b/docs/user-guide/fundamentals/evaluating.md @@ -70,6 +70,54 @@ x x1 The application of arguments to functions within PyKX is limited to a maximum of 8 arguments. This limitation is imposed by the evaluation of q code. +Users wishing to debug failed evaluation of q code can do so either through usage of a `debug` keyword or by globally setting the environment variable `PYKX_QDEBUG`. 
+ +=== "Global Setting" + + ```python + >>> import os + >>> os.environ['PYKX_QDEBUG'] = 'True' + >>> import pykx as kx + >>> kx.q('{x+1}', 'a') + backtrace: + [2] {x+1} + ^ + [1] (.Q.trp) + + [0] {[pykxquery] .Q.trp[value; pykxquery; {if[y~();:(::)];2@"backtrace: + ^ + ",.Q.sbt y;'x}]} + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/embedded_q.py", line 230, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: type + ``` + +=== "Per call debugging" + + ```python + >>> import pykx as kx + >>> kx.q('{x+1}', 'a', debug=True) + backtrace: + [2] {x+1} + ^ + [1] (.Q.trp) + + [0] {[pykxquery] .Q.trp[value; pykxquery; {if[y~();:(::)];2@"backtrace: + ^ + ",.Q.sbt y;'x}]} + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/embedded_q.py", line 230, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: type + ``` + ## Using the q console within PyKX For users more comfortable prototyping q code within a q terminal it is possible within a Python terminal to run an emulation of a q session directly in Python through use of the `kx.q.console` method. diff --git a/examples/subsciber/readme.md b/examples/subsciber/readme.md deleted file mode 100644 index 6c605a6..0000000 --- a/examples/subsciber/readme.md +++ /dev/null @@ -1,86 +0,0 @@ -# PyKX Subscribing to a `q` Process - -The purpose of this example is to provide a quickstart for setting up a python process using `PyKX` to subscribe to a running q process. - -## Quickstart - -This example creates a python subscriber to a q process, that appends data received to the end of a table. 
- -Here we have: -2. A q process running on port 5001 -3. A Python process subscribing to the q process - -### Start the required q processes - -```q -// run q -$ q -p 5001 -q) -``` - -### Start the pykx subscriber - -```bash -// run the subscriber which will automatically connect -$ python subscriber.py -``` - -### Outcome - -What should be observed on invocation of the above is that the q process should have the variable `py_server` set to the handle of the python process once the python process connects. Once this variable is set you can send rows of the table to the python process and they will be appended as they are recieved. - -```q -// run q -$ q -p 5001 -q) -``` - -q process is started. - -```bash -// run the subscriber which will automatically connect -$ python subscriber.py -===== Initital Table ===== -a b ---- -4 8 -9 1 -2 9 -7 5 -0 4 -1 6 -9 6 -2 1 -1 8 -8 5 -===== Initital Table ===== - -``` - -Python process is started with a table, and it connects to the q server and sets the `py_server` variable. - -```q -q)py_server[1 2] - -``` - -Send a new table row (1, 2) to the python process from q. - -```python -Recieved new table row from q: 1 2 -a b ---- -4 8 -9 1 -2 9 -7 5 -0 4 -1 6 -9 6 -2 1 -1 8 -8 5 -1 2 -``` - -The new row has been appended to the table. 
diff --git a/examples/subsciber/subscriber.py b/examples/subsciber/subscriber.py deleted file mode 100644 index 5ab019a..0000000 --- a/examples/subsciber/subscriber.py +++ /dev/null @@ -1,44 +0,0 @@ -import pykx as kx - -import asyncio - - -table = kx.q('([] a: 10?10; b: 10?10)') - - -def assert_result(res): - # assert message from q process has the correct schema to be appended to the table - return type(res) is kx.LongVector and len(res) == 2 - - -async def main_loop(q): - global table - iters = 200 # only run a limited number of iterations for the example - while True: - await asyncio.sleep(0.5) # allows other async tasks to run along side - result = q.poll_recv() # this will return None if no message is available to be read - if assert_result(result): - print(f'Recieved new table row from q: {result}') - table = kx.q.upsert(table, result) - print(table) - result = None - iters -= 1 - if iters < 0: - break - - -async def main(): - global table - async with kx.RawQConnection(port=5001) as q: - print('===== Initital Table =====') - print(table) - print('===== Initital Table =====') - # Set the variable py_server on the q process pointing towards this processes IPC connection - # We use neg to ensure the messages are sent async so no reply is expected from this process - await q('py_server: neg .z.w') - - await main_loop(q) - - -if __name__ == '__main__': - asyncio.run(main()) diff --git a/examples/subscriber/readme.md b/examples/subscriber/readme.md index 6c605a6..71876fc 100644 --- a/examples/subscriber/readme.md +++ b/examples/subscriber/readme.md @@ -40,7 +40,7 @@ q process is started. 
```bash // run the subscriber which will automatically connect $ python subscriber.py -===== Initital Table ===== +===== Initial Table ===== a b --- 4 8 @@ -53,7 +53,7 @@ a b 2 1 1 8 8 5 -===== Initital Table ===== +===== Initial Table ===== ``` diff --git a/examples/subscriber/subscriber.py b/examples/subscriber/subscriber.py index 5ab019a..e187d34 100644 --- a/examples/subscriber/subscriber.py +++ b/examples/subscriber/subscriber.py @@ -30,9 +30,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001) as q: - print('===== Initital Table =====') + print('===== Initial Table =====') print(table) - print('===== Initital Table =====') + print('===== Initial Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/mkdocs.yml b/mkdocs.yml index 9bcaef0..fe1d2c0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -3,7 +3,7 @@ site_name: 'PyKX' site_author: 'KX' site_description: 'PyKX Documentation' site_url: 'https://code.kx.com/pykx' -copyright: 'Ⓒ KX 2022' +copyright: '© 2024 Kx Systems, Inc. KX and kdb+ are registered trademarks of Kx Systems, Inc., a subsidiary of FD Technologies plc.'
site_dir: 'public' dev_addr: 'localhost:8080' @@ -211,6 +211,7 @@ nav: - License management: api/license.md - Random data generation: api/random.md - Querying: api/query.md + - Compression and Encryption APIs: api/compress.md - Database Interactions: api/db.md - Remote Python Execution: api/remote.md - IPC: api/ipc.md @@ -225,6 +226,7 @@ nav: - Beta Features: - Introduction: beta-features/index.md - Database Management: beta-features/db-management.md + - Compression and Encryption: beta-features/compress-encypt.md - Remote Function Execution: beta-features/remote-functions.md - Multithreading: beta-features/threading.md - Python interfacing within q: @@ -245,6 +247,7 @@ nav: - PyKX: release-notes/changelog.md - PyKX under q: release-notes/underq-changelog.md - Roadmap: roadmap.md + - Blogs, Articles and Videos: blogs.md - Troubleshooting: troubleshooting.md - Frequently Asked Questions (FAQ): faq.md - Support: support.md diff --git a/pyproject.toml b/pyproject.toml index 9049736..35d6aa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,11 +48,12 @@ classifiers = [ "Typing :: Typed", ] dependencies = [ - "numpy~=1.20; python_version=='3.7'", - "numpy~=1.22; python_version=='3.8'", - "numpy~=1.22; python_version=='3.9'", - "numpy~=1.22; python_version=='3.10'", - "numpy~=1.23; python_version>='3.11'", + "numpy~=1.20, <2.0; python_version=='3.7'", + "numpy~=1.22, <2.0; python_version=='3.8'", + "numpy~=1.22, <2.0; python_version=='3.9'", + "numpy~=1.22, <2.0; python_version=='3.10'", + "numpy~=1.23, <2.0; python_version=='3.11'", + "numpy~=1.26, <2.0; python_version=='3.12'", "pandas>=1.2, < 2.2.0", "pytz>=2022.1", "toml~=0.10.2", @@ -91,9 +92,7 @@ lint = [ "pyproject-flake8==0.0.1a2", ] pyarrow = [ - # TODO: Fix pyarrow version 10.0 support - # TODO: Fix pyarrow support for python 3.11 - "pyarrow>=3.0.0, <10.0.0; python_version!='3.11'", + "pyarrow>=3.0.0", ] dashboards = [ "ast2json~=0.3", @@ -133,10 +132,13 @@ requires = [ "numpy~=1.22, <1.23; 
python_version=='3.9'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 "numpy~=1.22, <1.23; python_version=='3.10'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 "numpy~=1.23.2, <1.24; python_version=='3.11'", - "setuptools==60.9.3", - "setuptools-scm[toml]~=6.0.1", + "numpy~=1.26.0; python_version=='3.12'", + "setuptools~=68.0.0; python_version=='3.7'", + "setuptools~=69.0.2; python_version!='3.7'", + "setuptools-scm[toml]~=7.1.0; python_version=='3.7'", + "setuptools-scm[toml]~=8.0; python_version!='3.7'", "tomli>=2.0.1", - "wheel>=0.36.2", + "wheel>=0.42.0", ] build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index 9013469..2696be7 100755 --- a/setup.py +++ b/setup.py @@ -9,11 +9,10 @@ import setuptools # noqa: I100, F401 from contextlib import contextmanager # noqa: I100 -from contextlib import redirect_stderr -from distutils.command.clean import clean as default_clean -from distutils.ccompiler import new_compiler -from distutils.sysconfig import customize_compiler, get_python_inc -from glob import iglob + +from setuptools._distutils.ccompiler import new_compiler +from setuptools._distutils.sysconfig import customize_compiler, get_python_inc + import os from pathlib import Path import platform @@ -130,37 +129,8 @@ def build_q_c_extensions(self): self.build_q_c_extension(compiler, '_tcore', lib_ext, library=['pthread']) -class clean(default_clean): - """clean command that cleans some extra files and directories.""" - targets = (iglob(x) for x in ( - 'build', - 'dist', - '.eggs', - 'pykx.egg-info', - str(src_dir/'*.so*'), - str(src_dir/'build'), - )) - - def run(self): - with cd(script_dir): - for z in (x for y in self.targets for x in y): - rmrf(str(z)) - for f in src_dir.iterdir(): - if f.suffix == '.pyx': - rmrf(str(f.parent/(f.stem + '.c'))) - rmrf(str(f.parent/(f.stem + '.html'))) - with open(os.devnull, 'w') as 
devnull: - with redirect_stderr(devnull): - # This command has some useless/annoying output that can make it appear like - # there's an issue when there isn't, so we silence it. - super().run() - # If there's a real problem the error will propagate out, so we'll still see it - - def cythonize_extensions(extensions: List[Extension]) -> List[Extension]: """Convert .pyx/.pxd Extensions into regular .c/.cpp Extensions""" - if 'clean' in sys.argv: - return [] with cd(script_dir/'src'): cythonized = cythonize( extensions, @@ -243,6 +213,7 @@ def ext(name: str, define_macros=[ # Prevents the use of deprecated Numpy C APIs ('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION'), + ('NPY_TARGET_VERSION', 'NPY_1_22_API_VERSION'), *((('CYTHON_TRACE', '1'), ('CYTHON_TRACE_NOGIL', '1')) if debug else ()) ], ) @@ -267,6 +238,7 @@ def ext(name: str, ] if system != 'Windows' else []), ]), ] + if py_minor_version >= 8: # python 3.8 or higher is required for NEP-49 exts.append(ext('_numpy', numpy=True, cython=False, libraries=['dl', *windows_libraries])) exts.append(ext('numpy_conversions', @@ -277,6 +249,12 @@ def ext(name: str, numpy=False, cython=False, libraries=['dl', *windows_libraries])) + with cd(src_dir/'lib'): + [ + shutil.copy(f, f'4-1-libs/{f}') + for f in + [str(f) for f in os.listdir() if os.path.isfile(f) and str(f) != 'q.k'] + ] setup( name=pyproject['name'], description=pyproject['description'], @@ -293,7 +271,6 @@ def ext(name: str, package_dir={'pykx': str(Path('src/pykx'))}, cmdclass={ 'build_ext': build_ext, - 'clean': clean, }, include_package_data=True, # makes setuptools use MANIFEST.in zip_safe=False, # required by Cython diff --git a/src/pykx/__init__.py b/src/pykx/__init__.py index 0f8632a..c756518 100644 --- a/src/pykx/__init__.py +++ b/src/pykx/__init__.py @@ -218,7 +218,7 @@ def _register(self, else: path = _first_resolved_path([''.join(x) for x in it.product( # `str(Path/@)[:-1]` gets the path with a trailing path separator - (str(x/'@')[:-1] for x in 
self.paths), + (str(x/Path('@'))[:-1] for x in self.paths), ('.', ''), (name,), ('.q', '.k'), @@ -289,6 +289,7 @@ def paths(self, paths: List[Union[str, Path]]): from ._ipc import _init as _ipc_init _ipc_init(q) +from .compress_encrypt import Compress, CompressionAlgorithm, Encrypt from .db import DB from .ipc import AsyncQConnection, QConnection, QFuture, RawQConnection, SecureQConnection, SyncQConnection # noqa from .config import qargs, qhome, qlic @@ -315,6 +316,9 @@ def paths(self, paths: List[Union[str, Path]]): from .remote import _init as _remote_init _remote_init(q) +from .compress_encrypt import _init as _compress_init +_compress_init(q) + if k_allocator: from . import _numpy as _pykx_numpy_cext diff --git a/src/pykx/compress_encrypt.py b/src/pykx/compress_encrypt.py new file mode 100644 index 0000000..b46fdf7 --- /dev/null +++ b/src/pykx/compress_encrypt.py @@ -0,0 +1,198 @@ +"""Functionality for the setting of compression and encryption configuration when + handling on-disk data. + +!!! Warning + + This functionality is provided in its present form as a BETA + Feature and is subject to change. To enable this functionality + for testing please follow the configuration instructions + [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'` +""" + +from . import beta_features +from .config import _check_beta + +from enum import Enum +from math import log2 +import os +from pathlib import Path + +__all__ = [ + 'CompressionAlgorithm', + 'Compress', + 'Encrypt', +] + +beta_features.append('Compression and Encryption') + + +def _init(_q): + global q + q = _q + + +def __dir__(): + return __all__ + + +class CompressionAlgorithm(Enum): + """ + The compression algorithm to be used when compressing a DB partition/column. + + Presently the supported algorithms are qipc, gzip, snappy, lz4hc and zstd. + These algorithms support different compression levels denoting the aggressiveness + of compression in each case.
+ + | algorithm | levels | + |-----------|--------| + | none | 0 | + | q IPC | 0 | + | gzip | 0-9 | + | snappy | 0 | + | lz4hc | 0-16 | + | zstd | -7-22 | + """ + none = 0 + ipc = 1 + gzip = 2 + snappy = 3 + lz4hc = 4 + zstd = 5 + + +_compression_ranges = { + CompressionAlgorithm.none: range(0, 1), + CompressionAlgorithm.ipc: range(0, 1), + CompressionAlgorithm.gzip: range(0, 10), + CompressionAlgorithm.snappy: range(0, 1), + CompressionAlgorithm.zstd: range(-7, 23), + CompressionAlgorithm.lz4hc: range(1, 17)} + + +class Encrypt(): + def __init__(self, path=None, password=None): + """ + Initialize a class object which is used to control the use of encryption with PyKX. + + Parameters: + path: Location of a user's encryption key file as a 'str' object + password: Password which has been set for the encryption key file + + Example: + + ```python + >>> import pykx as kx + >>> encrypt = kx.Encrypt('/path/to/mykey.key', 'mySuperSecretPassword') + ``` + """ + _check_beta('Compression and Encryption') + self.loaded = False + path = Path(os.path.abspath(path)) + if not os.path.isfile(path): + raise ValueError("Provided 'path' does not exist") + self.path = path + if password is None: + raise ValueError('Password provided is None, please provide a str object') + if not isinstance(password, str): + raise TypeError('Password must be supplied as a string') + self.password = password + + def load_key(self): + """ + Load the encryption key within your process, note this will be a global load. + + Example: + + ```python + >>> import pykx as kx + >>> encrypt = kx.Encrypt('/path/to/mykey.key', 'mySuperSecretPassword') + >>> encrypt.load_key() + ``` + """ + q('{-36!(hsym x;y)}', self.path, bytes(self.password, 'UTF-8')) + self.loaded = True + + +class Compress(): + def __init__(self, + algo=CompressionAlgorithm.none, + block_size=2**17, + level=None): + """ + Initialize a class object which is used to control compression within PyKX.
+ + Parameters: + algo: Compression algorithm to be used when applying compression, + this must be one of: + + - `kx.CompressionAlgorithm.none` + - `kx.CompressionAlgorithm.ipc` + - `kx.CompressionAlgorithm.gzip` + - `kx.CompressionAlgorithm.snappy` + - `kx.CompressionAlgorithm.lz4hc` + + block_size: Must be a power of 2 between 2**12 and 2**20 denoting the pageSize or + allocation granularity to 1MB, see + [here](https://code.kx.com/q/kb/file-compression/#compression-parameters) + for more information. + + level: The degree to which compression will be applied, when non zero values + are supported for a supported algorithm larger values will result in + higher compression ratios. + + Example: + + ```python + >>> import pykx as kx + >>> comp = kx.Compress(kx.CompressionAlgorithm.gzip, level=5) + ``` + """ + _check_beta('Compression and Encryption') + self.algorithm = algo + if block_size & (block_size - 1): + raise ValueError(f'block_size must be a power of 2, not {block_size}') + self.encrypt = False + self.block_size = int(log2(block_size)) + if (algo == CompressionAlgorithm.zstd) & q('.z.K<4.1').py(): + raise ValueError("'CompressionAlgorithm.zstd' only supported on PyKX>=4.1") + compression_range = _compression_ranges[algo] + if level is None: + level = compression_range.stop -1 + elif level not in compression_range: + raise ValueError( + f'Invalid level {level} for {algo} ' + f'algorithm. Valid range is {compression_range}') + self.compression_level = level + + def global_init(self, encrypt=False): + """ + Globally initialise compression settings, when completed any persistence + operation making use of `kx.q.set` will be compressed based on the user + specified compression settings + + Parameters: + encrypt: A `kx.Encrypt` object denoting if and using what credentials + encryption is to be applied.
+ + Example: + + ```python + >>> import pykx as kx + >>> comp = kx.Compress(kx.CompressionAlgorithm.gzip, level=2) + >>> kx.q.z.zd + pykx.Identity(pykx.q('::')) + >>> comp.global_init() + >>> kx.q.z.zd + pykx.LongVector(pykx.q('17 2 2')) + ``` + """ + if not self.encrypt: + if isinstance(encrypt, Encrypt): + if not encrypt.loaded: + encrypt.load_key() + self.encrypt = True + else: + self.encrypt = False + q.z.zd = [self.block_size, + self.algorithm.value + (16 if self.encrypt else 0), + self.compression_level] diff --git a/src/pykx/config.py b/src/pykx/config.py index 63af5d1..8136346 100644 --- a/src/pykx/config.py +++ b/src/pykx/config.py @@ -74,7 +74,8 @@ def _is_set(envvar): pykx_dir = Path(__file__).parent.resolve(strict=True) os.environ['PYKX_DIR'] = str(pykx_dir) os.environ['PYKX_EXECUTABLE'] = sys.executable -pykx_lib_dir = Path(_get_config_value('PYKX_Q_LIB_LOCATION', pykx_dir/'lib')) +pykx_libs_dir = Path(pykx_dir/'lib') if _get_config_value('PYKX_4_1_ENABLED', None) is None else Path(pykx_dir/'lib'/'4-1-libs') # noqa +pykx_lib_dir = Path(_get_config_value('PYKX_Q_LIB_LOCATION', pykx_libs_dir)) pykx_platlib_dir = pykx_lib_dir/q_lib_dir_name lib_prefix = '' if system == 'Windows' else 'lib' lib_ext = { @@ -213,9 +214,9 @@ def _license_install(intro=None, return_value=False): # noqa: licensed = False under_q = _is_enabled('PYKX_UNDER_Q') -qlib_location = Path(_get_config_value('PYKX_Q_LIB_LOCATION', pykx_dir/'lib')) +qlib_location = Path(_get_config_value('PYKX_Q_LIB_LOCATION', pykx_libs_dir)) pykx_threading = _is_enabled('PYKX_THREADING') -if platform.system() == 'Windows': +if platform.system() == 'Windows' and pykx_threading: pykx_threading = False warn('PYKX_THREADING is only supported on Linux / MacOS, it has been disabled.') no_sigint = _is_enabled('PYKX_NO_SIGINT', deprecated=True) @@ -246,6 +247,7 @@ def _license_install(intro=None, return_value=False): # noqa: no_qce = _is_enabled('PYKX_NOQCE', '--no-qce') beta_features = 
_is_enabled('PYKX_BETA_FEATURES', '--beta') load_pyarrow_unsafe = _is_enabled('PYKX_LOAD_PYARROW_UNSAFE', '--load-pyarrow-unsafe') +pykx_qdebug = _is_enabled('PYKX_QDEBUG', '--q-debug') pandas_2 = pd.__version__.split('.')[0] == '2' disable_pandas_warning = _is_enabled('PYKX_DISABLE_PANDAS_WARNING') @@ -294,7 +296,6 @@ def _check_beta(feature_name, *, status=beta_features): 'qargs', 'licensed', 'under_q', - 'qlib_location', 'ignore_qhome', 'keep_local_times', diff --git a/src/pykx/core.pyx b/src/pykx/core.pyx index 2a999cb..21efe10 100644 --- a/src/pykx/core.pyx +++ b/src/pykx/core.pyx @@ -9,7 +9,7 @@ import sys from . import beta_features from .util import num_available_cores -from .config import tcore_path_location, _is_enabled, _license_install, pykx_threading, _check_beta +from .config import tcore_path_location, _is_enabled, _license_install, pykx_threading, _check_beta, _get_config_value, pykx_lib_dir, ignore_qhome def _normalize_qargs(user_args: List[str]) -> Tuple[bytes]: @@ -47,13 +47,13 @@ def _normalize_qargs(user_args: List[str]) -> Tuple[bytes]: ) -cdef int _qinit(int (*qinit)(int, char**, char*, char*, char*), qhome_str: str, qlic_str: str, args: List[str]) except *: +cdef int _qinit(int (*qinit)(int, char**, char*, char*, char*), qhome_str: str, qlic_str: str, ignore_qhome: bool, args: List[str]) except *: normalized_args = _normalize_qargs(args) cdef int argc = len(normalized_args) cdef char** argv = PyMem_Malloc(sizeof(char*) * argc) for i, arg in enumerate(normalized_args): argv[i] = strncpy(PyMem_Malloc(len(arg) + 1), arg + b'\0', len(arg) + 1) - qhome_bytes = bytes(Path(__file__).parent.absolute()/'lib') + qhome_bytes = bytes(pykx_lib_dir) if ignore_qhome else qhome_str.encode() qlic_bytes = qlic_str.encode() init_code = qinit(argc, argv, qhome_bytes, qlic_bytes, NULL) os.environ['QHOME'] = qhome_str @@ -81,7 +81,7 @@ if qinit_check_data is not None: _libq_path = _libq_path_py # nocov _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) # 
nocov qinit = dlsym(_q_handle, 'qinit') # nocov - os._exit(_qinit(qinit, _qhome_str, _qlic_str, _qargs)) # nocov + os._exit(_qinit(qinit, _qhome_str, _qlic_str, ignore_qhome, _qargs)) # nocov from libc.string cimport strncpy @@ -92,10 +92,11 @@ from warnings import warn import subprocess import sys -from .config import find_core_lib, ignore_qhome, k_gc, qargs, qhome, qlic, pykx_lib_dir, \ +from .config import find_core_lib, k_gc, qargs, qhome, qlic, pykx_lib_dir, \ release_gil, _set_licensed, under_q, use_q_lock from .exceptions import PyKXException, PyKXWarning +final_qhome = str(qhome if ignore_qhome else pykx_lib_dir) if '--licensed' in qargs and '--unlicensed' in qargs: raise PyKXException("$QARGS includes mutually exclusive flags '--licensed' and '--unlicensed'") @@ -281,7 +282,7 @@ if not pykx_threading: **os.environ, 'PYKX_QINIT_CHECK': ';'.join(( str(_core_q_lib_path), - str(pykx_lib_dir if ignore_qhome is None else qhome), + final_qhome, str(qlic), # Use the env var directly because `config.qargs` has already split the args. os.environ.get('QARGS', ''), @@ -331,7 +332,7 @@ if not pykx_threading: # employed by q. 
try: _link_qhome() - except BaseException: + except BaseException as e: warn('Failed to link user QHOME directory contents to allow access to PyKX.\n' 'To suppress this warning please set the configuration option "PYKX_IGNORE_QHOME" as outlined at:\n' 'https://code.kx.com/pykx/user-guide/configuration.html') @@ -339,7 +340,7 @@ if not pykx_threading: _libq_path = _libq_path_py _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) qinit = dlsym(_q_handle, 'qinit') - qinit_return_code = _qinit(qinit, str(qhome if ignore_qhome else pykx_lib_dir), str(qlic), list(qargs)) + qinit_return_code = _qinit(qinit, final_qhome, str(qlic), ignore_qhome, list(qargs)) if qinit_return_code: # nocov dlclose(_q_handle) # nocov licensed = False # nocov @@ -357,7 +358,7 @@ else: init_syms(_libq_path) qinit = dlsym(_q_handle, 'q_init') - qinit_return_code = _qinit(qinit, str(qhome if ignore_qhome else pykx_lib_dir), str(qlic), list(qargs)) + qinit_return_code = _qinit(qinit, final_qhome, str(qlic), ignore_qhome, list(qargs)) if qinit_return_code: # nocov dlclose(_q_handle) # nocov licensed = False # nocov @@ -371,7 +372,7 @@ else: f'Non-zero qinit return code {qinit_return_code}, failed to initialize ' 'PYKX_THREADING.' ) # nocov - os.environ['QHOME'] = str(qhome if ignore_qhome else pykx_lib_dir) + os.environ['QHOME'] = final_qhome licensed = True _set_licensed(licensed) diff --git a/src/pykx/db.py b/src/pykx/db.py index 2eb77be..dfc0f1a 100644 --- a/src/pykx/db.py +++ b/src/pykx/db.py @@ -12,6 +12,7 @@ from . import wrappers as k from . 
import beta_features from .config import _check_beta +from .compress_encrypt import Compress, Encrypt import os from pathlib import Path @@ -48,12 +49,6 @@ def _check_column(cls, table, column): raise QError("Specified column '" + column + "' not present in table '" + table + "'") -def _check_table(cls, table): - if not k.PartitionedTable == type(getattr(cls.table, table)): # noqa: E721 - raise QError("Application of Database Management functionality only " - "supported for Partitioned Databases") - - _ktype_to_conversion = { k.GUIDAtom: "guid", k.BooleanAtom: "boolean", @@ -90,6 +85,7 @@ class _TABLES: class DB(_TABLES): """Singleton class used for the management of kdb+ Databases""" _instance = None + _init_tabs = None path = None tables = None table = _TABLES @@ -110,7 +106,8 @@ def __init__(self, *, path=None): pass def create(self, table, table_name, partition, *, # noqa: C901 - by_field=None, sym_enum=None, log=True): + by_field=None, sym_enum=None, log=True, + compress=None, encrypt=None): """ Create an on-disk partitioned table within a kdb+ database from a supplied `pykx.Table` object. Once generated this table will be accessible @@ -128,6 +125,11 @@ def create(self, table, table_name, partition, *, # noqa: C901 by the partitioning column) sym_enum: The name of the symbol enumeration table to be associated with the table log: Print information about status of partitioned datab + compress: `pykx.Compress` initialized class denoting the + compression settings to be used when persisting a partition/partitions + encrypt: `pykx.Encrypt` initialized class denoting the encryption setting to be used + when persisting a partition/partitions + Returns: A `None` object on successful invocation, the database class will be @@ -139,7 +141,7 @@ def create(self, table, table_name, partition, *, # noqa: C901 ```python >>> import pykx as kx - >>> db = kx.DB(path = 'newDB') + >>> db = kx.DB(path = '/tmp/newDB') >>> N = 1000 >>> qtab = kx.Table(data = { ... 
'date': kx.q.asc(kx.random.random(N, kx.q('2020.01 2020.02 2020.03m'))), @@ -168,14 +170,14 @@ def create(self, table, table_name, partition, *, # noqa: C901 ```python >>> import pykx as kx - >>> db = kx.DB(path = 'newDB') + >>> db = kx.DB(path = '/tmp/newDB') >>> N = 333 >>> qtab = kx.Table(data = { ... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), ... 'price': kx.random.random(N, 10.0), ... 'size': kx.random.random(N, 100) ... }) - >>> db.create(qtab, 'stocks', kx.q('2020.04'), by_field = 'sym', sym_enum = 'symbols') + >>> db.create(qtab, 'stocks', kx.q('2020.04m'), by_field = 'sym', sym_enum = 'symbols') >>> db.tables ['stocks'] >>> db.stocks @@ -190,6 +192,30 @@ def create(self, table, table_name, partition, *, # noqa: C901 .. ')) ``` + + Add a table as a partition to an on-disk database, in the example below we are + additionally applying gzip compression to the persisted table + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = '/tmp/newDB') + >>> N = 333 + >>> qtab = kx.Table(data = { + ... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), + ... 'price': kx.random.random(N, 10.0), + ... 'size': kx.random.random(N, 100) + ... 
}) + >>> compress = kx.Compress(kx.CompressionAlgorithm.gzip, level=2) + >>> db.create(qtab, 'stocks', kx.q('2020.04m'), compress=compress) + >>> kx.q('{-21!hsym x}', '/tmp/newDB/2020.04/stocks/price') + pykx.Dictionary(pykx.q(' + compressedLength | 2064 + uncompressedLength| 2680 + algorithm | 2i + logicalBlockSize | 17i + zipLevel | 2i + ')) + ``` """ save_dir = self.path func_name = 'dpfts' @@ -199,6 +225,18 @@ def create(self, table, table_name, partition, *, # noqa: C901 func_name = func_name.replace('f', '') if sym_enum is None: func_name = func_name.replace('s', '') + compression_cache = q.z.zd + if encrypt is not None: + if not isinstance(encrypt, Encrypt): + raise ValueError('Supplied encrypt object not an instance of pykx.Encrypt') + if not encrypt.loaded: + encrypt.load_key() + if compress is None: + compress = Compress() + if compress is not None: + if not isinstance(compress, Compress): + raise ValueError('Supplied compress parameter is not a pykx.Compress object') + compress.global_init(encrypt=encrypt) qfunc = q(_func_mapping[func_name]) try: if type(partition) == str: @@ -221,12 +259,14 @@ def create(self, table, table_name, partition, *, # noqa: C901 qfunc(save_dir, partition, by_field, table_name, sym_enum) except QError as err: q('{![`.;();0b;enlist x]}', table_name) + q.z.zd = compression_cache raise QError(err) q('{![`.;();0b;enlist x]}', table_name) + q.z.zd = compression_cache self.load(self.path, overwrite=True) return None - def load(self, path: Union[Path, str], *, overwrite=False): + def load(self, path: Union[Path, str], *, overwrite=False, encrypt=None): """ Load the tables associated with a kdb+ Database, once loaded a table is accessible as an attribute of the `DB` class or a sub attribute @@ -320,7 +360,11 @@ def load(self, path: Union[Path, str], *, overwrite=False): else: err_info = 'Unable to find object at specified path' raise QError('Loading of kdb+ databases can only be completed on folders: ' + err_info) - preloaded = 
self.tables + if encrypt is not None: + if not isinstance(encrypt, Encrypt): + raise ValueError('Supplied encrypt object not an instance of pykx.Encrypt') + if not encrypt.loaded: + encrypt.load_key() q(''' {[dbpath] @[system"l ",; @@ -331,9 +375,8 @@ def load(self, path: Union[Path, str], *, overwrite=False): ''', load_path) self.path = load_path self.loaded = True - tables = q.tables() - self.tables = tables.py() - for i in q('except', self.tables, preloaded).py(): + self.tables = q.Q.pt.py() + for i in self.tables: if hasattr(self, i): warn(f'A database table "{i}" would overwrite one of the pykx.DB() methods, please access your table via the table attribute') # noqa: E501 else: @@ -385,7 +428,6 @@ def rename_column(self, table, original_name, new_name): ``` """ _check_loading(self, table, 'Column rename') - _check_table(self, table) _check_column(self, table, original_name) q.dbmaint.renamecol(self.path, table, original_name, new_name) self._reload() @@ -430,7 +472,6 @@ def delete_column(self, table, column): ``` """ _check_loading(self, table, 'Column deletion') - _check_table(self, table) _check_column(self, table, column) q.dbmaint.deletecol(self.path, table, column) self._reload() @@ -465,7 +506,6 @@ def rename_table(self, original_name, new_name): ``` """ _check_loading(self, original_name, 'Table rename') - _check_table(self, original_name) q.dbmaint.rentable(self.path, original_name, new_name) # Remove the original table, without this it persists as an accessible table q('{![`.;();0b;enlist x]`}', original_name) @@ -497,7 +537,6 @@ def list_columns(self, table): ``` """ _check_loading(self, table, 'Column listing') - _check_table(self, table) return q.dbmaint.listcols(self.path, table).py() def add_column(self, table, column_name, default_value): @@ -531,7 +570,6 @@ def add_column(self, table, column_name, default_value): ``` """ _check_loading(self, table, 'Column addition') - _check_table(self, table) q.dbmaint.addcol(self.path, table, column_name, 
default_value) self._reload() return(None) @@ -585,7 +623,6 @@ def find_column(self, table, column_name): ``` """ _check_loading(self, table, 'Finding columns') - _check_table(self, table) return q.dbmaint.findcol(self.path, table, column_name).py() def reorder_columns(self, table, new_order): @@ -622,7 +659,6 @@ def reorder_columns(self, table, new_order): ``` """ _check_loading(self, table, 'Column reordering') - _check_table(self, table) q.dbmaint.reordercols(self.path, table, new_order) return None @@ -671,7 +707,6 @@ def set_column_attribute(self, table, column_name, new_attribute): ``` """ _check_loading(self, table, 'Attribute setting') - _check_table(self, table) _check_column(self, table, column_name) if new_attribute not in ['s', 'g', 'p', 'u', 'sorted', 'grouped', 'partitioned', 'unique']: @@ -729,7 +764,6 @@ def set_column_type(self, table, column_name, new_type): ``` """ _check_loading(self, table, 'Column casting') - _check_table(self, table) _check_column(self, table, column_name) if new_type not in _ktype_to_conversion: raise QError("Unable to find user specified conversion type: " + str(new_type)) @@ -786,7 +820,6 @@ def clear_column_attribute(self, table, column_name): ``` """ _check_loading(self, table, 'Attribute clearing') - _check_table(self, table) _check_column(self, table, column_name) q.dbmaint.clearattrcol(self.path, table, column_name) return None @@ -819,7 +852,6 @@ def copy_column(self, table, original_column, new_column): ``` """ _check_loading(self, table, 'Column copying') - _check_table(self, table) _check_column(self, table, original_column) q.dbmaint.copycol(self.path, table, original_column, new_column) self._reload() @@ -903,7 +935,6 @@ def apply_function(self, table, column_name, function): ``` """ _check_loading(self, table, 'Function application') - _check_table(self, table) _check_column(self, table, column_name) if not callable(function): raise RuntimeError("Provided 'function' is not callable") diff --git 
a/src/pykx/embedded_q.py b/src/pykx/embedded_q.py index 2ec67ac..5101193 100644 --- a/src/pykx/embedded_q.py +++ b/src/pykx/embedded_q.py @@ -11,7 +11,7 @@ from . import toq from . import wrappers from . import schema -from .config import find_core_lib, licensed, no_qce, pykx_dir, pykx_threading, qargs, skip_under_q +from .config import find_core_lib, licensed, no_qce, pykx_dir, pykx_qdebug, pykx_threading, qargs, skip_under_q # noqa from .core import keval as _keval from .exceptions import FutureCancelled, LicenseException, NoResults, PyKXException, PyKXWarning, QError # noqa from ._wrappers import _factory as factory @@ -116,6 +116,7 @@ def __call__(self): class EmbeddedQ(Q, metaclass=ABCMetaSingleton): """Interface for q within the current process; can be called to execute q code.""" def __init__(self): # noqa + if licensed: kxic_path = (pykx_dir/'lib'/'kxic.k').as_posix() pykx_qlib_path = (pykx_dir/'pykx').as_posix() @@ -135,9 +136,9 @@ def __init__(self): # noqa if pykx_threading: warn('pykx.q is not supported when using PYKX_THREADING.') code += '@[get;`.pykx.i.kxic.loadfailed;{()!()}]' - kxic_loadfailed = self._call(code, debug=False).py() + kxic_loadfailed = self._call(code, skip_debug=True).py() if (not no_qce) and ('--no-sql' not in qargs): - sql = self._call('$[("insights.lib.sql" in " " vs .z.l 4)¬ `s in key`; @[system; "l s.k_";{x}];::]', debug=False).py() # noqa: E501 + sql = self._call('$[("insights.lib.sql" in " " vs .z.l 4)¬ `s in key`; @[system; "l s.k_";{x}];::]', skip_debug=True).py() # noqa: E501 if sql is not None: kxic_loadfailed['s.k'] = sql for lib, msg in kxic_loadfailed.items(): @@ -150,9 +151,9 @@ def __init__(self): # noqa and os.getenv('PYKX_UNDER_Q') is None ): os.environ['PYKX_Q_LOADED_MARKER'] = 'loaded' - self._call('setenv[`PYKX_Q_LOADED_MARKER; "loaded"]', debug=False) + self._call('setenv[`PYKX_Q_LOADED_MARKER; "loaded"]', skip_debug=True) try: - self._call('.Q.ld', debug=False) + self._call('.Q.ld', skip_debug=True) except 
QError as err: if '.Q.ld' in str(err): # .Q.ld is not defined on the server so we define it here @@ -160,12 +161,12 @@ def __init__(self): # noqa lines = f.readlines() for line in lines: if 'pykxld:' in line: - self._call('k).Q.' + line, debug=False) + self._call('k).Q.' + line, skip_debug=True) break else: raise err pykx_qini_path = (Path(__file__).parent.absolute()/'pykx_init.q_') - self._call(f'\l {pykx_qini_path}', debug=False) # noqa + self._call(f'\l {pykx_qini_path}', skip_debug=True) # noqa pykx_q_path = (Path(__file__).parent.absolute()/'pykx.q') with open(pykx_q_path, 'r') as f: code = f.read() @@ -173,9 +174,9 @@ def __init__(self): # noqa self._call( "{[code;file] value (@';last file;enlist[file],/:.Q.pykxld code)}", code, - b'pykx.q', debug=False + b'pykx.q', skip_debug=True ) - self._call('.pykx.setdefault[enlist"k"]', debug=False) + self._call('.pykx.setdefault[enlist"k"]', skip_debug=True) super().__init__() def __repr__(self): @@ -186,6 +187,7 @@ def __call__(self, *args: Any, wait: Optional[bool] = None, debug: bool = False, + skip_debug: bool = False, **kwargs # since sync got removed this is added to ensure it doesn't break ) -> wrappers.K: """Run code in the q instance. @@ -212,19 +214,21 @@ def __call__(self, TypeError: Too many arguments were provided - q queries cannot have more than 8 parameters. 
""" + if not licensed: raise LicenseException("run q code via 'pykx.q'") if len(args) > 8: raise TypeError('Too many arguments - q queries cannot have more than 8 parameters') - if debug: + query = wrappers.CharVector(query) + if (not skip_debug) and (debug or pykx_qdebug): + if 0 != len(args): + query = wrappers.List([bytes(query), *[wrappers.K(x) for x in args]]) result = _keval( - bytes(wrappers.CharVector( - '{[pykxquery] .Q.trp[value; pykxquery; {2@"backtrace:\n",.Q.sbt y;\'x}]}' - )), - wrappers.List([bytes(wrappers.CharVector(query)), *[wrappers.K(x) for x in args]]) + b'{[pykxquery] .Q.trp[value; pykxquery; {2@"backtrace:\n",.Q.sbt y;\'x}]}', + query ) else: - result = _keval(bytes(wrappers.CharVector(query)), *[wrappers.K(x) for x in args]) + result = _keval(bytes(query), *[wrappers.K(x) for x in args]) if wait is None or wait: return factory(result, False) return self('::', wait=True) diff --git a/src/pykx/ipc.py b/src/pykx/ipc.py index f4fd86e..b98e1d5 100644 --- a/src/pykx/ipc.py +++ b/src/pykx/ipc.py @@ -30,14 +30,14 @@ import selectors import socket from threading import Lock as threading_lock -from time import monotonic_ns +from time import monotonic_ns, sleep from typing import Any, Callable, Optional, Union from warnings import warn from weakref import finalize, WeakMethod import sys from . 
import deserialize, serialize, Q -from .config import max_error_length, pykx_lib_dir, system +from .config import max_error_length, pykx_lib_dir, pykx_qdebug, system from .core import licensed from .exceptions import FutureCancelled, NoResults, PyKXException, QError, UninitializedConnection from .util import get_default_args, normalize_to_bytes, normalize_to_str @@ -123,7 +123,42 @@ def __await__(self) -> Any: except BaseException as e: self.set_exception(QError(str(e))) else: - self.q_connection._recv(acceptAsync=True) + try: + self.q_connection._recv(acceptAsync=True) + except BaseException as e: + if isinstance(e, QError): + raise e + if self.q_connection._connection_info['reconnection_attempts'] != -1: + self.q_connection._cancel_all_futures() + print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr) + loops = self.q_connection._connection_info['reconnection_attempts'] + reconnection_delay = 0.5 + while True: + try: + self.q_connection._create_connection_to_server() + except BaseException as err: + # attempts = 0 is infinite attempts as it will go to -1 + # before the check to break + loops -= 1 + if loops == 0: + print( + 'WARNING: Could not reconnect to server within ' + f'{self.q_connection._connection_info["reconnection_attempts"]} attempts.', + file=sys.stderr + ) # noqa + raise err + print( + f'Failed to reconnect, trying again in {reconnection_delay} ' + 'seconds.', + file=sys.stderr + ) + sleep(reconnection_delay) + reconnection_delay *= 2 + continue + print('Connection successfully reestablished.', file=sys.stderr) + break + else: + raise e yield from self super().__await__() return self.result() @@ -133,6 +168,8 @@ async def __async_await__(self) -> Any: return self.result() while not self.done(): await asyncio.sleep(0) + if self.done(): + return self.result() if self.poll_recv is not None: try: res = self.q_connection.poll_recv() @@ -141,14 +178,85 @@ async def __async_await__(self) -> Any: except BaseException as e: 
self.set_exception(QError(str(e))) else: - self.q_connection._recv(acceptAsync=True) + try: + self.q_connection._recv(acceptAsync=True) + except BaseException as e: + if isinstance(e, QError): + raise e + if self.q_connection._connection_info['reconnection_attempts'] != -1: + self.q_connection._cancel_all_futures() + print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr) + loops = self.q_connection._connection_info['reconnection_attempts'] + reconnection_delay = 0.5 + while True: + try: + self.q_connection._create_connection_to_server() + except BaseException as err: + # attempts = 0 is infinite attempts as it will go to -1 before the + # check to break + loops -= 1 + if loops == 0: + print( + 'WARNING: Could not reconnect to server within ' + f'{self.q_connection._connection_info["reconnection_attempts"]} attempts.', + file=sys.stderr + ) # noqa + raise err + print( + f'Failed to reconnect, trying again in {reconnection_delay} ' + 'seconds.', + file=sys.stderr + ) + sleep(reconnection_delay) + reconnection_delay *= 2 + continue + print('Connection successfully reestablished.', file=sys.stderr) + break + else: + raise e + if self.done(): + return self.result() return await self def _await(self) -> Any: if self.done(): return self.result() - while not self.done(): - self.q_connection._recv(locked=True, acceptAsync=True) + try: + while not self.done(): + self.q_connection._recv(locked=True, acceptAsync=True) + except BaseException as e: + if isinstance(e, QError): + raise e + if self._connection_info['reconnection_attempts'] != -1: + # TODO: Clear call stack futures + print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr) + loops = self._connection_info['reconnection_attempts'] + reconnection_delay = 0.5 + while True: + try: + self._create_connection_to_server() + except BaseException as err: + # attempts = 0 is infinite attempts as it will go to -1 before the check + # to break + loops -= 1 + if loops == 0: + print( + 
'WARNING: Could not reconnect to server within ' + f'{self._connection_info["reconnection_attempts"]} attempts.', + file=sys.stderr + ) + raise err + print( + f'Failed to reconnect, trying again in {reconnection_delay} seconds.', + file=sys.stderr + ) + sleep(reconnection_delay) + reconnection_delay *= 2 + continue + print('Connection successfully reestablished.', file=sys.stderr) + break + else: + raise e return self.result() def set_result(self, val: Any) -> None: @@ -190,7 +298,10 @@ def result(self) -> Any: if self._cancelled: raise FutureCancelled(self._cancelled_message) if self._result is not None: - if self._debug: + if self._cancelled_message != '': + print(f'Connection was lost no result', file=sys.stderr) + return None + if self._debug or pykx_qdebug: if self._result._unlicensed_getitem(0).py() == True: print((self._result._unlicensed_getitem(1).py()).decode(), file=sys.stderr) raise QError(self._result._unlicensed_getitem(2).py().decode()) @@ -198,6 +309,15 @@ def result(self) -> Any: return self._result._unlicensed_getitem(1) return self._result raise NoResults() + + def _disconnected(self): + if object.__getattribute__(self.q_connection, '_loop') is not None: + self.add_done_callback( + lambda x: print(f'Connection was lost no result', file=sys.stderr) + ) + self._result = 0 + self._cancelled_message = ' ' + self._done = True def done(self) -> bool: """ @@ -305,7 +425,8 @@ def __init__(self, unix: bool = False, wait: bool = True, lock: Optional[Union[threading_lock, multiprocessing_lock]] = None, - no_ctx: bool = False + no_ctx: bool = False, + reconnection_attempts: int = -1 ): """Interface with a q process using the q IPC protocol. @@ -332,6 +453,13 @@ def __init__(self, no_ctx: This parameter determines whether or not the context interface will be disabled. disabling the context interface will stop extra q queries being sent but will disable the extra features around the context interface. 
+ reconnection_attempts: This parameter specifies how many attempts will be made to + reconnect to the server if the connection is lost. The query will be resent if the + reconnection is successful. The default is -1 which will not attempt to reconnect, 0 + will continuously attempt to reconnect to the server with no stop and an exponential + backoff between successive attempts. Any positive integer will specify the maximum + number of tries to reconnect before throwing an error if a connection can not be + made. Note: The `username` and `password` parameters are not required. The `username` and `password` parameters are only required if the q server requires @@ -350,6 +478,36 @@ def __init__(self, """ super().__init__() + def _create_connection_to_server(self): + object.__setattr__( + self, + '_handle', + _ipc.init_handle( + self._connection_info['host'], + self._connection_info['port'], + self._connection_info['credentials'], + self._connection_info['unix'], + self._connection_info['tls'], + self._connection_info['timeout'], + self._connection_info['large_messages'] + ) + ) + if not isinstance(self, SecureQConnection): + object.__setattr__( + self, + '_sock', + socket.fromfd( + self._handle, + socket.AF_INET, + socket.SOCK_STREAM + ) + ) + self._sock.setblocking(0) + object.__setattr__(self, '_reader', selectors.DefaultSelector()) + self._reader.register(self._sock, selectors.EVENT_READ, WeakMethod(self._recv_socket)) + object.__setattr__(self, '_writer', selectors.DefaultSelector()) + self._writer.register(self._sock, selectors.EVENT_WRITE, WeakMethod(self._send_sock)) + def _init(self, host: Union[str, bytes] = 'localhost', port: int = None, @@ -365,7 +523,10 @@ def _init(self, no_ctx: bool = False, as_server: bool = False, conn_gc_time: float = 0.0, + reconnection_attempts: int = -1 ): + credentials = f'{normalize_to_str(username, "Username")}:' \ + f'{normalize_to_str(password, "Password")}' object.__setattr__(self, '_connection_info', { 'host': host, 'port':
port, @@ -373,6 +534,7 @@ def _init(self, 'password': password, 'timeout': timeout, 'large_messages': large_messages, + 'credentials': credentials, 'tls': tls, 'unix': unix, 'wait': wait, @@ -380,13 +542,12 @@ def _init(self, 'no_ctx': no_ctx, 'as_server': as_server, 'conn_gc_time': conn_gc_time, + 'reconnection_attempts': reconnection_attempts, }) if system == 'Windows' and unix: # nocov raise TypeError('Unix domain sockets cannot be used on Windows') if port is None or not isinstance(port, int): raise TypeError('IPC port must be provided') - credentials = f'{normalize_to_str(username, "Username")}:' \ - f'{normalize_to_str(password, "Password")}' object.__setattr__(self, '_lock', lock) object.__setattr__(self, 'closed', False) if isinstance(self, RawQConnection) and as_server: @@ -472,7 +633,8 @@ def _send(self, *params, wait: Optional[bool] = None, error=False, - debug=False + debug=False, + skip_debug=False ): if self.closed: raise RuntimeError("Attempted to use a closed IPC connection") @@ -484,7 +646,7 @@ def _send(self, events = self._writer.select(timeout) for key, _mask in events: callback = key.data - if debug: + if (not skip_debug) and (debug or pykx_qdebug): return callback()( key.fileobj, bytes(CharVector( @@ -612,10 +774,18 @@ def _recv_socket(self, sock): chunks = list(a) tot_bytes += 8 if len(chunks) == 0: - self.close() + try: + if self._connection_info['reconnection_attempts'] == -1: + self.close() + except BaseException: + self.close() raise RuntimeError("Attempted to use a closed IPC connection") elif len(chunks) <8: - self.close() + try: + if self._connection_info['reconnection_attempts'] == -1: + self.close() + except BaseException: + self.close() raise RuntimeError("PyKX attempted to process a message containing less than " "the expected number of bytes, connection closed." 
f"\nReturned bytes: {chunks}.\n" @@ -749,7 +919,8 @@ def __init__(self, unix: bool = False, wait: bool = True, lock: Optional[Union[threading_lock, multiprocessing_lock]] = None, - no_ctx: bool = False + no_ctx: bool = False, + reconnection_attempts: int = -1, ): """Interface with a q process using the q IPC protocol. @@ -775,6 +946,13 @@ def __init__(self, no_ctx: This parameter determines whether or not the context interface will be disabled. disabling the context interface will stop extra q queries being sent but will disable the extra features around the context interface. + reconnection_attempts: This parameter specifies how many attempts will be made to + reconnect to the server if the connection is lost. The query will be resent if the + reconnection is successful. The default is -1 which will not attempt to reconnect, 0 + will continuously attempt to reconnect to the server with no stop and an exponential + backoff between successive attempts. Any positive integer will specify the maximum + number of tries to reconnect before throwing an error if a connection can not be + made. Note: The `username` and `password` parameters are not required. The `username` and `password` parameters are only required if the q server requires @@ -812,6 +990,20 @@ def __init__(self, ```python pykx.SyncQConnection(port=5001, unix=True) ``` + + Automatically reconnect to a q server after a disconnect. + + ```python + >>> conn = kx.SyncQConnection(port=5001, reconnection_attempts=0) + >>> conn('til 10') + pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9')) + # server connection is lost here + >>> conn('til 10') + WARNING: Connection lost attempting to reconnect. + Failed to reconnect, trying again in 0.5 seconds. + Connection successfully reestablished.
+ pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9')) + ``` """ self._init(host, port, @@ -825,6 +1017,7 @@ def __init__(self, wait=wait, lock=lock, no_ctx=no_ctx, + reconnection_attempts=reconnection_attempts, ) super().__init__() @@ -833,6 +1026,7 @@ def __call__(self, *args: Any, wait: Optional[bool] = None, debug: bool = False, + skip_debug: bool = False, ) -> K: """Evaluate a query on the connected q process over IPC. @@ -894,25 +1088,59 @@ def __call__(self, if wait is None: wait = self._connection_info['wait'] with self._lock if self._lock is not None else nullcontext(): - return self._call(query, *args, wait=wait, debug=debug) + return self._call(query, *args, wait=wait, debug=debug, skip_debug=skip_debug) def _call(self, query: Union[str, bytes], *args: Any, wait: Optional[bool] = None, debug: bool = False, + skip_debug: bool = False, ) -> K: - self._send(query, *args, wait=wait, debug=debug) - if not wait: - return K(None) - res = self._recv(locked=True) - if not debug: - return res - if res._unlicensed_getitem(0).py() == True: - print((res._unlicensed_getitem(1).py()).decode(), file=sys.stderr) - raise QError(res._unlicensed_getitem(2).py().decode()) - else: - return res._unlicensed_getitem(1) + try: + self._send(query, *args, wait=wait, debug=debug, skip_debug=skip_debug) + if not wait: + return K(None) + res = self._recv(locked=True) + if skip_debug or not (debug or pykx_qdebug): + return res + if res._unlicensed_getitem(0).py() == True: + print((res._unlicensed_getitem(1).py()).decode(), file=sys.stderr) + raise QError(res._unlicensed_getitem(2).py().decode()) + else: + return res._unlicensed_getitem(1) + except BaseException as e: + if isinstance(e, QError): + raise e + if self._connection_info['reconnection_attempts'] != -1: + print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr) + loops = self._connection_info['reconnection_attempts'] + reconnection_delay = 0.5 + while True: + try: + self._create_connection_to_server() + except 
BaseException as err: + # attempts = 0 is infinite attempts as it will go to -1 before the check + # to break + loops -= 1 + if loops == 0: + print( + 'WARNING: Could not reconnect to server within ' + f'{self._connection_info["reconnection_attempts"]} attempts.', + file=sys.stderr + ) + raise err + print( + f'Failed to reconnect, trying again in {reconnection_delay} seconds.', + file=sys.stderr + ) + sleep(reconnection_delay) + reconnection_delay *= 2 + continue + print('Connection successfully reestablished.', file=sys.stderr) + return self._call(query, *args, wait=wait, debug=debug) + else: + raise e def __enter__(self): return self @@ -973,7 +1201,8 @@ def __init__(self, wait: bool = True, lock: Optional[Union[threading_lock, multiprocessing_lock]] = None, event_loop: Optional[asyncio.AbstractEventLoop] = None, - no_ctx: bool = False + no_ctx: bool = False, + reconnection_attempts: int = -1, ): """Interface with a q process using the q IPC protocol. @@ -1003,6 +1232,13 @@ def __init__(self, no_ctx: This parameter determines whether or not the context interface will be disabled. disabling the context interface will stop extra q queries being sent but will disable the extra features around the context interface. + reconnection_attempts: This parameter specifies how many attempts will be made to + reconnect to the server if the connection is lost. The query will not be resent if + the reconnection is successful. The default is -1 which will not attempt to + reconnect, 0 will continuously attempt to reconnect to the server with no stop and an + exponential backoff between successive attempts. Any positive integer will specify + the maximum number of tries to reconnect before throwing an error if a connection + can not be made. Note: The `username` and `password` parameters are not required.
The `username` and `password` parameters are only required if the q server requires @@ -1016,6 +1252,11 @@ def __init__(self, Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used. + Warning: AsyncQConnections will not resend queries that have not completed on reconnection. + When using the `reconnection_attempts` keyword argument any queries that were not + complete before the connection was lost will have to be manually sent again after the + automatic reconnection. + Raises: PyKXException: Using both tls and unix is not possible with a QConnection. @@ -1039,6 +1280,33 @@ def __init__(self, ```python await pykx.AsyncQConnection(port=5001, unix=True) ``` + + Automatically reconnect to a q server after a disconnect. + + ```python + async def main(): + conn = await kx.AsyncQConnection( + port=5001, + event_loop=asyncio.get_event_loop(), + reconnection_attempts=0 + ) + print(await conn('til 10')) + # Connection lost here + # All unfinished futures are cancelled on connection loss + print(await conn('til 10')) # First call only causes a reconnection but won't send the query and returns None + print(await conn('til 10')) # Second one completes + + print(await conn('til 10')) + asyncio.run(main()) + + # Outputs + 0 1 2 3 4 5 6 7 8 9 + WARNING: Connection lost attempting to reconnect. + Connection successfully reestablished.
+ Connection was lost no result + None + 0 1 2 3 4 5 6 7 8 9 + ``` """ # TODO: Remove this once TLS support is fixed if tls: @@ -1057,6 +1325,7 @@ def __init__(self, 'lock': lock, 'loop': event_loop, 'no_ctx': no_ctx, + 'reconnection_attempts':reconnection_attempts, }) object.__setattr__(self, '_initialized', False) @@ -1073,7 +1342,8 @@ async def _async_init(self, wait: bool = True, lock: Optional[Union[threading_lock, multiprocessing_lock]] = None, event_loop: Optional[asyncio.AbstractEventLoop] = None, - no_ctx: bool = False + no_ctx: bool = False, + reconnection_attempts: int = -1, ): object.__setattr__(self, '_call_stack', []) self._init(host, @@ -1087,7 +1357,8 @@ async def _async_init(self, unix=unix, wait=wait, lock=lock, - no_ctx=no_ctx + no_ctx=no_ctx, + reconnection_attempts=reconnection_attempts, ) object.__setattr__(self, '_loop', event_loop) con_info = object.__getattribute__(self, '_connection_info') @@ -1098,21 +1369,28 @@ async def _async_init(self, async def _initobj(self): # nocov """Crutch used for `__await__` after spawning.""" if not self._initialized: - await self._async_init(self._stored_args['host'], - self._stored_args['port'], - *self._stored_args['args'], - username=self._stored_args['username'], - password=self._stored_args['password'], - timeout=self._stored_args['timeout'], - large_messages=self._stored_args['large_messages'], - tls=self._stored_args['tls'], - unix=self._stored_args['unix'], - wait=self._stored_args['wait'], - lock=self._stored_args['lock'], - event_loop=self._stored_args['loop'], - no_ctx=self._stored_args['no_ctx']) + await self._async_init( + self._stored_args['host'], + self._stored_args['port'], + *self._stored_args['args'], + username=self._stored_args['username'], + password=self._stored_args['password'], + timeout=self._stored_args['timeout'], + large_messages=self._stored_args['large_messages'], + tls=self._stored_args['tls'], + unix=self._stored_args['unix'], + wait=self._stored_args['wait'], + 
lock=self._stored_args['lock'], + event_loop=self._stored_args['loop'], + no_ctx=self._stored_args['no_ctx'], + reconnection_attempts=self._stored_args['reconnection_attempts'], + ) return self + def _cancel_all_futures(self): + [x._disconnected() for x in self._call_stack] + self._call_stack = [] + def __await__(self): return self._initobj().__await__() @@ -1205,11 +1483,49 @@ def __call__(self, else: if not self._initialized: raise UninitializedConnection() - with self._lock if self._lock is not None else nullcontext(): - q_future = self._send(query, *args, wait=wait, debug=debug) - if self._loop is None: + try: + with self._lock if self._lock is not None else nullcontext(): + q_future = self._send(query, *args, wait=wait, debug=debug) + if self._loop is None: + return q_future + return self._loop.create_task(q_future.__async_await__()) + except BaseException as e: + if isinstance(e, QError): + raise e + if self._connection_info['reconnection_attempts'] != -1: + self._cancel_all_futures() + print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr) + loops = self._connection_info['reconnection_attempts'] + reconnection_delay = 0.5 + while True: + try: + self._create_connection_to_server() + except BaseException as err: + # attempts = 0 is infinite attempts as it will go to -1 before the check + # to break + loops -= 1 + if loops == 0: + print( + 'WARNING: Could not reconnect to server within ' + f'{self._connection_info["reconnection_attempts"]} attempts.', + file=sys.stderr + ) + raise err + print( + f'Failed to reconnect, trying again in {reconnection_delay} seconds.', + file=sys.stderr + ) + sleep(reconnection_delay) + reconnection_delay *= 2 + continue + print('Connection successfully reestablished.', file=sys.stderr) + break + + q_future = QFuture(self, self._connection_info['timeout'], debug) + q_future.set_result(K(None)) return q_future - return self._loop.create_task(q_future.__async_await__()) + else: + raise e def _call(self, query: 
Union[str, bytes], @@ -1217,8 +1533,42 @@ def _call(self, wait: Optional[bool] = None, debug: bool = False, ): - with self._lock if self._lock is not None else nullcontext(): - return self._send(query, *args, wait=wait, debug=debug)._await() + try: + with self._lock if self._lock is not None else nullcontext(): + return self._send(query, *args, wait=wait, debug=debug)._await() + except BaseException as e: + if isinstance(e, QError): + raise e + if self._connection_info['reconnection_attempts'] != -1: + self._cancel_all_futures() + print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr) + loops = self._connection_info['reconnection_attempts'] + reconnection_delay = 0.5 + while True: + try: + self._create_connection_to_server() + except BaseException as err: + # attempts = 0 is infinite attempts as it will go to -1 before the check + # to break + loops -= 1 + if loops == 0: + print( + 'WARNING: Could not reconnect to server within ' + f'{self._connection_info["reconnection_attempts"]} attempts.', + file=sys.stderr + ) + raise err + print( + f'Failed to reconnect, trying again in {reconnection_delay} seconds.', + file=sys.stderr + ) + sleep(reconnection_delay) + reconnection_delay *= 2 + continue + print('Connection successfully reestablished.', file=sys.stderr) + break + else: + raise e async def __aenter__(self): return await self @@ -1870,7 +2220,7 @@ def clean_open_connections(self): self.open_cons.pop(i) def poll_recv_async(self): - """Asynchronously recieve a query from the process connected to over IPC. + """Asynchronously receive a query from the process connected to over IPC. 
Raises: QError: Query timed out, may be raised if the time taken to make or receive a query goes @@ -2026,7 +2376,8 @@ def __init__(self, unix: bool = False, wait: bool = True, lock: Optional[Union[threading_lock, multiprocessing_lock]] = None, - no_ctx: bool = False + no_ctx: bool = False, + reconnection_attempts: int = -1, ): """Interface with a q process using the q IPC protocol. @@ -2053,6 +2404,13 @@ def __init__(self, no_ctx: This parameter determines whether or not the context interface will be disabled. disabling the context interface will stop extra q queries being sent but will disable the extra features around the context interface. + reconnection_attempts: This parameter specifies how many attempts will be made to + reconnect to the server if the connection is lost. The query will be resent if the + reconnection is successful. The default is -1 which will not attempt to reconnect, 0 + will continuously attempt to reconnect to the server with no stop and an exponential + backoff between successive attempts. Any positive integer will specify the maximum + number of tries to reconnect before throwing an error if a connection can not be + made. Note: The `username` and `password` parameters are not required. The `username` and `password` parameters are only required if the q server requires @@ -2091,6 +2449,7 @@ def __init__(self, wait=wait, lock=lock, no_ctx=no_ctx, + reconnection_attempts=reconnection_attempts, ) super().__init__() @@ -2169,6 +2528,19 @@ def __call__(self, # basis: q('{x set y+til z}', 'async_query', 10, 5, wait=True) ``` + + Automatically reconnect to a q server after a disconnect. + + ```python + >>> conn = kx.SecureQConnection(port=5001, reconnection_attempts=0) + >>> conn('til 10') + pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9')) + >>> conn('til 10') + WARNING: Connection lost attempting to reconnect. + Failed to reconnect, trying again in 0.5 seconds. + Connection successfully reestablished.
+ pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9')) + ``` """ return self._call(query, *args, wait=wait, debug=debug) @@ -2192,24 +2564,59 @@ def _call(self, raise ValueError('Cannot send Python function over IPC') handler = self._licensed_call if licensed else self._unlicensed_call - with self._lock if self._lock is not None else nullcontext(): - if debug: - res = handler( - handle, - normalize_to_bytes( - '{[pykxquery] .Q.trp[{[x] (0b; value x)}; pykxquery;' - '{(1b; "backtrace:\n",.Q.sbt y; x)}]}', - 'Query' - ), - [K(normalize_to_bytes(query, 'Query'))] if len(args) == 0 else [List([K(normalize_to_bytes(query, 'Query')), *args])], - wait, - ) - if res._unlicensed_getitem(0).py() == True: - print((res._unlicensed_getitem(1).py()).decode(), file=sys.stderr) - raise QError(res._unlicensed_getitem(2).py().decode()) - else: - return res._unlicensed_getitem(1) - return handler(handle, normalize_to_bytes(query, 'Query'), args, wait) + try: + with self._lock if self._lock is not None else nullcontext(): + if debug or pykx_qdebug: + res = handler( + handle, + normalize_to_bytes( + '{[pykxquery] .Q.trp[{[x] (0b; value x)}; pykxquery;' + '{(1b; "backtrace:\n",.Q.sbt y; x)}]}', + 'Query' + ), + [K(normalize_to_bytes(query, 'Query'))] if len(args) == 0 else [List([K(normalize_to_bytes(query, 'Query')), *args])], + wait, + ) + if res._unlicensed_getitem(0).py() == True: + print((res._unlicensed_getitem(1).py()).decode(), file=sys.stderr) + raise QError(res._unlicensed_getitem(2).py().decode()) + else: + return res._unlicensed_getitem(1) + return handler(handle, normalize_to_bytes(query, 'Query'), args, wait) + except BaseException as e: + if isinstance(e, QError) and 'snd handle' not in str(e) and 'write to handle' not in str(e) and 'close handle' not in str(e): + raise e + if self._connection_info['reconnection_attempts'] != -1: + print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr) + loops = self._connection_info['reconnection_attempts'] + 
reconnection_delay = 0.5 + while True: + try: + self._create_connection_to_server() + if not licensed and self._handle == -1: + raise ConnectionError('Could not connect to q server') + except BaseException as err: + # attempts = 0 is infinite attempts as it will go to -1 before the check + # to break + loops -= 1 + if loops == 0: + print( + 'WARNING: Could not reconnect to server within ' + f'{self.q_connection._connection_info["reconnection_attempts"]} attempts.', + file=sys.stderr + ) + raise err + print( + f'Failed to reconnect, trying again in {reconnection_delay} seconds.', + file=sys.stderr + ) + sleep(reconnection_delay) + reconnection_delay *= 2 + continue + print('Connection successfully reestablished.', file=sys.stderr) + return self._call(query, *args, wait=wait, debug=debug) + else: + raise e def __enter__(self): return self diff --git a/src/pykx/lib/4-1-libs/bq.q_ b/src/pykx/lib/4-1-libs/bq.q_ new file mode 100644 index 0000000..eb8a30e Binary files /dev/null and b/src/pykx/lib/4-1-libs/bq.q_ differ diff --git a/src/pykx/lib/4-1-libs/csvutil.q b/src/pykx/lib/4-1-libs/csvutil.q new file mode 100644 index 0000000..2c7653f --- /dev/null +++ b/src/pykx/lib/4-1-libs/csvutil.q @@ -0,0 +1,113 @@ +/ utilities to quickly load a csv file - for more exhaustive analysis of the csv contents see csvguess.q +/ 2020.05.06 - bugfix for infolike and info0 +/ 2016.11.09 - add " " as valid delimiter in P +/ 2016.09.03 - allow HHMMSSXYZXYZXYZ N timestamps +/ 2014.08.07 - use .Q.id for colhdrs +/ 2014.01.27 - favour type P rather than Z +/ 2013.05.25 - tighten up U+V +/ 2012.07.11 - add GUID +/ 2009.09.20 - updated to match latest csvguess.q + +/ .csvutil.colhdrs[file] - return a list of colhdrs from file +/ info:.csvutil.info[file] - return a table of information about the file +/ columns are: +/ c - column name; ci - column index; t - load type; mw - max width; +/ dchar - distinct characters in values; rules - rules that caught the type +/ maybe - needs checking, _could_ 
be say a date, but perhaps just a float? +/ .csvutil.infoonly[file;onlycols] - like .csvutil.info except that it only analyses +/ example: +/ info:.csvutil.infoonly[file;`col0`col1`col3] +/ info:.csvutil.infolike[file;"*price"] +/ show delete from info where t=" " +/ .csvutil.data[file;info] - use the info from .csvutil.info to read the data +/ .csvutil.data10[file;info] - like .csvutil.data but only returns the first 10 rows +/ .csvutil.read[file]/read10[file] - for when you don't care about checking/tweaking the before reading + +\d .csvutil +DELIM:"," +ZAPHDRS:0b / lowercase and remove _ from colhdrs (junk characters are always removed) +WIDTHHDR:25000 / number of characters read to get the header +READLINES:222 / number of lines read and used to guess the types +SYMMAXWIDTH:11 / character columns narrower than this are stored as symbols +SYMMAXGR:10 / max symbol granularity% before we give up and keep as a * string +FORCECHARWIDTH:30 / every field (of any type) with values this wide or more is forced to character "*" +DISCARDEMPTY:0b / completely ignore empty columns if true else set them to "C" +CHUNKSIZE:50000000 / used in fs2 (modified .Q.fs) + +k)nameltrim:{$[~@x;.z.s'x;~(*x)in aA:.Q.a,.Q.A;(+/&\~x in aA)_x;x]} +k)fs2:{[f;s]((-7!s)>){[f;s;x]i:1+last@&0xa=r:1:(s;x;CHUNKSIZE);f@`\:i#r;x+i}[f;s]/0j} +cleanhdrs:{{$[ZAPHDRS;lower x except"_";x]}x where x in DELIM,.Q.an} +cancast:{nw:x$"";if[not x in"BXGCS";nw:(min 0#;max 0#;::)@\:nw];$[not any nw in x$(11&count y)#y;$[11.csvutil.FORCECHARWIDTH; / long values + info:update t:"C "[.csvutil.DISCARDEMPTY],(rules:rules,'30),empty:1b from info where t="?",mw=0; / empty columns + info:update dchar:{asc distinct raze x}peach sdv from info where t="?"; + info:update mdot:{max sum each"."=x}peach sdv from info where t="?",{"."in x}each dchar; + info:update t:"n",(rules:rules,'40)from info where t="?",{any x in"0123456789"}each dchar; / vaguely numeric.. 
+ info:update t:"I",(rules:rules,'50),ipa:1b from info where t="n",mw within 7 15,mdot=3,{all x in".0123456789"}each dchar,.csvutil.cancast["I"]peach sdv; / ip-address + info:update t:"F",(rules:rules,'51)from info where t="n",mw>2,mdot<2,{all" /"in x}each dchar,.csvutil.cancast["F"]peach sdv; / fractions, "1 3/4" -> 1.75f + info:update t:"G",(rules:rules,'52) from info where t="*",mw=36,mdot=0,{all x like"????????-????-????-????-????????????"}peach sdv,.csvutil.cancast["G"]peach sdv; / GUID, v3.0 or later + info:update t:"N",(rules:rules,'53),maybe:1b from info where t="n",mw=15,mdot=0,{all x in"0123456789"}each dchar,.csvutil.cancast["N"]peach sdv; / N, could be T but that'd loose precision + info:update t:"T",(rules:rules,'54),maybe:1b from info where t="n",mw=9,mdot=0,{all x in"0123456789"}each dchar,.csvutil.cancast["T"]peach sdv; + info:update t:"G",(rules:rules,'55) from info where t="*",mw=38,mdot=0,{all x like"{????????-????-????-????-????????????}"}peach sdv,.csvutil.cancast["G"]peach sdv; / GUID, v3.0 or later + info:update t:"J",(rules:rules,'60)from info where t="n",mdot=0,{all x in"+-0123456789"}each dchar,.csvutil.cancast["J"]peach sdv; + info:update t:"I",(rules:rules,'70)from info where t="J",mw<12,.csvutil.cancast["I"]peach sdv; + info:update t:"H",(rules:rules,'80)from info where t="I",mw<7,.csvutil.cancast["H"]peach sdv; + info:update t:"F",(rules:rules,'90)from info where t="n",mdot<2,mw>1,.csvutil.cancast["F"]peach sdv; + info:update t:"E",(rules:rules,'100),maybe:1b from info where t="F",mw<9; + info:update t:"M",(rules:rules,'110),maybe:1b from info where t in"nIHEF",mdot<2,mw within 4 7,.csvutil.cancast["M"]peach sdv; + info:update t:"D",(rules:rules,'120),maybe:1b from info where t in"nI",mdot in 0 2,mw within 6 11,.csvutil.cancast["D"]peach sdv; + info:update t:"V",(rules:rules,'130),maybe:1b from info where t="I",mw=6,{all x like"[012][0-9][0-5][0-9][0-5][0-9]"}peach sdv,.csvutil.nostar["V"]peach sdv; / 235959 123456 + info:update 
t:"U",(rules:rules,'140),maybe:1b from info where t="H",mw=4,{all x like"[012][0-9][0-5][0-9]"}peach sdv,.csvutil.nostar["U"]peach sdv; /2359 + info:update t:"U",(rules:rules,'150),maybe:0b from info where t="n",mw in 4 5,mdot=0,{all x like"*[0-9]:[0-5][0-9]"}peach sdv,.csvutil.cancast["U"]peach sdv; + info:update t:"T",(rules:rules,'160),maybe:0b from info where t="n",mw within 7 12,mdot<2,{all x like"*[0-9]:[0-5][0-9]:[0-5][0-9]*"}peach sdv,.csvutil.cancast["T"]peach sdv; + info:update t:"V",(rules:rules,'170),maybe:0b from info where t="T",mw in 7 8,mdot=0,.csvutil.cancast["V"]peach sdv; + info:update t:"T",(rules:rules,'180),maybe:1b from info where t in"EF",mw within 7 10,mdot=1,{all x like"*[0-9][0-5][0-9][0-5][0-9].*"}peach sdv,.csvutil.cancast["T"]peach sdv; + / info:update t:"Z",(rules:rules,'190),maybe:0b from info where t="n",mw within 11 24,mdot<4,.csvutil.cancast["Z"]peach sdv; + info:update t:"P",(rules:rules,'200),maybe:1b from info where t="n",mw within 11 29,mdot<4,{all x like"[12][0-9][0-9][0-9][ ./-][01][0-9][ ./-][0-3][0-9]*"}peach sdv,.csvutil.cancast["P"]peach sdv; + info:update t:"N",(rules:rules,'210),maybe:1b from info where t="n",mw within 3 28,mdot=1,.csvutil.cancast["N"]peach sdv; + info:update t:"?",(rules:rules,'220),maybe:0b from info where t="n"; / reset remaining maybe numeric + info:update t:"C",(rules:rules,'230),maybe:0b from info where t="?",mw=1; / char + info:update t:"D",(rules:rules,'231),maybe:0b from info where t="?",mdot=0,mw within 5 9,{all x like"*[0-9][a-sA-S][a-uA-U][b-yB-Y][0-9][0-9]*"}peach sdv,.csvutil.cancast["D"]peach sdv; / 1dec12..01dec2011 + info:update t:"B",(rules:rules,'240),maybe:0b from info where t in"HC",mw=1,mdot=0,{$[all x in"01tTfFyYnN";(any"0fFnN"in x)and any"1tTyY"in x;0b]}each dchar; / boolean + info:update t:"B",(rules:rules,'250),maybe:1b from info where t in"HC",mw=1,mdot=0,{all x in"01tTfFyYnN"}each dchar; / boolean + info:update t:"X",(rules:rules,'260),maybe:0b from info where 
t="?",mw=2,{$[all x in"0123456789abcdefABCDEF";(any .Q.n in x)and any"abcdefABCDEF"in x;0b]}each dchar; /hex + info:update t:"S",(rules:rules,'270),maybe:1b from info where t="?",mw<.csvutil.SYMMAXWIDTH,mw>1,gr<.csvutil.SYMMAXGR; / symbols (max width permitting) + info:update t:"*",(rules:rules,'280),maybe:0b from info where t="?"; / the rest as strings + / flag those S/* columns which could be encoded to integers (.Q.j10/x10/j12/x12) to avoid symbols + info:update j12:1b from info where t in"S*",mw<13,{all x in .Q.nA}each dchar; + info:update j10:1b from info where t in"S*",mw<11,{all x in .Q.b6}each dchar; + select c,ci,t,maybe,empty,res,j10,j12,ipa,mw,mdot,rules,gr,ndv,dchar from info} +info:info0[;()] / by default don't restrict columns +infolike:{[file;pattern] info0[file;{x where(lower x)like lower y}[colhdrs[file];pattern]]} / .csvutil.infolike[file;"*time"] +infoonly:info0 / only some columns .csvutil.infoonly[file;`this`and`that] + +\d . diff --git a/src/pykx/lib/4-1-libs/dbmaint.q b/src/pykx/lib/4-1-libs/dbmaint.q new file mode 100644 index 0000000..78fbc9e --- /dev/null +++ b/src/pykx/lib/4-1-libs/dbmaint.q @@ -0,0 +1,151 @@ +/ kdb+ partitioned database maintenance +\d .os +WIN:.z.o in`w32`w64 +pth:{p:$[10h=type x;x;string x];if[WIN;p[where"/"=p]:"\\"];(":"=first p)_ p} +cpy:{system$[WIN;"copy /v /z ";"cp "],pth[x]," ",pth y} +del:{system$[WIN;"del ";"rm "],pth x} +ren:{system$[WIN;"move ";"mv "],pth[x]," ",pth y} +here:{hsym`$system$[WIN;"cd";"pwd"]} +\d . 
+ +\d .dbmaint +add1col:{[tabledir;colname;defaultvalue] + if[not colname in ac:allcols tabledir; + stdout"adding column ",(string colname)," (type ",(string type defaultvalue),") to `",string tabledir; + num:count get(`)sv tabledir,first ac; + .[(`)sv tabledir,colname;();:;num#defaultvalue]; + @[tabledir;`.d;,;colname]]} + +allcols:{[tabledir]get tabledir,`.d} + +allpaths:{[dbdir;table] + files:key dbdir; + if[any files like"par.txt";:raze allpaths[;table]each hsym each`$read0(`)sv dbdir,`par.txt]; + files@:where files like"[0-9]*";(`)sv'dbdir,'files,'table} + +copy1col:{[tabledir;oldcol;newcol] + if[(oldcol in ac)and not newcol in ac:allcols tabledir; + stdout"copying ",(string oldcol)," to ",(string newcol)," in `",string tabledir; + .os.cpy[(`)sv tabledir,oldcol;(`)sv tabledir,newcol];@[tabledir;`.d;,;newcol]]} + +delete1col:{[tabledir;col] + if[col in ac:allcols tabledir; + stdout"deleting column ",(string col)," from `",string tabledir; + .os.del[(`)sv tabledir,col];@[tabledir;`.d;:;ac except col]]} + +/ +enum:{[tabledir;val] + if[not 11=abs type val;:val]; + .[p;();,;u@:iasc u@:where not(u:distinct enlist val)in v:$[type key p:(`)sv tabledir,`sym;get p;0#`]];`sym!(v,u)?val} +\ + +enum:{[tabledir;val]if[not 11=abs type val;:val];.Q.dd[tabledir;`sym]?val} + + +find1col:{[tabledir;col] + $[col in allcols tabledir; + [stdout"column ",string[col]," (type ",(string first"i"$read1((`)sv tabledir,col;8;1)),") in `",string tabledir;1b]; + [stdout"column ",string[col]," *NOT*FOUND* in `",string tabledir;0b]]} + +fix1table:{[tabledir;goodpartition;goodpartitioncols] + if[count missing:goodpartitioncols except allcols tabledir; + stdout"fixing table `",string tabledir;{add1col[x;z;0#get y,z]}[tabledir;goodpartition]each missing]} + +fn1col:{[tabledir;col;fn] + if[col in allcols tabledir; + oldattr:-2!oldvalue:get p:tabledir,col; + newattr:-2!newvalue:fn oldvalue; + if[$[not oldattr~newattr;1b;not oldvalue~newvalue]; + stdout"resaving column ",(string col)," (type 
",(string type newvalue),") in `",string tabledir; + oldvalue:0;.[(`)sv p;();:;newvalue]]]} + +reordercols0:{[tabledir;neworder] + if[not((count ac)=count neworder)or all neworder in ac:allcols tabledir;'`order]; + stdout"reordering columns in `",string tabledir; + @[tabledir;`.d;:;neworder]} + +rename1col:{[tabledir;oldname;newname] + if[(oldname in ac)and not newname in ac:allcols tabledir; + stdout"renaming ",(string oldname)," to ",(string newname)," in `",string tabledir; + .os.ren[` sv tabledir,oldname;` sv tabledir,newname];@[tabledir;`.d;:;.[ac;where ac=oldname;:;newname]]]} + +ren1table:{[old;new]stdout"renaming ",(string old)," to ",string new;.os.ren[old;new];} + +add1table:{[dbdir;tablename;table] + stdout"adding ",string tablename; + @[tablename;`;:;.Q.en[dbdir]0#table];} + +stdout:{-1 raze[" "sv string`date`second$.z.P]," ",x;} +validcolname:{(not x in `i,.Q.res,key`.q)and x = .Q.id x} + +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// * public + +thisdb:`:. / if functions are to be run within the database instance then use (`:.) 
as dbdir + +addcol:{[dbdir;table;colname;defaultvalue] / addcol[`:/data/taq;`trade;`noo;0h] + if[not validcolname colname;'(`)sv colname,`invalid.colname]; + add1col[;colname;enum[dbdir;defaultvalue]]each allpaths[dbdir;table];} + +castcol:{[dbdir;table;col;newtype] / castcol[thisdb;`trade;`size;`short] + fncol[dbdir;table;col;newtype$]} + +clearattrcol:{[dbdir;table;col] / clearattr[thisdb;`trade;`sym] + setattrcol[dbdir;table;col;(`)]} + +copycol:{[dbdir;table;oldcol;newcol] / copycol[`:/k4/data/taq;`trade;`size;`size2] + if[not validcolname newcol;'(`)sv newcol,`invalid.newname]; + copy1col[;oldcol;newcol]each allpaths[dbdir;table];} + +deletecol:{[dbdir;table;col] / deletecol[`:/k4/data/taq;`trade;`iz] + delete1col[;col]each allpaths[dbdir;table];} + +findcol:{[dbdir;table;col] / findcol[`:/k4/data/taq;`trade;`iz] + fndcols:find1col[;col]each allpaths[dbdir;table]; + if[not any fndcols;'"Requested column not found in all partitions, see log output above"]} + +/ adds missing columns, but DOESN'T delete extra columns - do that manually +fixtable:{[dbdir;table;goodpartition] / fixtable[`:/k4/data/taq;`trade;`:/data/taq/2005.02.19] + fix1table[;goodpartition;allcols goodpartition]each allpaths[dbdir;table]except goodpartition;} + +fncol:{[dbdir;table;col;fn] / fncol[thisdb;`trade;`price;2*] + fn1col[;col;fn]each allpaths[dbdir;table];} + +listcols:{[dbdir;table] / listcols[`:/k4/data/taq;`trade] + allcols first allpaths[dbdir;table]} + +renamecol:{[dbdir;table;oldname;newname] / renamecol[`:/k4/data/taq;`trade;`woz;`iz] + if[not validcolname newname;'` sv newname,`invalid.newname]; + rename1col[;oldname;newname]each allpaths[dbdir;table];} + +reordercols:{[dbdir;table;neworder] / reordercols[`:/k4/data/taq;`trade;reverse cols trade] + reordercols0[;neworder]each allpaths[dbdir;table];} + +setattrcol:{[dbdir;table;col;newattr] / setattr[thisdb;`trade;`sym;`g] / `s `p `u + fncol[dbdir;table;col;newattr#]} + +addtable:{[dbdir;tablename;table] / 
addtable[`:.;`trade;([]price...)] + add1table[dbdir;;table]each allpaths[dbdir;tablename];} + +rentable:{[dbdir;old;new] / rentable[`:.;`trade;`transactions] + ren1table'[allpaths[dbdir;old];allpaths[dbdir;new]];} + +\d . +\ +test with https://github.com/KxSystems/kdb/blob/master/tq.q (sample taq database) + +if making changes to current database you need to reload (\l .) to make modifications visible + +if the database you've been modifying is a tick database don't forget to adjust the schema (tick/???.q) to reflect your changes to the data + + +.dbmaint.addcol[`:.;`trade;`num;10] +.dbmaint.addcol[`:.;`trade;`F;`test] +.dbmaint.delete1col[`:./2000.10.02/trade;`F] +.dbmaint.fixtable[`:.;`trade;`:./2000.10.03/trade] +.dbmaint.reordercols[`:.;`quote;except[2 rotate cols quote;`date]] +.dbmaint.clearattrcol[`:.;`trade;`sym] +.dbmaint.setattrcol[`:.;`trade;`sym;`p] +.dbmaint.castcol[`:.;`trade;`time;`second] +.dbmaint.renamecol[`:.;`trade;`price;`PRICE] +`PRICE`size .dbmaint.renamecol[`:.;`trade]'`p`s diff --git a/src/pykx/lib/4-1-libs/kurl.q_ b/src/pykx/lib/4-1-libs/kurl.q_ new file mode 100644 index 0000000..d7fe527 Binary files /dev/null and b/src/pykx/lib/4-1-libs/kurl.q_ differ diff --git a/src/pykx/lib/4-1-libs/kurl.sidecar.q_ b/src/pykx/lib/4-1-libs/kurl.sidecar.q_ new file mode 100644 index 0000000..5c5b03f Binary files /dev/null and b/src/pykx/lib/4-1-libs/kurl.sidecar.q_ differ diff --git a/src/pykx/lib/4-1-libs/kxic.k b/src/pykx/lib/4-1-libs/kxic.k new file mode 100644 index 0000000..b7239f3 --- /dev/null +++ b/src/pykx/lib/4-1-libs/kxic.k @@ -0,0 +1,17 @@ +.pykx.i.kxic.loadfailed:()!(); +.comkxic.Kf:{[v]r:,/'$2\:'ri:"I"$raw:"."\:v:v@&v in .Q.n,".";if[max "b"$ri>1023;'version];vi:sum/ 2/:'.:'"0"^(r[0],20#"0";-20$r[1],10#"0";-10$r 2),\:"b";"***III"$`K`k`Kl`major`minor`patch!enlist[v],enlist["D"$"2022.03.10"],vi,raw}; +.[`.comkxic;();,;.comkxic.Kf "3.0.0"]; 
+.[`.comkxic.libs;();,;`nelf`kurl`objstor`qlog`restserver`bq`sql`l64!("6a93b2b";"7fa6ae8";"32363d0";"a49dc65";"00a5b89";"14d547d";"99f3988";"3fa87f1")]; +if[(.z.o~`l64)&$[0=#.z.x;1b;~"--no-qce"in .z.x]; + {[] + lf:{@[{."\\l ",$[""~x:getenv`QHOME;getenv[`HOME],"/q";x],"/",($x),"_"};x;{.pykx.i.kxic.loadfailed,:enlist[x]!enlist y}[x]]}; + d:`kurl.q`objstor.q`qlog.q`s.k!$`insights.lib.kurl`insights.lib.objstore`insights.lib.qlog`insights.lib.sql; + if[`s in !`;d:d _ `s.k]; + if[~0=#.z.x; + if["--no-kurl"in .z.x;d:d _ `kurl.q]; + if["--no-objstor"in .z.x;d:d _ `objstor.q]; + if["--no-qlog"in .z.x;d:d _ `qlog.q]; + if["--no-sql"in .z.x;d:d _ `s.k]]; + if[~0=#d;lf' (!d) @ & (.:d) in " "\:.z.l 4;]; / only load enabled features + }[] + ]; diff --git a/src/pykx/lib/4-1-libs/l64/libq.so b/src/pykx/lib/4-1-libs/l64/libq.so new file mode 100755 index 0000000..b654f7e Binary files /dev/null and b/src/pykx/lib/4-1-libs/l64/libq.so differ diff --git a/src/pykx/lib/4-1-libs/l64arm/libq.so b/src/pykx/lib/4-1-libs/l64arm/libq.so new file mode 100755 index 0000000..8eaf7ea Binary files /dev/null and b/src/pykx/lib/4-1-libs/l64arm/libq.so differ diff --git a/src/pykx/lib/4-1-libs/licmet.q_ b/src/pykx/lib/4-1-libs/licmet.q_ new file mode 100644 index 0000000..68d39c0 Binary files /dev/null and b/src/pykx/lib/4-1-libs/licmet.q_ differ diff --git a/src/pykx/lib/4-1-libs/m64/libq.dylib b/src/pykx/lib/4-1-libs/m64/libq.dylib new file mode 100755 index 0000000..7d1505c Binary files /dev/null and b/src/pykx/lib/4-1-libs/m64/libq.dylib differ diff --git a/src/pykx/lib/4-1-libs/m64arm/libq.dylib b/src/pykx/lib/4-1-libs/m64arm/libq.dylib new file mode 100755 index 0000000..ef9b5dc Binary files /dev/null and b/src/pykx/lib/4-1-libs/m64arm/libq.dylib differ diff --git a/src/pykx/lib/4-1-libs/objstor.q_ b/src/pykx/lib/4-1-libs/objstor.q_ new file mode 100644 index 0000000..2c38917 --- /dev/null +++ b/src/pykx/lib/4-1-libs/objstor.q_ @@ -0,0 +1,5 @@ + +¤üV%þ% ;¦k¦';ù[¹Äˆ¤ÄV Ý,6Ä 
²…GL½–GËrG³'‡–[FËþ퇊,k +õ}½ +L–½GJ¦Ës†['dñ'›f 6V–– fÃí +› ÚˆhäLÓC6‡õ \ No newline at end of file diff --git a/src/pykx/lib/4-1-libs/q.k b/src/pykx/lib/4-1-libs/q.k new file mode 100644 index 0000000..593a7d5 --- /dev/null +++ b/src/pykx/lib/4-1-libs/q.k @@ -0,0 +1,266 @@ +\d .q +/each: +-*%&|^<>=$ <= >= <> @ ? in within bin div abs log exp sqrt sin cos tan f' f\: f/: +neg:-:;not:~:;null:^:;string:$:;reciprocal:%:;floor:_:;ceiling:-_-:;signum:{(x>0)-x<0} +mod:{x-y*x div y};xbar:{$[9h=t:abs[@x];x*r+y=x*1+r:(y:9h$y)div x;x*y div x:$[16h=t;"j"$x;x]]};xlog:{log[y]%log x};and:&;or:|;each:{x'y};scan:{x\y};over:{x/y};prior:{x':y} +mmu:$;lsq:!;inv:!:;md5:-15!;ltime:%:;gtime:{t+x-%t:x+x-%x}; /xnull:{$[0>@y;(,y)@~x=y;x=y;y 0N;y]} + +/aggr: last sum prd min max avg wsum wavg f/ /beta:{cov[x;y]%var x} +count:#:;first:*:;svar:{(n*var x)%-1+n:(#x)-+/^x};sdev:{sqrt svar x};scov:{(n*cov[x;y])%-1+n:(#x)-+/^x+y};med:{avg x(@x;'`rank;1_x,,x 0N]}; +rank:{$[0h>@x;'`rank;<@x;'`rank;@x;'`rank;>x]} +asc:{$[99h=@x;(!x)[i]!`s#r i:@x;'`rank;`s#x@r:. x;0h>@x;'`rank;x@>x]} + +msum:{$[99h=@y;(!y)!.z.s[x;. y];y-(-x)_(0i*x#y),y:+\y]};mcount:{msum[x;~^y]};mavg:{msum[x;0.0^y]%mcount[x;y]};mdev:{sqrt mavg[x;y*y]-m*m:mavg[x;y:"f"$y]} +xrank:{$[0h>@y;'`rank;_y*x%#y:<@y;'`rank;y(!#y)-x]};rotate:{$[0h>@y;'`rank;98h<@y;'`type;#y;,/|(0;mod[x;#y])_y;y]};ema:{(*y)(1f-x)\x*y} + +/other: ~,#_ !. getenv exit +distinct:?:;group:=:;where:&:;flip:+:;type:@:;key:!:;til:{$[0>@x;!x;'`type]};value:get:.:;attr:-2!;cut:{$[0h>@x;x*!-_-(#y)%x;x]_y} +set:{$[@x;.[x;();:;y];-19!((,y),x)]};upsert:.[;();,;] / :: ,: files? +raze:,/;union:?,;inter:{x@&x in y};except:{x@&~x in y};cross:{n:#m:&(#x)##y;$[99h=@x;((!x)[m],'n#!y)!(. x)[m],'n#. y;((),x)[m],'n#y]} /extant:{x@&~^x} +sv:{x/:y};vs:{x\:y};sublist:{$[99h=@y;sublist[x;!y]!sublist[x;. 
y];~0h>@x;$[.Q.qp y;.Q.ind[y];y]i+!"j"$0|x[1]&(#y)-i:*x;abs[x]<#y;x#y;y]} + +/file&comm +read0:0::;read1:1::;hclose:>:;hdel:~:;hsym:"s"$-1!';hcount:-7!;peach:{x':y};system:."\\", + +/string: like ss +ltrim:{$[~t&(77h>t)|99ht)|99ht)|99ht)|99h@z;:[;z];z]]} + +/select insert update delete exec / fkeys[&keys] should be eponymous, e.g. order.customer.nation +/{keys|cols}`t `f's{xasc|xdesc}`t n!`t xcol(prename) xcols(prearrange) FT(xcol xasc xdesc) +view:{$`. .`\:x};tables:{."\\a ",$$[^x;`;x]};views:{."\\b ",$$[^x;`;x]} +cols:{$[.Q.qp x:.Q.v x;.Q.pf,!+x;98h=@x;!+x;11h=@!x;!x;!+0!x]} /cols:{!.Q.V x} +xcols:{(x,f@&~(f:cols y)in x)#y};keys:{$[98h=@x:.Q.v x;0#`;!+!x]};xkey:{(#x)!.[0!y;();xcols x]}; +xcol:{.Q.ft[{+$[99h=@x;@[!y;(!y)?!x;:;. x];x,(#x)_!y]!. y:+y}x]y};xasc:{$[$[#x;~`s=-2!(0!.Q.v y)x;0];.Q.ft[@[;*x;`s#]].Q.ord[<:;x]y;y]};xdesc:{$[#x;.Q.ord[>:;x]y;y]} +fkeys:{(&~^x)#x:.Q.fk'.Q.V x};meta:{([!c].Q.ty't;f:.Q.fk't;a:-2!'t:. c:.Q.V x)} + +/ R uj R(union join) R lj K(left(equi/asof)join) trade asof`sym`time!(`IBM;09:31:00.0) +lj:{.Q.sx[x[;z]]y}{$[$[99h=@y;(98h=@!y)&98h=@. y;()~y];x,\:y;'"type"]} /;la:{$[&/j:z>-1;x,'y z;+.[+ff[x]y;(!+y;j);:;.+y z j:&j]]}{la[x;. y](!y)?(!+!y)#x}[;y]]x} /lj:,\:;aj:{lj[y]`s#xkey[x]z};aj0:{lj[y]`s#(x#z)!z}; /;bn:{@[i;&0>i:x bin y;:;#x]} +ljf:{.Q.sx[x[;z]]y}{$[`s=-2!y;ajf[!+!y;x;0!y];$[&/j:(#y:. y)>i?:(!+i:!y)#x;.Q.fl[x]y i;+.[+x;(f;j);:;.+.Q.fl[((f:!+y)#x:.Q.ff[x]y)j]y i j:&j]]]} +.Q.ajf0:{[f;g;x;y;z]x,:();z:0!z;d:$[g;x_z;z];g:(:;^)f;f:(,;^)f;$[&/j:-1i:(!y)?(!+!y)#x}[;y]]x} +ijf:{.Q.ft[{.Q.fl[x j]y i j:&(#y:. 
y)>i?:(!+i:!y)#x}[;y]]x} +pj:{.Q.sx[{x+0i^y(!+!y)#x}[;y]]x};asof:{f:!$[99h=@y;y;+y];(f_x)(f#x)bin y} +uj:{$[()~x;y;()~y;x;98h=@x;x,(!+x:.Q.ff[x;y])#.Q.ff[y;x];lj[(?(!x),!y)#x]y]} +ujf:{$[()~x;y;98h=@x;x,(!+x:.Q.ff[x;y])#.Q.ff[y;x];ljf[(?(!x),!y)#x]y]} + +/wj[-1000 2000+\:trade`time;`sym`time;trade;(quote;(max;`ask);(min;`bid))] (given `sym`time xasc quote) +ww:{[a;w;f;y;z]f,:();e:1_z;z:*z;y,'n#+(:/'f)!+{[e;d;a;b]e .'d@\:\:a+!b-a}[*:'e;z f:1_'e]/'$[n:#*w;+$[#g;(g#z)?g#y;0]|/:a+$[#g:-1_f;(f#z)bin@[f#y;*|f;:;]@;z[*f]bin]'w;,0 0]} +wj:{[w;f;y;z].Q.sx[ww[0 1;w;f;;z]]y};wj1:{[w;f;y;z].Q.sx[ww[1;w-1 0;f;;z]]y} + +fby:{$[(#x 1)=#y;@[(#y)#x[0]0#x 1;g;:;x[0]'x[1]g:.=y];'`length]};xgroup:{x,:();a:x#y:0!y;$[#x_:y;+:'x@=a;a!+f!(#f:!+x)#()]};ungroup:{$[#x:0!x;,/+:'x;x]} +ej:{x,:();y[&#:'i],'(x_z)(!0),/i:(=x#z:0!z)x#y:0!y} /{ungroup lj[z]xgroup[x]y} + +/`[:../]t[.{csv|txt}] +save:{$[1=#p:`\:*|`\:x:-1!x;set[x;. *p]; x 0:.h.tx[p 1]@.*p]}' +load:{$[1=#p:`\:*|`\:x:-1!x;set[*p;. x];set[*p].h.xt[p 1]@0:x]}' +rsave:{x:-1!x;.[`/:x,`;();:;.*|`\:x]}' +rload:{x:-1!x;.[*|`\:x;();:;. x]}' +dsave:{.[*x;1_x,y,`;:;@[;*!+a;`p#].Q.en[*x]a:. y];y}/: + +show:{1 .Q.s x;};csv:"," / ";" also \z 1 for "D"$"dd/mm/yyyy" + +parse:{$["\\"=*x;(system;1_x);-5!x]};eval:-6!;reval:-24! 
+\d .Q /def[`a`b`c!(0;0#0;`)]`b`c!(("23";"24");,"qwe") +k:4.1;K:0Nd;host:-12!;addr:-13!;gc:-20!;ts:{-34!(x;y)};gz:-35!;w:{`used`heap`peak`wmax`mmap`mphy`syms`symw!(."\\w"),."\\w 0"} / used: dpft en par chk ind fs fu fc +res:`abs`acos`asin`atan`avg`bin`binr`cor`cos`cov`delete`dev`div`do`enlist`exec`exit`exp`getenv`hopen`if`in`insert`last`like`log`max`min`prd`select`setenv`sin`sqrt`ss`sum`tan`update`var`wavg`while`within`wsum`xexp +addmonths:{("d"$m+y)+x-"d"$m:"m"$x} +f:{$[^y;"";y<0;"-",f[x;-y];y<1;1_f[x;10+y];9e15>j:"j"$y*prd x#10f;(x_j),".",(x:-x)#j:$j;$y]} +fmt:{$[x<#y:f[y;z];x#"*";(-x)$y]} +pykxld:{x:("#!"~2#*x)_x:-1!'x;+(1+*:'i;)@"\n"/:'x i:(&|1^\|0N 0 1@"/ "?*:'(v:x i),'"/")_i:&~|':(b?-1)#b:+\-/x~\:/:+,"/\\"} + +/DO NOT USE ROUTINES PAST HERE. SUBJECT TO CHANGE +ff:{$[&/(!+y)in f:!+x;x;x,'(f_y)(#x)#0N]} +fl:{$[98h=t:@x;+fl[+x;+y];99h=t;@[x,y;f;:;x[f]fl'y f@:&(f:!x)in!y];t&t<77h;x^y;@[y;i;:;x i:&(0<#:'x)&~#:'y]]} +opt:{[o]x::$[#i:&o like"-[^0-9]*";i[0]#o;o];((`$1_*:)'o)!1_'o:i_o};def:{x,((!y)#x){$[0h>@x;*:;::](@*x)$y}'y};ld:-39! +qt:{$[99h=@x;(98h=@!x)&98h=@. x;98h=@x]}; v:{$[-11h=@x;.$[":"=*t:$x;`$t,"/";x];x]}; +qp:{$[~98h=@x;0;@x:.+x;~":"=*$x;0]}; V:{$[qp x:v x;((,pf)!,. pf),+$[(~x in!d0[])&(x:.+x)in !*. .Q.vt;.Q.vp x;d0[]@x];0h>@*x:+0!x;x@'!x;x]} +ft:{$[{$[99h=@t:v x;98h=@. t;0]}y;[n:#+!y;n!x 0!y];x y]}; +ord:{if[~&/b:{(!#x)=x?x}c:.q.cols z;'"dup ",$c b?0b];if[~&/b:(y,:())in c;'y b?0b];ft[@[;!+t:0!v z;::]]z;ft[@[;!+t;@[;x@+y!t y]]]z};nv:{$["."~*$x;x;`$"..",$x]} + +tx:{$[(76ht;t;11h=t:@x:. nv@!x;t;98h=t;7h;1=#x:+!x;tx@*x;7h]};tt:0|tx'. V@ +fk:{$[(20h>t)|76h@. nv t:!x;`;t]};t:" bg xhijefcspmdznuvts";ty:{$[0h>x:tx x;.q.upper t@-x;t x]} +nct:{$[`~x;,/nct'.q.tables`.;([]n:x;c:.q.cols x;t:tt x)]} + +fu:{[f;x]$[0h>@x;f x;f[u](u:?x)?x]} /uniques +fc:{$[1@x;`${$[(*x)in n,"_ ";"a",x;x]}x@&(x:$x)in an;ft[{s[i]:`$($s i:&((s:id'!x:+x) in`i,res,!`.q)),'"1";+({unm[x]}/s)!. 
x}]x]} +j10:64/:b6?;x10:b6@0x40\: /base64 J from char10 +j12:36/:nA?;x12:nA@0x24\: /base36 J from char12(cusip) +btoa:-32!;sha1:-33!;prf0:+`name`file`line`col`text`pos!*-37! +objp:{x like"[mgs][s3]://*"}; +lo:{if[$[1>@d:!f:-1!x;1;`.d~*d];:.[$[qt d;*|`\:f;99=@d;`.;'`type];();:;d:. f]];d@:&~d like"*$";p:(d=`par.txt)|d like"[0-9]*"; + if[y;if[objp x;'"no cd to object storage"];."\\cd ",$x;f:`:.];{.q.set[*|`\:x]$[0h>@!x;. x;x`]}'`/:'f,'d@&~(d=`html)|(d like"*#")|p|s:d like"*.?";if[|/p;L[d@&p;f;~y|objp x]];if[z&~`.=x;(."\\l ",$:)'`/:'f,'d@&s&~p];} +l:lo[;1;1] + +sw:{.q.sublist[_.5**|."\\C"]x};tab:{" "/:'(x*|/#:''y)$/:y};t0:{$[x>#y;((#*y)#'" -"),y;y]} +s1:{-3!x} /{$[0>t:@x;$x;99h76h;s1'x;t;$x;|/(97ht:?@:'x;s1'x;(1<#?#:'x)|$[1=#t;(*t)in 1 4 10h;0];s1'x;tab[1]@s2'sw'x]} +S:{x[1]{$[x<#y;((x-2)#y),"..";y]}'$[t&77h>t:@z;,s1 z;99h#x)|(@x)&~11=@x} + +/ CAN EXIT HERE FOR SMALL Q +/ pt(tables) pf(date/month/year/int) pd(dirs) pv(values) pn(count) pt::0#pf::` +vt:(,`)!,()!(); +bv:{g:$[(::)~x;max;min];x:`:.;d:{`/:'x,'d@&(d:!x)like"[0-9]*"}'P:$[`par.txt in!x;-1!'`$0:`/:x,`par.txt;,x]; + t:?,/!:'.Q.vt:{(&#:'x)(=,/. x)}'{({("DMJJ"`date`month`year`int?.Q.pf)$$last@x:`\:x}'x)!!:'x}'d; + d:{`/:'x[(. y)[;0]],'(`$$(. y)[;1]),'!y}[P]@{i:y@&:x=y x:@[x;&x~\:();:;*0#`. pf];(i;x i)}[;g]'+:t#/:g''.Q.vt:t#/:.Q.vt;.Q.vt:P!.q.except[. .Q.pf]''.Q.vt; + .Q.vp:t!{(+(,.Q.pf)!,0#. .Q.pf),'+(-2!'.+x)#'+|0#x:?[x;();0b;()]}'d;.Q.pt,:{.[x;();:;+.q.except[!+.Q.vp x;.Q.pf]!x];x}'.q.except[t;.Q.pt];} + +pt:pm:();MAP:{{$[0>@a:.+0!. x;.q.set[x]@.`$-1_$a;]}'a@&~(a:."\\a")in pt;pm::();if[#pt;pm::pt!{(`u#pd,'pv)!p2[(x;();0b;())]/':+(pd;pv)}':pt]} +dd:{`/:x,`$$y};d0:{dd[*|pd;*|pv]};p1:{$[#pm;pm[x](y;z);z in vt[y;x];vp x;+(!+. x)!`/:dd[y;z],x]};p2:{0!(?).@[x;0;p1[;y;z]]} + +p:{$[~#D;p2[x;d]':y;(,/p2[x]'/':P[i](;)'y)@<,/y@:i:&0<#:'y:D{x@&x in y}\:y]} +view:{pd::PD x:$[(::)~x;x;$[#x:&PV in x;x;'"invalid partition filter"]];u~:?u::..[pf;();:;pv::PV x];.[;();:;]'[pt;{+(x . 
y,`.d)!y}[x]':pt::!x:d0[]];pn::pt!(#pt)#()} + +jp:{$[$["w"~*$.z.o;u[$[(_u:$y)like"[a-z]:*";2;0]]in"\\/";("/"=*$y)|objp y];-1!y;`/:x,y]};rp:-500! +L:{D::();f:{!:'?{-1!`$("/"/:3#"/"\:x),"/_"}'u&objp'u:1_'$x;x};f@,d::$[z;rp y;y];if[x~,`par.txt;if[~#x:,/D::{x@&~(x:!x)like"*$"}'P::f@jp[d]'`$0:`/:d,*x;'empty]];if[^*PV::x@:."\\p")|."\\_";cn'.:'pt];} +/L:{P::,`:.;D::,x;pf::`date;pt::!P[0]@**D;T::P!P{z!{x!(y . ,[;`]z,)'x}[x;y]'z}[pt]'D} + +cn:{$[#n:pn x:.+x;n;pn[x]:(#p1 .)':+(x;pd;pv)]};pcnt:{+/cn x};dt:{cn[y]@&pv in x} +ind:{,/i[j]{fp[pf;p]p1[x;pd y;p:pv y]z}[.+x]'(j:&~=':i)_y-n i:(n:+\0,cn x)bin y} +fp:{+((,*x)!,(#z)#$[-7h=@y;y;(*|x)$y]),+z} +foo:{[t;c;b;a;v;d]if[v;g:*|`\:b f:*!b;b:1_b];,/$[v|~#a;d fp[$[v;f,g;pf]]';::]p[(.+t;c;b;a)]d} + +/ select{u's|a's[by[date,]u's]}from t where[date..],[sym{=|in}..],.. +a2:({(%;(sum;("f"$;x));(sum;(~^:;x)))};{(sum;(*;("f"$;x);y))};{(%;(wsum;x;y);(sum;(*;x;(~^:;y))))};{(cov;x;x)};{(sqrt;(var;x))} + {(-;(avg;(*;("f"$;x);y));(*;(avg;x);(avg;y)))};{(%;(cov;x;y);(*;(dev;x);(dev;y)))};{(.q.scov;x;x)};{(sqrt;(.q.svar;x))};{(*;(%;(#:;`i);(+;-1;(#:;`i)));(cov;x;y))};{'`part}) + +/ x0 translate;x1 aggrs;x2 translate ?terminal (subselect/exec within partition) unknown assumed uniform? +qd:{$[(#:)~*x;(?:)~*x 1;0]};xy:{`$$*&x~/:y}; x1:{$[qb x;();IN[*x;a0];$[qd x;1_x;,x];,/x1'1_x]} +x0:{$[qb x;x;IN[*x;a1];x0 a2[a1?*x]. 1_x;x0'x]};x2:{$[qb x; x;IN[*x;a0];$[qd x;(#:;(?:;(,/;xy[x 1]y)));[y:xy[x]y;$[(?:)~*x;(?:;(,/;y));(#:)~*x;(sum;y);(*x;y)]]];@[x;&~(::)~/:x;x2[;y]]]} +ua:{((`$$!#u)!u;x2[;u:?,/x1'x]'x:x0'x)};q0:{$[~qb x;,/q0'x;-11h=@x;*`\:x;()]};qe:{$[#x;99h=@x;1]} +ps:{[t;c;b;a]if[-11h=@t;t:. t];if[~qe[a]&qe[b]|-1h=@b;'`nyi];d:pv;v:$[q:0>@b;0;~#b;0;-11h=@v:*. b;pf~*`\:v;0] + if[$[~#c;0;@*c;0;-11h=@x:c[0]1;pf~*`\:x;0];d@:&-6!*c;c:1_c] + if[$[#c;0;(g:(. a)~,pf)|(. 
a)~,(#:;`i)];f:!a;j:dt[d]t;if[q;:+f!,$[g;?d@&0(#.:)'t:.q.tables`.;if[h:@[hopen;h;0];h"\\l .";>h]} + +/loop through text /lc:{+/{+/0xa=1:(x;y*z;y)}[x;m]'!-_-(-7!x)%m:1048576} /line count of big file +fsn:{[f;s;n]>[-7!s]{[f;s;x;n]i:(#r)^1+last@&"\n"=r:1:(s;x;n);f@`\:i#r;x+i}[f;s;;n]/0};fs:fsn[;;131000] +fpn:{{r:.Q.trp[y;h;{>x;'(y;z)}h:hopen":fifo://",1_$x];>h;r}[y + {[f;h;n]b:"x"$();while[#s:1:h;if[~n>#b,:s;v:`\:b,0x0a;if[1<#v;f@-1_v];b:"x"$last v]];if[#b;f@`\:b]}[x;;z]]};fps:fpn[;;131000] +/e.g. p:` sv(d:`:dir;`2007.02.12;`t);@[;`sym;`p#]p{@[x;`;,;`sym xasc .Q.en[d]get y]}/rdb's +dsftg:{[dpt;xom;f;tw;g]d:*dpt;dpt:par . dpt;x:*xom;if[~0m:_a:(m-:o:xom 1)%b:+/tw 10=@*tw;'`length];0N!($dpt),": ",$m&:M;i::0;n::1000000&_1e8%b + do[-_-m%n;@[dpt;`;$[i;,;:];en[d]@+g f!tw 1:(x;o+b*i;b*n&:m-i)];0N!i+:n];dpt};M:0W + +/ fill in empty partitions +chk:{if[x~(::);'"missing dir argument"];f:{`/:'x,'d@&(d:!x)like"[0-9]*"};d@:>.:'$last'`\:'d:$[`par.txt in!x;,/f'-1!'`$0:`/:x,`par.txt;f x] + {[e;u;d]u[i]{.[x;(y;`);:;?[z;();0b;()]]}[d]'e i:&~u in!d}[d[(+u in/:t)?\:1b](0#.)'u,'`;u:?,/t:!:'d]'d} +Ll:-100!;Lp:-101!;Lx:-102!;Lu:-103!;Ls:-104!;fqk:{("<"~*x)|x~*-3#.{}};fql:{x[1;1]~""};btx:{r:,Lp x;while[@x:Lu(0b;x);r,:(~fql v)#,v:Lp x;if[(fqk v[1]1)&".Q.dbg"~*v 1;f:(Lx(x;"k";,"v"))1]];$[~#f;r;@[r;r?f;,;,">>"]]};bt:{1'pl'(2*4<|/#:'v)_v:1_btx Ll`;};sbt:{,/pl'x};trp:{-105!(x;,y;z)};trpd:{-105!(x;y;z)} +dr:{0:0};dw:{@[2@;x;::];};pl0:{.[{[f;v;x;y;z]g:{$[x>0;x&#y;x|-#y]#y};m:1;n:0|(b:+\0,1+#:'s:`\:s)bin y:(0|y)+2*"k)"~2#s:x 3;sf:$[#x 1;x[1],$[0>x 2;"";":",($n+x 2)],": ";""];sn:{$[(#x)&@x;((2*".."~2#x)_x),":";()]}@*x;h:($[4<#v;*|v;" "],5$"[",($z),"]") + if[("locked"~x 3)|$[f;0b;fqk x 1];:h,"(",($[#sn;-1_sn;-2_sf]),")\n\n"];h,:sf,sn;h,`/:((n>m)#,""),g[-m+1;(n+1)#s],(,(" \t"(((#h)*n<1)#0),(y-b n)#s[n]="\t"),"^"),g[m](n+1)_s}[x;y];3#1_y;{dw"pl0\n";}]};pl:pl0 0 +jl8:{$[$[#x;~x[3]in y[;3];0b];,x;()],y};srr:{@[{if["abort"~*x;:""];"'",$[.z.q;($.z.Z)," ";""],(x[0],"\n"),/pl'jl8[x 3;x 2]};x;"srr\n"]};prr:{dw 
srr x}; +lu:{f:{$[@a:Lu(x;*y);(a;Lp a);()]}x;r:{$[#x;fql x 1;0b]}f/f y;$[#r;$[.Q.dbg~**r 1;y;r];y]} +DL:0;dbg:{pm:{dw" q"[x],$[1<#d:$."\\d";d;""]," )"[x],(1|DL)#")"};dq:{v:$[#*y;(~"k)"~2#y[1;3];*(.**y)3);1<#*y 1;@[*y 1;0;"q"=];(1b;`)];x v 1;*v}ds:{."\\d ",((~"."=*x)#"."),x:$x};de:@[{DL-:1;x y;{."\\x .z.e",x}'"xy"}ds;;::] + @[{DL+:1;.z.ex:*x 1;.z.ey:x[1]1};x;::];b:$[#*v:Lp y;0xff~(*. bf:**v)bi:v[0;1];0b];if[b;dw "#",($bi),"\n"];w:(y0:y;v:Lp y);if[e:@*x;prr x];d0:."\\d" + y:*w:{$[~fqk x[1;1]1;x;lu[0b]x]}/w;q:dq v:w 1;dw pl v;while[*(~(,"\\")~l:dr[];pm[q]);$[l~"\\ ";q:~q;l in+,".`&";[w:$["&"=*l;;lu["."=*l]](y;v);y:*w;v:w 1;if[y~y0;prr x];dw pl0[1]v;q:dq v] + (~e)&l in+,":>";:*($[b;bc[bf;bi];]0 0N(":"=*l);de d0);"'"=*l;:*((1_l;());de d0);c&~#*r:Lx(y;"kq"q;(c:":"=*l)_l);:*(r;de d0);$[#*r;::;dw $[q;.Q.s;{$[(::)~x;"";`/:,-3!x]}]r 1]]];de d0;("abort";())} +err:{(*x;jl8[x 3;$[@y;btx y;()],x 2])}; BP:(,"")!,();bp:{*|(Ls(x;(),y;(#y)#z);(*. x)y)};bs:{BP[$x],:y!bp[x;y,:();0xff];};bu:{bp[x;y;BP[$x;y,:()]]} +bd:{bu[x;y];@[`BP;$x;_;y]};bc:{bu[x;y];dbg::{[d;f;i;r;u;v]dbg::d;bs[f;i];$[#*u;d[u;v];r]}[dbg;x;y;z];-1} + +\d .h / c +htc:{,/("<";x;y;"")};hta:{,/("<";$x;,/" ",'($!y),'"=",'{$[10h=@x;"\"",x,"\"";$x]}'. y;">")};htac:{,/(hta[x]y;z;"")} +ha:{htac[`a;(,`href)!,x]y};hb:{htac[`a;`target`href!`v,,x]y};pre:{htc[`pre]@`/:x};xmp:{htc[`xmp]@`/:x} +d:" ";cd:{.q.csv 0:$[.Q.qt x;![x;();0b;(!t)[c]!,:'.q.sv[d]@/:'$v c:&(~l=-10h)&0>l:.Q.tx'v:. t:+0!x];x]};td:{"\t"0:x};hc:{"<"/:"<"\:x};xs:.q.ssr/[;"&<>";("&";"<";">")] +xd:{g:{(#*y)#'(,,"<",x),y,,,""};(,""),(,/'+g[`r]@,/(!x)g'{,xs'$[11h=@x;$x;t&77h>t:@x;$x;x]}'x:+0!x),,""} + +ex:("";);iso8601:{$[^x;"";@[$"p"$x;4 7 10;:;"--T"]]} +eb:{htac[`Workbook;(`$"xmlns",/:$``:ss`:o`:x`:html)!("urn:schemas-microsoft-com:office:",/:$(2#`spreadsheet),`office`excel),,"http://www.w3.org/TR/REC-html40"]x} +es:{htac[`Worksheet;(,`ss:Name)!,$x]htc[`Table]@,/(htc[`Row]@,/ec')'(,!+y),+.+y:0!y};ed:{ex eb es[`Sheet1]x};edsn:{ex eb@"\r\n"/:(!x)es'. 
x} +ec:{htc[`Cell]htac[`Data;(,`ss:Type)!,$`String`Number`String`DateTime`DateTime`String i](xs;$:;xs@$:;iso8601;iso8601 1899.12.31+"n"$;xs@$:)[i:-10 1 10 12 16 20h bin -@x]x} + +\d .j /[]{} Cbg*xhijefcspmdznuvt +e:{"c"$$[x<128;x;0b/:'@[;0;(1b,i#1b),]10b,/:((5 10;0 4 10)i:x>2047h)_0b\:"h"$x]}@256/: +q:"\"";s:{q,x,q};es:{s@,/{$[x in r:"\t\n\r\"\\";"\\","tnr\"\\"r?x;x]}'x};J:(($`0`1)!$`false`true;s;{$[#x;x;"null"]};es;{s@[x;&"."=8#x;:;"-"]};s)1 2 5 11 12 16h bin +/j:{$[10=abs t:@x;es x;(::)~x;"null";99=t;"{",(","/:({$[q~*x;x;s x]}'j'!x),'":",'j'. x),"}";-1*x;"F"$x;"n"=*x;0n;"t"=*x]} +/k:{c x@&~v[x]&x in" \t\n\r"};v:{~0(0 1 0;1 0 2;1 1 1)\x*3>x:" \"\\"?x};u:{?[v x;x;" "]};d:{$[1" +ka:-41! +c0:`024C7E;c1:`958600;logo:"kx.com" /logo:,/(c0;c1;c0){htac[`font;(,`color)!,x]y}'("[kx";"systems";"]") /808000 +logo:,/(c0;c1){htac[`font;(,`color)!,x]htc[`b]y}'("KX";".com") +sa:"a{text-decoration:none}",/`link`visited`active{"a:",($x),"{color:",y,"}"}'$(c0;c0;c1) +html:{htc[`html]htc[`head;htc[`style]sa,sb],htc[`body]x};sb:"body{font:10pt verdana;text-align:justify}" +fram:{htc[`html]htc[`head]htc[`title;x],htac[`frameset;(,`cols)!,($116|316&43+(7+"?"~*z)*|/#:'y),",*"]@,/hta[`frame]'((,`src)!,*z;`name`src!`v,,*|z)} +jx:{[j;x]x:val x;$[$[.Q.qt[x];(N:(*."\\C")-4)";hr:{(#x)#"-"};nbr:htc[`nobr];code:{" "/:@[x;1+2*!_.5*#x:"\t"\:x;{htc[`code]nbr hc x}]} +http:{$[#i:ss[x]"http://";(i[0]#x),/{ha[x;x:i#x],(i:&/x?") ")_x}'i_x;x]};text:{`/:{htc[`p]http code x}'x@&0<#:'x} +data:{if[100<#*x:.Q.tab[1]n#'x,\:(n:|/#:'x:"\t"\:'x:1_'x)#,"";0N!#*x;0N!'x];xmp$["- "~2#x 1;@[x;1;hr];x]} +ht:{[t]x:0:`$"src/",(T:t:$t),".txt";if[~"."=**x;T:*x;x:1_x];h:.q.trim(n@:i)_'x i:&0#x;x,1;(y#x),1+x y-:1]}\n),'" ",/:h + x:`/:("Copyright © ",logo;htc[`h5]T),((#x)#(text;data))@'x:(&~=':~(0<#:'x)&" "=*:'x)_x + c:h{hb[x,"#",y;z],br}[a:t,".htm"]'H;c:nbr@`/:$[&/n=:1;c;1_,/(,br),/:(&n)_c] + (`$a)1:fram[T;H]b:"a/",/:("_",a;a);(`$b)1:'html'(($.z.D),htc[`h5;"Contents"],c;x);} + +\d .o /odbc timedec/cs could be 3(23,12) 
+ex:{$[.Q.qt x:."s)",x;0!x;x]} / char(1+) binary(display2*) -1 longvarchar -4 longvarbinary sql_no_total(-4)SAS/4096 +T:`text`bit```tinyint`smallint`int`bigint`real`float`char`varchar``date`date`datetime``time`time`time +T0: -1 -7 0 0 -6 5 4 -5 7 8 1 12 0 9 9 11 0 10 10 10h +B0:4096 1 0 0 1 2 4 8 4 8 1 255 0 6 6 16 0 6 6 6 +C0:4096 1 0 0 3 5 10 19 7 15 1 255 0 10 10 23 0 8 8 8 +PS: 1 2 0 0 2 2 2 2 2 2 3 3 0 2 2 2 0 2 2 2h /char basic both +t:0 1 4 5 6 7 8 9 10 11 14 15 19;Columns:{$[#x;Cols`$x;,/Cols'.q.tables`.]} +q)TI:`DATA_TYPE xasc([]TYPE_NAME:T t;DATA_TYPE:T0 t;COLUMN_SIZE:C0 t;LITERAL_PREFIX:`;LITERAL_SUFFIX:`;CREATE_PARAMS:`;NULLABLE:1h;CASE_SENSITIVE:1h;SEARCHABLE:PS t;UNSIGNED_ATTRIBUTE:0h;FIXED_PREC_SCALE:0h;AUTO_INCREMENT:0h;LOCAL_TYPE_NAME:`;MINIMUM_SCALE:0h;MAXIMUM_SCALE:0h) +q)TypeInfo:{$[count x;select from TI where DATA_TYPE="H"$x;TI]} /msqry32 uses special e.g. select p.i from p p +q)Special:{([]SCOPE:2h;COLUMN_NAME:(0&0 similar position substring trim {lower|upper} diff --git a/src/pykx/lib/4-1-libs/qlog.q_ b/src/pykx/lib/4-1-libs/qlog.q_ new file mode 100644 index 0000000..d45d4b1 Binary files /dev/null and b/src/pykx/lib/4-1-libs/qlog.q_ differ diff --git a/src/pykx/lib/4-1-libs/read.q b/src/pykx/lib/4-1-libs/read.q new file mode 100644 index 0000000..41bfd1a --- /dev/null +++ b/src/pykx/lib/4-1-libs/read.q @@ -0,0 +1,46 @@ +system"l ", {x sv (-1 _ x vs y),enlist "csvutil.q"}[$[.z.o~`w64;"\\";"/"]; (value{})6]; + +system"d .read"; + +// @kind readme +// @name .read/README.md +// # pykx.q.module.read +// @end + +// @kind function +// @fileoverview Loads a CSV file as a table; column types are guessed if not provided +// @param path {#hsym|symbol} The path to the csv file +// @param types {(char)|string|null} A list/string of uppercase type characters representing the types, or null. Space is used to drop the associated column. Null is used to guess the type of the associated column. 
+// @param delimiter {char|null} The single-character delimiter used in the csv file. If null, comma will be used. +// @param asTable {bool} Whether the first line of the csv file should be interpreted as column names. If true, a table will be returned. Otherwise, a list of vectors of columnar data will be returned. +// @returns {table|#any[][]} The data from the csv file as a table or list of vectors depending on the value of the asTable parameter +.read.csv:{[path; types; delimiter; asTable] + guessedTypes:.csv.info[path]`t; + :($[(::)~types;guessedTypes;1_?[1b,(::)~'types;guessedTypes;(::),types]];$[asTable;enlist;::]$[(::)~delimiter;delimiter:",";delimiter]) 0: hsym path + }; + +// @kind function +// @fileoverview Loads a file of typed data with fixed-width fields. It is expected that there will either be a newline after every record, or none at all. +// @param path {#hsym|symbol} The path to the fixed-width data file +// @param types {string} A string of uppercase type characters representing the types. Space is used to drop the associated column. +// @param widths {long[]} The widths of the fields +// @returns {#any[]|#any[][]} A vector or list of vectors representing the data +.read.fixed:{[path; types; widths] + :(types;widths) 0: path + }; + +// @kind function +// @fileoverview Loads a json file as a q object.
Serialization/deserialization to/from JSON may not preserve q datatype +// @param path {#hsym|symbol} The path to the json file +// @returns {table} The JSON object converted to its closest q analogue +.read.json:{[path] + :.j.k raze read0 path + }; + +// @kind function +// @fileoverview Loads a q table +// @param path {#hsym|symbol} The path to the table file/directory +// @returns {table} The table stored at the given path +.read.qtab:{[path] + :get hsym path + }; diff --git a/src/pykx/lib/4-1-libs/rest.q_ b/src/pykx/lib/4-1-libs/rest.q_ new file mode 100644 index 0000000..0c69e53 Binary files /dev/null and b/src/pykx/lib/4-1-libs/rest.q_ differ diff --git a/src/pykx/lib/4-1-libs/s.k_ b/src/pykx/lib/4-1-libs/s.k_ new file mode 100644 index 0000000..e7d1aa8 Binary files /dev/null and b/src/pykx/lib/4-1-libs/s.k_ differ diff --git a/src/pykx/lib/4-1-libs/w64/q.dll b/src/pykx/lib/4-1-libs/w64/q.dll new file mode 100644 index 0000000..3c53506 Binary files /dev/null and b/src/pykx/lib/4-1-libs/w64/q.dll differ diff --git a/src/pykx/lib/4-1-libs/w64/q.lib b/src/pykx/lib/4-1-libs/w64/q.lib new file mode 100644 index 0000000..ea82b4b Binary files /dev/null and b/src/pykx/lib/4-1-libs/w64/q.lib differ diff --git a/src/pykx/lib/4-1-libs/write.q b/src/pykx/lib/4-1-libs/write.q new file mode 100644 index 0000000..9ad4a11 --- /dev/null +++ b/src/pykx/lib/4-1-libs/write.q @@ -0,0 +1,44 @@ +system"d .write"; + +// @kind readme +// @name .write/README.md +// # pykx.q.module.write +// @end + +// @kind function +// @fileoverview Splays and writes a q table to disk +// @param dirPath {#hsym|symbol} The path to the root directory into which the splayed table will be written +// @param name {symbol} The name of the table. 
A directory with this name within the root directory will be created, and will contain the serialized columns of the table +// @param table {table} The table to be splayed and written to disk +// @returns {#hsym} The path to the directory within dirPath which contains the splayed table +.write.splayed:{[dirPath; name; table] + :(hsym `$"/" sv string dirPath,name) set .Q.en[hsym dirPath;] table + }; + +// @kind function +// @fileoverview Writes a q object to disk +// @param path {#hsym|symbol} The path to the file which will store the given data. If a file with this path already exists, it is overwritten. +// @param data {#any} The data to be serialized and written to disk +// @returns {#hsym} The path written to +.write.serialized:{[path; data] + :hsym[path] set data + }; + +// @kind function +// @fileoverview Writes a CSV file given a table +// @param path {#hsym|symbol} The path to the csv file. If a file with this path already exists, it is overwritten. +// @param delimiter {char|null} The single-character delimiter used in the csv file. If null, comma will be used. +// @param table {table} The table to be written as a csv file +// @returns {#hsym} The path written to +.write.csv:{[path; delimiter; table] + :hsym[path] 0: $[(::)~delimiter;delimiter:",";delimiter] 0: table + }; + +// @kind function +// @fileoverview Writes a JSON representation of the given q object +// @param path {#hsym|symbol} The path to the JSON file. If a file with this path already exists, it is overwritten. 
+// @param data {#any} The q object to be written as a JSON file +// @returns {#hsym} The path written to +.write.json:{[path; data] + :hsym[path] 0: enlist .j.j data + }; diff --git a/src/pykx/lib/l64/kxreaper b/src/pykx/lib/l64/kxreaper index 6d9864b..79151c2 100644 Binary files a/src/pykx/lib/l64/kxreaper and b/src/pykx/lib/l64/kxreaper differ diff --git a/src/pykx/lib/l64/libkurl.so b/src/pykx/lib/l64/libkurl.so index 3277d14..6f44fb4 100644 Binary files a/src/pykx/lib/l64/libkurl.so and b/src/pykx/lib/l64/libkurl.so differ diff --git a/src/pykx/lib/l64/libobjstor.so b/src/pykx/lib/l64/libobjstor.so index 88a6ff8..828d6ad 100644 Binary files a/src/pykx/lib/l64/libobjstor.so and b/src/pykx/lib/l64/libobjstor.so differ diff --git a/src/pykx/lib/l64/libq.so b/src/pykx/lib/l64/libq.so index 4c24bc9..2a8c586 100755 Binary files a/src/pykx/lib/l64/libq.so and b/src/pykx/lib/l64/libq.so differ diff --git a/src/pykx/lib/l64/pg b/src/pykx/lib/l64/pg index 476c7b8..f5989b0 100644 Binary files a/src/pykx/lib/l64/pg and b/src/pykx/lib/l64/pg differ diff --git a/src/pykx/lib/l64arm/libq.so b/src/pykx/lib/l64arm/libq.so index be29981..9eb8f21 100755 Binary files a/src/pykx/lib/l64arm/libq.so and b/src/pykx/lib/l64arm/libq.so differ diff --git a/src/pykx/lib/m64/libq.dylib b/src/pykx/lib/m64/libq.dylib index f10d73a..b28aac5 100755 Binary files a/src/pykx/lib/m64/libq.dylib and b/src/pykx/lib/m64/libq.dylib differ diff --git a/src/pykx/lib/m64arm/libq.dylib b/src/pykx/lib/m64arm/libq.dylib index 5d1e7d5..5c4b079 100755 Binary files a/src/pykx/lib/m64arm/libq.dylib and b/src/pykx/lib/m64arm/libq.dylib differ diff --git a/src/pykx/lib/p.q b/src/pykx/lib/p.q deleted file mode 100644 index fff7ff9..0000000 --- a/src/pykx/lib/p.q +++ /dev/null @@ -1,121 +0,0 @@ -\d .p - -version:@[{EMBEDPYVERSION};0;`development]; - -o:first string .z.o; -$[o="w";if[3.6>.z.K;'`$"kdb+ version must be 3.6+"];if[3.5>.z.K;'`$"kdb+ version must be 3.5+"]]; -if[(o="w")&count 
getenv[`UNDER_PYTHON];'"embedPy running within a Python process not supported for Windows"]; -if[not .P.loaded:-1h=type@[`.p@;`numpy;`]; - sc:{"'",x,"'.join([__import__('sysconfig').get_config_var(v)for v in",ssr[.j.j y;"\"";"'"],"])"};pr:{"print(",x,");"}; - c:"-c \"",pr["'.'.join([str(getattr(__import__('sys').version_info,x))for x in ['major','minor']])"],"\"2>",$[o="w";"nul `$first@[system"python3 ",;c;{system"python ",c}];'"embedPy requires python 3.6 or higher on windows"]; - c:"-c \"",pr[$[o="w";sc["/python";`BINDIR`VERSION];sc["/libpython";`LIBDIR`LDVERSION]],"+'",$[o="w";".dll";o="l";".so";".dylib"],"'"],pr["__import__('sys').base_prefix"],pr["__import__('sys').prefix"],pr["__import__('sys').executable"],"\"2>",$[o="w";"nul ";ei 3;eo];"class"~5#x;$[x[5]in"*>";ei 5;eo];eo]x} - ]; -k)c:{'[y;x]}/|: / compose list of functions -k)ce:{'[y;x]}/enlist,|: / compose with enlist (for variadic functions) - -/ Aliases -if[not loaded;set'[`pyget`pyeval`pyimport;.p.get,.p.eval,import]]; -qeval:c`.p.py2q,pyeval - -/ Wrapper for foreigns -embedPy:{[f;x] - $[-11h<>t:type x0:x 0; - $[t=102h; - $[any u:x0~/:(*;<;>); - [c:(wrap;py2q;::)where[u]0;$[1=count x;.p.c c,;c .[;1_x]@]pyfunc f]; / call type - (:)~x0;[setattr . f,@[;0;{`$_[":"=s 0]s:string x}]1_x;]; - (@)~x0;$[count 2_x;.[;2_x];]wrap call[getattr[f;`$"__getitem__"];enlist x 1;()!()]; - (=)~x0;[call[getattr[f;`$"__setitem__"];raze 1_x;()!()];]; - '`NYI]; - wrap pyfunc[f]. x]; - ":"~first a0:string x0; / attr lookup and possible call - $[1=count x;;.[;1_x]]wrap f getattr/` vs`$1_a0; - x0~`.;f;x0~`;py2q f; / extract as foreign or q - wrap pyfunc[f]. 
x]} / default, call -unwrap:{$[i.isw x;x`.;x]} -xunwrap:{$[0=t:type x;.z.s each x;98=t;flip .z.s flip x;99=t;.z.s[key x]!.z.s value x;unwrap x]} -wfunc:{[f;x]r:wrap f x 0;$[count x:1_x;.[;x];]r} -i.wf:{[f;x]embedPy[f;x]} -wrap:ce i.wf@ -import:ce wfunc pyimport -.p.eval:ce wfunc pyeval -.p.get:ce wfunc pyget -.p.set:{[f;x;y]f[x]unwrap y;}.p.set -`key`value set'{list$[i.isf y;wrap;i.isw y;;'`type][y][x][]}@'`:keys`:values; -.p.callable:{$[i.isw x;x;i.isf x;wrap[x];'`type]} -.p.pycallable:{$[i.isw x;x(>);i.isf x;wrap[x](>);'`type]} -.p.qcallable:{$[i.isw x;x(<);i.isf x;wrap[x](<);'`type]} -/ is foreign, wrapped, callable -i.isf:isp -i.isw:{$[105=type x;i.wf~$[104=type u:first get x;first get u;0b];0b]} -i.isc:{$[105=type y;$[x~y;1b;.z.s[x]last get y];0b]}ce 1#`.p.q2pargs -setattr:{[f;x;y;z]f[x;y;z];}import[`builtins]`:setattr - -/ Calling python functions -pyfunc:{if[not i.isf x;'`type];ce .[.p.call x],`.p.q2pargs} -q2pargs:{ - if[x~enlist(::);:(();()!())]; / zero args - hd:(k:i.gpykwargs x)0; - al:neg[hd]_(a:i.gpyargs x)0; - if[any 1_prev[u]and not u:i.isarg[i.kw]each neg[hd]_x;'"keywords last"]; / check arg order - cn:{$[()~x;x;11<>type x;'`type;x~distinct x;x;'`dupnames]}; - :(unwrap each x[where not[al]¬ u],a 1;cn[named[;1],key k 1]!unwrap each(named:get'[(x,(::))where u])[;2],value k 1) - } -.q.pykw:{x[y;z]}i.kw:(`..pykw;;;) / identify keyword args with `name pykw value -.q.pyarglist:{x y}i.al:(`..pyas;;) / identify pos arg list (*args in python) -.q.pykwargs: {x y}i.ad:(`..pyks;;) / identify keyword dict (**kwargs in python) -i.gpykwargs:{dd:(0#`)!(); - $[not any u:i.isarg[i.ad]each x;(0;dd);not last u;'"pykwargs last"; - 1] -generator:{[f;i;n]i.gl[closure[f;i]`.;n]} - -/ Add cwd and $QHOME to sys.path -sp:.p.import[`sys]`:path -spq:distinct("";getenv`QHOME),sp` -sp[`:clear][]; -sp[`:extend]spq; -/ write python stdout/err to 1 and 2 -if[not@[{count key .pyq};::;0b];{.p.import[`sys;x][:;`:write;{x y;count y}y]}'[`:stdout`:stderr;1 2]]; -/ set sys.argv -if[not 
.p.eval["hasattr";.p.import`sys;`argv]`;.p.import[`sys][:;`argv;enlist""]] -if[not loaded;if[not count .p.import[`sys][`:argv]`;.p.import[`sys][:;`:argv;enlist""]]] - -/ Cleanup -{![`.p;();0b;x]}`getseq`ntolist`runs`wfunc`gethelp`sp`spq`loaded; -{@[`.p;x;:;.p.import[`builtins]hsym x]}each`tuple`list`dict`isinstance; - -/ VirtualEnv warning for windows users -if[.P.env&.z.o like"w*";-1"Warning: Virtual Environments not supported for embedPy on Windows. Using the 'BASE' version of Python, not the virtual environment Python";] diff --git a/src/pykx/lib/q.k b/src/pykx/lib/q.k index c795c29..274f4c1 100644 --- a/src/pykx/lib/q.k +++ b/src/pykx/lib/q.k @@ -108,7 +108,7 @@ s2:{$[99h=t:@x;((|/#:'x)$x:t0[#d;x]),'"| ",/:t0[#x:s2@!x]d:s2@. x;98h=t;(1#x),(, S:{x[1]{$[x<#y;((x-2)#y),"..";y]}'$[t&77h>t:@z;,s1 z;99h@y;y:,y];p:{$[#y;y;x]}/'getenv@+`$_:\("HTTP";"NO"),\:"_PROXY";u:hap@x;t:~(~#*p)||/(*":"\:u 2)like/:{(("."=*x)#"*"),x}'","\:p 1;a:$[t;p:hap@*p;u]1;r:(`$":",,/($[t;p;u]0 2))($*y)," ",$[t;x;u 3]," HTTP/1.1",s,(s/:(~![-35;::])_("Accept-Encoding: gzip";"Connection: close";"Host: ",u 2),((0<#a)#,$[t;"Proxy-";""],"Authorization: Basic ",btoa a),$[#z;("Content-type: ",z 0;"Content-length: ",$#z 1);()],1_y),(d:s,s:"\r\n"),$[#z;z 1;""];(0;n)_$[#ss[(n:4+*r ss d)#r;"content-encoding: gzip\r\n"];-35!r;r]} hg:{hmb[x;`GET;()]1};hp:{hmb[x;`POST;(y;z)]1} a0:(#:;*:;last;sum;prd;min;max;.q.all;.q.any;?:),a1:(avg;wsum;wavg;var;dev;cov;cor),`.q `svar`sdev`scov`med @@ -144,7 +144,7 @@ a2:({(%;(sum;("f"$;x));(sum;(~^:;x)))};{(sum;(*;("f"$;x);y))};{(%;(wsum;x;y);(su / x0 translate;x1 aggrs;x2 translate ?terminal (subselect/exec within partition) unknown assumed uniform? qd:{$[(#:)~*x;(?:)~*x 1;0]};xy:{`$$*&x~/:y}; x1:{$[qb x;();IN[*x;a0];$[qd x;1_x;,x];,/x1'1_x]} -x0:{$[qb x;x;IN[*x;a1];x0 a2[a1?*x]. 1_x;x0'x]};x2:{$[qb x; x;IN[*x;a0];$[qd x;(#:;(?:;(,/;xy[x 1]y)));[y:xy[x]y;$[(?:)~*x;(?:;(,/;y));(#:)~*x;(sum;y);(*x;y)]]];x2[;y]'x]} +x0:{$[qb x;x;IN[*x;a1];x0 a2[a1?*x]. 
1_x;x0'x]};x2:{$[qb x; x;IN[*x;a0];$[qd x;(#:;(?:;(,/;xy[x 1]y)));[y:xy[x]y;$[(?:)~*x;(?:;(,/;y));(#:)~*x;(sum;y);(*x;y)]]];@[x;&~(::)~/:x;x2[;y]]]} ua:{((`$$!#u)!u;x2[;u:?,/x1'x]'x:x0'x)};q0:{$[~qb x;,/q0'x;-11h=@x;*`\:x;()]};qe:{$[#x;99h=@x;1]} ps:{[t;c;b;a]if[-11h=@t;t:. t];if[~qe[a]&qe[b]|-1h=@b;'`nyi];d:pv;v:$[q:0>@b;0;~#b;0;-11h=@v:*. b;pf~*`\:v;0] if[$[~#c;0;@*c;0;-11h=@x:c[0]1;pf~*`\:x;0];d@:&-6!*c;c:1_c] diff --git a/src/pykx/lib/rest.q_ b/src/pykx/lib/rest.q_ index b328544..0c69e53 100644 Binary files a/src/pykx/lib/rest.q_ and b/src/pykx/lib/rest.q_ differ diff --git a/src/pykx/lib/s.k_ b/src/pykx/lib/s.k_ index f8d2e63..e7d1aa8 100644 Binary files a/src/pykx/lib/s.k_ and b/src/pykx/lib/s.k_ differ diff --git a/src/pykx/lib/w64/q.dll b/src/pykx/lib/w64/q.dll index 84cadb8..312318e 100644 Binary files a/src/pykx/lib/w64/q.dll and b/src/pykx/lib/w64/q.dll differ diff --git a/src/pykx/lib/w64/q.lib b/src/pykx/lib/w64/q.lib index c0db9fa..2857d79 100644 Binary files a/src/pykx/lib/w64/q.lib and b/src/pykx/lib/w64/q.lib differ diff --git a/src/pykx/nbextension.py b/src/pykx/nbextension.py index e2f7c18..beb6c3a 100644 --- a/src/pykx/nbextension.py +++ b/src/pykx/nbextension.py @@ -14,6 +14,7 @@ def q(instructions, code): # noqa no_ctx = False displayRet = False debug = False + reconnection_attempts = -1 if len(instructions)>0: instructions = instructions.split(' ') @@ -69,6 +70,11 @@ def q(instructions, code): # noqa debug = True instructions.pop(0) continue + elif instructions[0] == '--reconnection_attempts': + reconnection_attempts = float(instructions[1]) + instructions.pop(0) + instructions.pop(0) + continue elif instructions[0] == '': instructions.pop(0) continue @@ -87,10 +93,11 @@ def q(instructions, code): # noqa large_messages=large_messages, tls=tls, unix=unix, - no_ctx=no_ctx + no_ctx=no_ctx, + reconnection_attempts=reconnection_attempts ) try: - _q(ld) + _q(ld, skip_debug=True) except kx.QError as err: if '.Q.pykxld' in str(err): # .Q.pykxld is 
not defined on the server so we pass it as inline code @@ -98,7 +105,7 @@ def q(instructions, code): # noqa lines = f.readlines() for line in lines: if 'pykxld:' in line: - ld = _q("k)"+line[7:-1]) + ld = _q("k)"+line[7:-1], skip_debug=True) break else: raise err @@ -115,13 +122,13 @@ def q(instructions, code): # noqa ''', ld, code, - b'jupyter_cell.q' + b'jupyter_cell.q', skip_debug=True ) if not kx.licensed: ret = ret.py() for i in range(len(ret['res'])): if ret['err'][i]: - if debug: + if debug or kx.config.pykx_qdebug: print(ret['trc'][i].decode()) raise kx.QError(ret['res'][i].decode()) else: @@ -130,7 +137,7 @@ def q(instructions, code): # noqa for i in range(len(ret)): r = _q('@', ret, i) if r['err']: - if debug: + if debug or kx.config.pykx_qdebug: print(r['trc']) raise kx.QError(r['res'].py().decode()) else: diff --git a/src/pykx/pandas_api/__init__.py b/src/pykx/pandas_api/__init__.py index 83358b9..c884675 100644 --- a/src/pykx/pandas_api/__init__.py +++ b/src/pykx/pandas_api/__init__.py @@ -73,6 +73,7 @@ def return_val(*args, **kwargs): from .pandas_set_index import _init as _set_index_init, PandasSetIndex from .pandas_reset_index import _init as _reset_index_init, PandasResetIndex from .pandas_apply import _init as _apply_init, PandasApply +from .pandas_sorting import _init as _sorting_init, PandasSorting def _init(_q): @@ -84,12 +85,13 @@ def _init(_q): _merge_init(q) _set_index_init(q) _apply_init(q) + _sorting_init(q) _reset_index_init(q) class PandasAPI(PandasApply, PandasMeta, PandasIndexing, PandasReindexing, PandasConversions, PandasMerge, PandasSetIndex, PandasGroupBy, - PandasResetIndex): + PandasSorting, PandasResetIndex): """PandasAPI mixin class""" replace_self = False prev_locs = {} diff --git a/src/pykx/pandas_api/pandas_conversions.py b/src/pykx/pandas_api/pandas_conversions.py index 870c356..4f0f5bf 100644 --- a/src/pykx/pandas_api/pandas_conversions.py +++ b/src/pykx/pandas_api/pandas_conversions.py @@ -83,9 +83,16 @@ def astype(self, 
dtype, copy=True, errors='raise'): # noqa: max-complexity: 13 try: dict_grab = {} - for k, v in dtype.items(): + for k, v in dtype.copy().items(): dict_grab[k] = abs(kx_type_to_type_number[[x for x in kx_type_to_type_number.keys() if x in str(v)][0]]) + strval = q('{3_first x}', + q.qsql.exec(self.dtypes, 'datatypes', f'columns=`{k}')) + qtype = kx_type_to_type_number[strval.py().decode('utf-8')] + if abs(qtype) == dict_grab[k]: + dict_grab.pop(k) + if dict_grab == {}: + return self except IndexError: raise QError('Value passed does not match PyKX wrapper type') @@ -136,21 +143,21 @@ def astype(self, dtype, copy=True, errors='raise'): # noqa: max-complexity: 13 b1:(tabColTypes=10h) & dictColTypes=11h; c1:()!(); if[any b1; - dCols1:dictCols where b1; - f1:{(`$';x)}; c1:dCols1!(f1 each dCols1)]; + dCols1:dictCols where b1; + f1:{(`$';x)}; c1:dCols1!(f1 each dCols1)]; // Check casting to symbol, run `$string col // (also covers any symbol -> symbol cases) b2:(dictColTypes=11h) & not (b1 or tabColTypes=0h); c2:()!(); if[any b2; - dCols2:dictCols where b2; - f2:{(`$string; x)}; c2:dCols2!(f2 each dCols2)]; + dCols2:dictCols where b2; + f2:{(`$string; x)}; c2:dCols2!(f2 each dCols2)]; // Casting to string covering all cases except mixed lists b3: (dictColTypes=10h) & not tabColTypes=0h; c3:()!(); if[any b3; - dCols3:dictCols where b3; - f3:{(string; x)}; c3:dCols3!(f3 each dCols3)]; + dCols3:dictCols where b3; + f3:{(string; x)}; c3:dCols3!(f3 each dCols3)]; // Check mixed lists // if string column then allow cast to symbol // Check at beginning of method @@ -159,8 +166,8 @@ def astype(self, dtype, copy=True, errors='raise'): # noqa: max-complexity: 13 (tabColTypes=0h) & tabColNestedTypes=10h; c4:()!(); if[any b4; - dCols4:dictCols where b4; - f4:{(`$; x)}; c4:dCols4!(f4 each dCols4)]; + dCols4:dictCols where b4; + f4:{(`$; x)}; c4:dCols4!(f4 each dCols4)]; // Any matches that meet the vanilla case // and don't have additonal needs --> not any (bools) b5:not any 
(b1;b2;b3;b4); diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py index 2d43c84..7b9f82e 100644 --- a/src/pykx/pandas_api/pandas_indexing.py +++ b/src/pykx/pandas_api/pandas_indexing.py @@ -2,6 +2,8 @@ from ..exceptions import QError from . import api_return, MetaAtomic +import warnings + def _init(_q): global q @@ -301,13 +303,17 @@ def _drop_columns(tab, labels, errors=True): def _rename_index(tab, labels): if "Keyed" in str(type(tab)): - for x in list(labels.keys()): - if type(x) is not int: - labels.pop(x) return q('''{ - idx:first flip key x; - idx:@[(count idx;1)#idx;idx?raze key y;y]; - ([] idx)!value x}''', + kc:first cols x; + idx:key[x]kc; + newinds:([] ky:key y;vl:value y); + newinds:update ind:{.[?;(x;y);count x]}[idx] each ky from newinds; + newinds:select from newinds where not ind=count idx; + if[0~count newinds;:x]; + idx:$[(0h~type idx) or (type[idx]~type[newinds`vl]); + @[idx;newinds`ind;:;newinds`vl]; + 1_ @[(::),idx;1+newinds`ind;:;newinds`vl]]; + (@[key x;kc;:;idx])!value x}''', tab, labels) # noqa else: return ValueError(f"""Only pykx.KeyedTable objects can @@ -320,6 +326,8 @@ def _rename_columns(tab, labels): raise ValueError('pykx.Table column names can only be of type pykx.SymbolAtom') if type(x) is not str: labels.pop(x) + if x not in tab.columns: + labels.pop(x) if "Keyed" in str(type(tab)): return q('''{ c:cols value x; @@ -425,27 +433,38 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index return t def rename(self, labels=None, index=None, columns=None, axis=0, - copy=None, inplace=False, level=None, errors='ignore'): + copy=None, inplace=False, level=None, errors='ignore', mapper=None): + if labels is not None: + warnings.warn("Keyword 'labels' is deprecated please use 'mapper'", + DeprecationWarning) + if mapper is None: + mapper = labels if ("Keyed" not in str(type(self)) and columns is None and ((axis == 'index' or axis == 0) or (index is not None))): raise 
ValueError("Can only rename index of a KeyedTable") - if labels is None and index is None and columns is None: + if (not isinstance(mapper, dict) and mapper is not None): + raise NotImplementedError("Passing of non dictionary mapper items not yet implemented") + if (columns is None and ((axis == 'index' or axis == 0) or (index is not None))): + if len(self.index.columns)!=1: + raise NotImplementedError( + "Index renaming only supported for single key column KeyedTables") + if mapper is None and index is None and columns is None: raise ValueError("must pass an index to rename") elif axis !=0 and (index is not None or columns is not None): raise ValueError("Cannot specify both 'axis' and any of 'index' or 'columns'") if (columns is not None or axis==1) and level is not None: - raise ValueError('q/kdb+ tables only support symbols as column labels (no multi index on the column axis).') # noqa + raise ValueError('q/kdb+ tables only support symbols as column mapper (no multi index on the column axis).') # noqa if copy is not None or inplace or level is not None or errors != 'ignore': raise ValueError('nyi') t = self - if labels is not None: + if mapper is not None: if axis == 1 or axis == 'columns': - t = _rename_columns(t, labels) + t = _rename_columns(t, mapper) elif axis == 0 or axis == 'index': - t = _rename_index(t, labels) + t = _rename_index(t, mapper) else: raise ValueError(f'No axis named {axis}') else: diff --git a/src/pykx/pandas_api/pandas_merge.py b/src/pykx/pandas_api/pandas_merge.py index 37b26ae..b1a2e86 100644 --- a/src/pykx/pandas_api/pandas_merge.py +++ b/src/pykx/pandas_api/pandas_merge.py @@ -207,14 +207,25 @@ def _merge_tables(left, right, on, how, added_idx, left_index, right_index, dist def _q_merge_tables(left, right, how, added_idx): res = left - left = q('0!', left) - right = q('1!', right) if how == 'inner': - res = q.ij(left, right) + if 'KeyedTable' not in str(type(right)): + raise ValueError("Inner Join requires a keyed table" + " for the 
right dataset.") + else: + res = q.ij(left, right) elif how == 'left': - res = q.lj(left, right) + if 'KeyedTable' not in str(type(right)): + raise ValueError("Left Join requires a keyed table" + " for the right dataset.") + else: + res = q.lj(left, right) elif how == 'right': - res = _q_merge_tables(right, left, 'left', added_idx) + + if 'KeyedTable' not in str(type(left)): + raise ValueError("Right Join requires a keyed table" + " for the left dataset.") + else: + res = _q_merge_tables(right, left, 'left', added_idx) if added_idx: res.pop(added_idx) return res diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 659a26b..e85ef51 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -1,3 +1,5 @@ +import warnings + from . import api_return from ..exceptions import QError @@ -112,11 +114,15 @@ def columns(self): @property def dtypes(self): + warnings.warn("dtypes column 'type' is deprecated, please use 'datatypes'", + DeprecationWarning) return q(''' {a:0!x; - flip `columns`type!( + d:flip `columns`datatypes!( a[`c]; - {$[x~"kx.List";x;x,$[y in .Q.a;"Atom";"Vector"]]}'[y `$/:lower a`t;a`t])} + {$[x~"kx.List";x;x,$[y in .Q.a;"Atom";"Vector"]]}'[y `$/:lower a`t;a`t]); + d[`type]:d[`datatypes];d + } ''', q.meta(self), _type_mapping) @property diff --git a/src/pykx/pandas_api/pandas_sorting.py b/src/pykx/pandas_api/pandas_sorting.py new file mode 100644 index 0000000..d54d9d4 --- /dev/null +++ b/src/pykx/pandas_api/pandas_sorting.py @@ -0,0 +1,92 @@ +from . 
import api_return +from ..wrappers import IntAtom, LongAtom, RealAtom, ShortAtom, SymbolAtom, SymbolVector + + +def _init(_q): + global q + q = _q + + +def process_keep(self, columns, n, keep, x): + return q(''' + {[tab;cls;n;keep;x] + cls:(),cls; + kys:$[99h~type tab;cols key tab;`$()]; + s:$[x~`largest;xdesc;x~`smallest;xasc;'"Unknown sort option"]; + index:(::); + if[keep~`last; + s:$[x~`largest;xasc;xdesc]; + index:reverse; + n:neg n; + ]; + r:s[cls] update pykx_temp__internal_index:i from ?[tab;();0b;cls!cls]; + if[`all~keep; + r:cls xgroup r; + r:0!n sublist r; + r:update pykx_temp__internal_running_sum:sums count each + pykx_temp__internal_index from r; + r:(1+(count[r]-1)^first where n <=r`pykx_temp__internal_running_sum) sublist r; + i:count each r`pykx_temp__internal_index; + :kys xkey (0!tab)raze r`pykx_temp__internal_index + ]; + kys xkey (0!tab) index n sublist r`pykx_temp__internal_index} + ''', self, columns, n, keep, x) + + +def check_column_types(self, cols): + column_types = [SymbolAtom, SymbolVector, str] + if isinstance(cols, list): + if not all(type(c) in column_types for c in cols): + raise ValueError('columns must be of type string, SymbolAtom or SymbolVector') + elif type(cols) not in column_types: + raise ValueError('columns must be of type string, SymbolAtom or SymbolVector') + + +def check_n(self, n): + n_types = [ShortAtom, IntAtom, LongAtom, RealAtom, int] + if type(n) not in n_types: + raise ValueError("Only numeric values accepted for n") + return True if n<1 else False + + +def nLargeSmall(self, n, order, columns=None, keep='first'): + if check_n(self, n): + return q.sublist(0, self) + keep_options = ['first', 'last', 'all'] + if keep not in keep_options: + raise ValueError('keep must be either "first", "last" or "all"') + if keep != 'first': + check_column_types(self, columns) + return process_keep(self, columns, n, keep, order) + asc = True if order == 'smallest' else False + sorted = self.sort_values(by=columns, ascending=asc) + 
return q('sublist', n, sorted) + + +class PandasSorting: + + @api_return + def sort_values(self, by=None, ascending=True): + check_column_types(self, by) + if not isinstance(ascending, bool): + raise ValueError(f"""For argument 'ascending' expected type bool, + received type {type(ascending)}.""") + if ascending: + if by is None: + self = q('asc', self) + else: + self = q('xasc', by, self) + else: + if by is None: + self = q('desc', self) + else: + self = q('xdesc', by, self) + return self + + @api_return + def nlargest(self, n, columns=None, keep='first'): + return nLargeSmall(self, n, "largest", columns, keep) + + @api_return + def nsmallest(self, n, columns, keep='first'): + return nLargeSmall(self, n, "smallest", columns, keep) diff --git a/src/pykx/pykx.c b/src/pykx/pykx.c index f71a1ff..2a409e2 100644 --- a/src/pykx/pykx.c +++ b/src/pykx/pykx.c @@ -285,6 +285,7 @@ void construct_args_kwargs(PyObject* params, PyObject** args, PyObject** kwargs, EXPORT K k_pyfunc(K k_guid_string, K k_args) { + if (pykx_threading) return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); PyGILState_STATE gstate; @@ -373,6 +374,7 @@ EXPORT K k_pyfunc(K k_guid_string, K k_args) { // k_eval_or_exec == 0 -> eval the code string // k_eval_or_exec == 1 -> exec the code string EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { + if (pykx_threading) return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); PyGILState_STATE gstate; @@ -547,7 +549,7 @@ EXPORT K foreign_to_q(K f) { PyGILState_Release(gstate); return k; } - long _addr = PyLong_AsLongLong(k_addr); + long long _addr = PyLong_AsLongLong(k_addr); K res = (K)(uintptr_t)_addr; r1_ptr(res); Py_XDECREF(toq_args); @@ -734,6 +736,7 @@ EXPORT K import(K module) { EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { + if (pykx_threading) return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); K k; @@ -749,7 +752,6 @@ EXPORT K call_func(K f, K 
has_no_args, K args, K kwargs) { if (!PyCallable_Check(pyf)) { return raise_k_error("Attempted to call non callable python foreign object"); } - int len = (has_no_args->j==0)?0:(int)args->n; PyObject* py_params = NULL; PyObject* py_kwargs = NULL; @@ -760,8 +762,10 @@ EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { PyGILState_Release(gstate); return k; } - } else - py_params = PyTuple_New(0); + } else{ + py_params = PyTuple_New(0); + } + if ((kK(kwargs)[0])->n != 0) { PyObject* factory_args = PyTuple_New(1); @@ -796,6 +800,7 @@ EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { PyGILState_Release(gstate); return k; } + K res; if (pyres == NULL) { pyres = Py_BuildValue(""); @@ -805,6 +810,7 @@ EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { Py_XDECREF(pyres); flush_stdout(); PyGILState_Release(gstate); + return res; } diff --git a/src/pykx/pykxq.c b/src/pykx/pykxq.c index 4d32806..879218a 100644 --- a/src/pykx/pykxq.c +++ b/src/pykx/pykxq.c @@ -429,7 +429,7 @@ EXPORT K foreign_to_q(K f) { PyGILState_Release(gstate); return k; } - long _addr = PyLong_AsLongLong(k_addr); + long long _addr = PyLong_AsLongLong(k_addr); K res = (K)(uintptr_t)_addr; r1(res); Py_XDECREF(toq_args); @@ -609,6 +609,7 @@ EXPORT K import(K module) { EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { + K k; P pyf = NULL; diff --git a/src/pykx/query.py b/src/pykx/query.py index 4edb733..ede1b32 100644 --- a/src/pykx/query.py +++ b/src/pykx/query.py @@ -1,10 +1,9 @@ """Query interfaces for PyKX.""" from abc import ABCMeta -from random import choices -from string import ascii_letters from typing import Any, Dict, List, Optional, Union import warnings +from uuid import uuid4 from . import Q from . 
import wrappers as k @@ -378,33 +377,38 @@ def _seud(self, table, query_type, columns=None, where=None, by=None, modify=Fal raise TypeError("'table' object provided was not a K tabular object or an " "object which could be converted to an appropriate " "representation") - randstring = ''.join(choices(ascii_letters, k=32)) - self.randstring = randstring - table_name = f'.pykx.i._{randstring}' - self._q[table_name] = table + randguid = str(uuid4()) + self._q(f''' + {{@[{{get x}};`.pykx.i.updateCache;{{.pykx.i.updateCache:(`guid$())!()}}]; + .pykx.i.updateCache["G"$"{randguid}"]:x}} + ''', table) original_table = table - table = table_name + table_code = f'.pykx.i.updateCache["G"$"{randguid}"]' + if not inplace: + query_char = '!' if query_type in ('delete', 'update') else '?' + else: + query_char = table_code + (':!' if query_type in ('delete', 'update') else ':?') elif not isinstance(table, str): raise TypeError("'table' must be a an object which is convertible to a K object " "or a string denoting an item in q memory") - query_char = '!' if query_type in ('delete', 'update') else '?' - if (not inplace and query_type in ('delete', 'update')): - table_code = f'get`$"{table}"' else: - table_code = f'`$"{table}"' + if (not inplace and query_type in ('delete', 'update')): + table_code = f'get`$"{table}"' + else: + table_code = f'`$"{table}"' + query_char = '!' if query_type in ('delete', 'update') else '?' 
try: res = self._q( - f'{query_char}[{table_code};;;]', + f'{{{query_char}[{table_code};x;y;z]}}', where_clause, by_clause, select_clause, wait=True, ) if inplace and isinstance(original_table, k.K): - if query_type in ('delete', 'update'): - res = self._q[table_name] + res = self._q(table_code) if isinstance(res, QFuture): - raise QError("'inplace' not supported with asyncronous query") + raise QError("'inplace' not supported with asynchronous query") if type(original_table) != type(res): raise QError('Returned data format does not match input type, ' 'cannot perform inplace operation') @@ -412,7 +416,7 @@ def _seud(self, table, query_type, columns=None, where=None, by=None, modify=Fal return res finally: if isinstance(original_table, k.K): - self._q._call(f'![`.pykx.i;();0b;enlist[`$"_{randstring}"]]', wait=True) + self._q._call(f'.pykx.i.updateCache _:"G"$"{randguid}"', wait=True) def _generate_clause(self, clause_value, clause_name, query_type): if clause_value is None: @@ -581,7 +585,7 @@ def prepare(self, query: str, *args: Any) -> k.List: Examples: - Note: When preparing a query with K types you don't have to fully constuct one. + Note: When preparing a query with K types you don't have to fully construct one. For example you can pass `kx.LongAtom(1)` as a value to the prepare function as well as just [`pykx.LongAtom`][]. This only works for Atom and Vector types. There is also a helper function for tables that you can use called `pykx.Table.prototype`. diff --git a/src/pykx/schema.py b/src/pykx/schema.py index 65bfe66..9de1d7d 100644 --- a/src/pykx/schema.py +++ b/src/pykx/schema.py @@ -1,4 +1,11 @@ -"""Functionality for the manipulation and creation of schemas""" +""" +Functionality to support the creation and manipulation of schemas. 
+ +Generated schemas can be used in combination with both + [`insert`](https://code.kx.com/pykx/api/pykx-q-data/wrappers.html#pykx.wrappers.Table.insert) and + [`upsert`](https://code.kx.com/pykx/api/pykx-q-data/wrappers.html#pykx.wrappers.Table.upsert) + functionality to create populated table and keyed table objects. +""" from typing import Dict, List, Optional, Union @@ -21,25 +28,24 @@ def __dir__(): _ktype_to_conversion = { k.List: "", - - k.GUIDAtom: "guid", - k.BooleanAtom: "boolean", - k.ByteAtom: "byte", - k.ShortAtom: "short", - k.IntAtom: "int", - k.LongAtom: "long", - k.RealAtom: "real", - k.FloatAtom: "float", + k.GUIDAtom: "guid", k.GUIDVector: "guid", + k.BooleanAtom: "boolean", k.BooleanVector: "boolean", + k.ByteAtom: "byte", k.ByteVector: "byte", + k.ShortAtom: "short", k.ShortVector: "short", + k.IntAtom: "int", k.IntVector: "int", + k.LongAtom: "long", k.LongVector: "long", + k.RealAtom: "real", k.RealVector: "real", + k.FloatAtom: "float", k.FloatVector: "float", k.CharAtom: "char", - k.SymbolAtom: "symbol", - k.TimestampAtom: "timestamp", - k.MonthAtom: "month", - k.DateAtom: "date", - k.DatetimeAtom: "datetime", - k.TimespanAtom: "timespan", - k.MinuteAtom: "minute", - k.SecondAtom: "second", - k.TimeAtom: "time", + k.SymbolAtom: "symbol", k.SymbolVector: "symbol", + k.TimestampAtom: "timestamp", k.TimestampVector: "timestamp", + k.MonthAtom: "month", k.MonthVector: "month", + k.DateAtom: "date", k.DateVector: "date", + k.DatetimeAtom: "datetime", k.DatetimeVector: "datetime", + k.TimespanAtom: "timespan", k.TimespanVector: "timespan", + k.MinuteAtom: "minute", k.MinuteVector: "minute", + k.SecondAtom: "second", k.SecondVector: "second", + k.TimeAtom: "time", k.TimeVector: "time", } @@ -52,7 +58,7 @@ def builder(schema: Dict, Parameters: schema: The definition of the schema to be created mapping a 'str' to a `pykx.*` type object which is one of the types defined in - `pykx._kytpe_to_conversion`. + `pykx.schema._ktype_to_conversion`. 
key: A `str`-like object or list of `str` objects denoting the columns within the table defined by `schema` to be treated as primary keys, see [here](https://code.kx.com/q4m3/8_Tables/#841-keyed-table) for @@ -154,6 +160,10 @@ def builder(schema: Dict, mapping = [] idx=0 for i in ktypes: + if i == k.CharVector: + raise Exception("Error: setting column to 'CharVector' is ambiguous, please use 'List' " + "for columns with rows containing multiple characters or 'CharAtom' if " + "your rows contain a single character") try: qconversion = _ktype_to_conversion[i] except KeyError as e: diff --git a/src/pykx/system.py b/src/pykx/system.py index 8cd44ba..bdef11b 100644 --- a/src/pykx/system.py +++ b/src/pykx/system.py @@ -14,8 +14,8 @@ def __dir__(): class SystemCommands: """Wrappers for `q` system commands. - More documentation on all the system commands available to `q` can being - (found here)[https://code.kx.com/q/basics/syscmds/]. + More documentation on all the system commands available to `q` can be + [found here](https://code.kx.com/q/basics/syscmds/). """ def __init__(self, q: Q): @@ -138,6 +138,12 @@ def namespace(self, ns=None): ``` kx.q.system.namespace('foo') ``` + + Return to the default namespace. 
+ + ``` + kx.q.system.namespace('') + ``` """ if 'QConnection' in str(self._q): raise QError('Namespaces do not work over IPC.') diff --git a/src/pykx/toq.pyx b/src/pykx/toq.pyx index baab0bb..c436fb2 100644 --- a/src/pykx/toq.pyx +++ b/src/pykx/toq.pyx @@ -859,7 +859,7 @@ def from_list(x: list, except NameError: pass #Once on import q does not exist else: - res = q('{value $[9h~type first x;count[x]#0x0;x]!x}',res) + res = q('{value $[9h~type first x;count[x]#0x0;x]!x}',res, skip_debug=True) return res @@ -1016,6 +1016,9 @@ _dtype_to_ktype = { np.dtype('int64'): k.LongVector, np.dtype('float32'): k.RealVector, np.dtype('float64'): k.FloatVector, + np.dtype('datetime64[s]'): k.TimestampVector, + np.dtype('datetime64[ms]'): k.TimestampVector, + np.dtype('datetime64[us]'): k.TimestampVector, np.dtype('datetime64[ns]'): k.TimestampVector, np.dtype('datetime64[M]'): k.MonthVector, np.dtype('datetime64[D]'): k.DateVector, @@ -1144,6 +1147,12 @@ def from_numpy_ndarray(x: np.ndarray, | | +----------------------+------------------------+ | | | | `'datetime64[ns]'` | `pykx.TimestampVector` | | | | +----------------------+------------------------+ | + | | | `'datetime64[us]'` | `pykx.TimestampVector` | | + | | +----------------------+------------------------+ | + | | | `'datetime64[ms]'` | `pykx.TimestampVector` | | + | | +----------------------+------------------------+ | + | | | `'datetime64[s]'` | `pykx.TimestampVector` | | + | | +----------------------+------------------------+ | | | | `'timedelta64[s]'` | `pykx.SecondVector` | | | | +----------------------+------------------------+ | | | | `'datetime64[M]'` | `pykx.MonthVector` | | @@ -1294,13 +1303,20 @@ def from_numpy_ndarray(x: np.ndarray, elif ktype in supported_np_temporal_types: if ktype is k.TimestampVector or ktype is k.TimespanVector: offset = TIMESTAMP_OFFSET if ktype is k.TimestampVector else 0 - x = x.view(np.int64) + if x.dtype == np.dtype(' k.K: diff --git a/src/pykx/util.py b/src/pykx/util.py index 
766f07d..da99cb7 100644 --- a/src/pykx/util.py +++ b/src/pykx/util.py @@ -322,8 +322,8 @@ def env_information(): 'PYKX_NOQCE', 'PYKX_Q_LIB_LOCATION', 'PYKX_RELEASE_GIL', 'PYKX_Q_LOCK', 'PYKX_DEFAULT_CONVERSION', 'PYKX_SKIP_UNDERQ', 'PYKX_UNSET_GLOBALS', 'PYKX_DEBUG_INSIGHTS_LIBRARIES', 'PYKX_EXECUTABLE', 'PYKX_PYTHON_LIB_PATH', - 'PYKX_PYTHON_BASE_PATH', 'PYKX_PYTHON_HOME_PATH', 'PYKX_DIR', - 'PYKX_UNLICENSED', 'PYKX_LICENSED', 'PYKX_BETA_FEATURES', 'PYKX_NO_SIGNAL' + 'PYKX_PYTHON_BASE_PATH', 'PYKX_PYTHON_HOME_PATH', 'PYKX_DIR', 'PYKX_QDEBUG', + 'PYKX_THREADING', 'PYKX_4_1_ENABLED' ] for x in envs: diff --git a/src/pykx/wrappers.py b/src/pykx/wrappers.py index 566e19a..d10df40 100644 --- a/src/pykx/wrappers.py +++ b/src/pykx/wrappers.py @@ -498,7 +498,12 @@ def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: boo def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): return self.py(raw=raw) - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None + ): return self.np(raw=raw) def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None): @@ -663,7 +668,12 @@ def np(self, return np.datetime64(_wrappers.k_j(self) + epoch_offset, self._np_type) return np.datetime64(_wrappers.k_i(self) + self._epoch_offset, self._np_type) - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None, + ): if raw: return self.np(raw=True) return pd.Timestamp(self.np()) @@ -792,7 +802,7 @@ class DatetimeAtom(TemporalFixedAtom): """Wrapper for q datetime atoms. Warning: The q datetime type is deprecated. - PyKX does not provide a rich interface for the q datetime type, as it is depreceated. Avoid + PyKX does not provide a rich interface for the q datetime type, as it is deprecated. Avoid using it whenever possible. 
""" t = -15 @@ -1520,8 +1530,14 @@ def __arrow_array__(self, type=None): def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True): return self.np(raw=raw, has_nulls=has_nulls).tolist() - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): - return pd.Series(self.np(raw=raw, has_nulls=has_nulls), copy=False) + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None, + ): + res = pd.Series(self.np(raw=raw, has_nulls=has_nulls), copy=False) + return res def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None): if pa is None: @@ -1985,11 +2001,17 @@ def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): ) return arr - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None, + ): arr = self.np(raw=raw, has_nulls=has_nulls) if isinstance(arr, np.ma.MaskedArray): arr = pd.arrays.IntegerArray(arr, mask=arr.mask, copy=False) - return pd.Series(arr, copy=False) + res = pd.Series(arr, copy=False) + return res class BooleanVector(IntegralNumericVector): @@ -2145,11 +2167,17 @@ def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None): raise PyArrowUnavailable # nocov return _raw_guids_to_arrow(self.np(raw=True)) - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None, + ): if raw: return PandasUUIDArray(self.np(raw=raw)) else: - return super().pd() + res = super().pd() + return res class ByteVector(IntegralNumericVector): @@ -2440,7 +2468,7 @@ def py(self, if x is None: null_pos.append(converted_vector.index(x)) for i in null_pos: - converted_vector[i]=q('0Np') + converted_vector[i]=TimestampAtom(None) return converted_vector @@ -2563,10 +2591,16 @@ def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): return _wrappers.k_vec_to_array(self, _wrappers.NPY_INT64) return 
q('value', self).np() - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None, + ): if raw: return super(self).pd(raw=raw, has_nulls=has_nulls) - return pd.Series(self.np(raw=raw, has_nulls=has_nulls), dtype='category') + res = pd.Series(self.np(raw=raw, has_nulls=has_nulls), dtype='category') + return res class Anymap(List): @@ -2582,8 +2616,14 @@ def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: boo def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): return self._as_list().np(raw=raw, has_nulls=has_nulls) - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): - return self._as_list().pd(raw=raw, has_nulls=has_nulls) + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None, + ): + res = self._as_list().pd(raw=raw, has_nulls=has_nulls) + return res def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None): return self._as_list().pa(raw=raw, has_nulls=has_nulls) @@ -2740,7 +2780,13 @@ def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: boo self._values.py(raw=raw, has_nulls=has_nulls, stdlib=stdlib), )) - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None, raw_guids=False): + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None, + raw_guids=False, + ): if raw_guids and not raw: v = [x.np(raw=isinstance(x, GUIDVector), has_nulls=has_nulls) for x in self._values] v = [PandasUUIDArray(x) if x.dtype == complex else x for x in v] @@ -3366,7 +3412,12 @@ def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): return q('0!', self).np(raw=raw, has_nulls=has_nulls) raise LicenseException('convert a keyed table to a Numpy representation') - def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + def pd( + self, + *, + raw: bool = False, + has_nulls: Optional[bool] = None, + ): kk = 
self._keys._keys vk = self._values._keys kvg = self._keys._values._unlicensed_getitem @@ -4135,13 +4186,12 @@ def py(self, stdlib=None): def __call__(self, *args, **kwargs): if not licensed: raise LicenseException('call a q function in a Python process') - return q( - '{x`}', - q('{.pykx.wrap[x]}', self)( + if q('.pykx.util.isf', self).py(): + return q('{.pykx.wrap[x][<]}', self)( *[K(x) for x in args], **{k: K(v) for k, v in kwargs.items()} ) - ) + return q('{x . y}', self, [*[K(x) for x in args]]) @property def params(self): diff --git a/tests/conftest.py b/tests/conftest.py index 3a87a16..66c7d2f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -81,7 +81,11 @@ def q(request, kx, q_init): def random_free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: s.bind(('localhost', 0)) - return s.getsockname()[1] + port = s.getsockname()[1] + if port == 15001 or port == 15002 or port == 15003 or port == 15004: + return random_free_port() + else: + return port # Need to know what $QHOME to use for the q subprocesses before PyKX changes it. diff --git a/tests/test_compress_encrypt.py b/tests/test_compress_encrypt.py new file mode 100644 index 0000000..8d2cd0a --- /dev/null +++ b/tests/test_compress_encrypt.py @@ -0,0 +1,102 @@ +import os +from pathlib import Path + +# Do not import pykx here - use the `kx` fixture instead! 
+import pytest + + +def test_compress_encrypt_errors(kx): + with pytest.raises(ValueError) as err: + kx.Encrypt('/path') + assert "Provided 'path' does not exist" in str(err.value) + + with pytest.raises(ValueError) as err: + kx.Encrypt('tests/test_files/encrypt.txt') + assert "Password provided is None, please provide a str object" in str(err.value) + + with pytest.raises(TypeError) as err: + kx.Encrypt('tests/test_files/encrypt.txt', 10) + assert "Password must be supplied as a string" in str(err.value) + + with pytest.raises(kx.QError) as err: + encrypt=kx.Encrypt('tests/test_files/testkek.key', 'passwrd') + encrypt.load_key() + assert "Invalid password for" in str(err.value) + + with pytest.raises(ValueError) as err: + kx.Compress(block_size=24) + assert 'block_size must be a power of 2' in str(err.value) + + with pytest.raises(ValueError) as err: + kx.Compress(algo=kx.CompressionAlgorithm.zstd) + assert "'CompressionAlgorithm.zstd' only supported on" in str(err.value) + + with pytest.raises(ValueError) as err: + kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=100) + assert 'Invalid level 100 for CompressionAlgorithm.gzip' in str(err.value) + + +@pytest.mark.isolate +def test_compression(): + os.environ['PYKX_BETA_FEATURES'] = 'True' + import pykx as kx + compress = kx.Compress(kx.CompressionAlgorithm.ipc) + compress.global_init() + assert kx.q.z.zd.py() == [17, 1, 0] + + compress = kx.Compress(kx.CompressionAlgorithm.gzip, level=9) + compress.global_init() + assert kx.q.z.zd.py() == [17, 2, 9] + + compress = kx.Compress(kx.CompressionAlgorithm.lz4hc, level=10) + compress.global_init() + assert kx.q.z.zd.py() == [17, 4, 10] + + +@pytest.mark.isolate +def test_compression_4_1(): + os.environ['PYKX_4_1_ENABLED'] = 'True' + os.environ['PYKX_BETA_FEATURES'] = 'True' + import pykx as kx + compress = kx.Compress(kx.CompressionAlgorithm.zstd, level=0) + compress.global_init() + assert kx.q.z.zd.py() == [17, 5, 0] + + +@pytest.mark.isolate +def test_encrypt(): + 
os.environ['PYKX_BETA_FEATURES'] = 'True' + import pykx as kx + encrypt = kx.Encrypt('tests/test_files/testkek.key', 'password') + encrypt.load_key() + # If this has run, the encryption key has been loaded appropriately + # this can be tested more rigorously once kdb+ 4.0 2024.03.02 + assert kx.q('-36!(::)').py() + + +@pytest.mark.isolate +def test_encrypt_path(): + os.environ['PYKX_BETA_FEATURES'] = 'True' + import pykx as kx + encrypt = kx.Encrypt(Path('tests/test_files/testkek.key'), 'password') + encrypt.load_key() + # If this has run, the encryption key has been loaded appropriately + # this can be tested more rigorously once kdb+ 4.0 2024.03.02 + assert kx.q('-36!(::)').py() + + +@pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +def test_beta(): + import pykx as kx + + with pytest.raises(kx.QError) as err: + kx.Compress() + assert 'Attempting to use a beta feature "Compress' in str(err.value) + + with pytest.raises(kx.QError) as err: + kx.Encrypt() + assert 'Attempting to use a beta feature "Compress' in str(err.value) diff --git a/tests/test_db.py b/tests/test_db.py index 0cc7744..8004656 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -8,6 +8,8 @@ @pytest.mark.order(1) def test_creation(kx): + # Definition of qtab would break kx.DB prior to use of .Q.pt + kx.q('qtab:([]100?1f;100?1f)') db = kx.DB(path='db') tab = kx.Table(data={ 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), @@ -241,6 +243,24 @@ def test_load_warning(kx): assert type(db.table.table) == kx.PartitionedTable # noqa: E721 +@pytest.mark.order(19) +def test_compress(kx): + zd_cache = kx.q.z.zd + compress = kx.Compress(kx.CompressionAlgorithm.gzip, level=8) + db = kx.DB(path='db') + qtab = kx.Table(data={ + 'col1': kx.random.random(1000, 10.0), + 'col2': kx.random.random(1000, 10) + }) + db.create(qtab, 'comptab', kx.q('2015.01.02'), compress=compress) + db.fill_database() + assert zd_cache 
== kx.q.z.zd + compress_info = kx.q('-21!key`:./2015.01.02/comptab/col1') + assert type(compress_info) == kx.Dictionary + assert compress_info['algorithm'].py() == 2 + assert compress_info['zipLevel'].py() == 8 + + def test_enumerate(kx): tab = kx.Table(data={ 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), @@ -294,23 +314,6 @@ def test_beta(): assert 'Attempting to use a beta feature "Data' in str(err.value) -def test_splay(kx): - os.mkdir('splay') - kx.q['tab'] = kx.Table(data={ - 'x': kx.random.random(100, ['a', 'b', 'c']), - 'x1': kx.random.random(100, 1.0), - 'x2': kx.random.random(100, 10) - }) - kx.q('`:./splay/tab set .Q.en[`:./splay;tab]') - db = kx.DB() - db.load('splay') - assert type(db.tab) == kx.Table # noqa: E721 - with pytest.raises(kx.QError) as err: - db.rename_column('tab', 'x', 'x3') - shutil.rmtree('../splay') - assert 'Application of Database Management functionality' in str(err.value) - - @pytest.mark.order(-1) def test_cleanup(kx): shutil.rmtree('db') diff --git a/tests/test_files/encrypt.txt b/tests/test_files/encrypt.txt new file mode 100644 index 0000000..16b14f5 --- /dev/null +++ b/tests/test_files/encrypt.txt @@ -0,0 +1 @@ +test file diff --git a/tests/test_files/testkek.key b/tests/test_files/testkek.key new file mode 100644 index 0000000..9208f50 Binary files /dev/null and b/tests/test_files/testkek.key differ diff --git a/tests/test_ipc.py b/tests/test_ipc.py index 28451fa..2c4685c 100644 --- a/tests/test_ipc.py +++ b/tests/test_ipc.py @@ -660,52 +660,154 @@ def test_large_IPC(kx, q_port): @pytest.mark.unlicensed def test_debug_kwarg(kx, q_port): with kx.SyncQConnection(port=q_port) as q: + q('.pykx_test.cache_sbt:.Q.sbt') + q('.Q.sbt:{.pykx_test.cache:y;x y}[.Q.sbt]') assert q('til 10', debug=True).py() == list(range(10)) with pytest.raises(kx.QError) as e: - q('til "asd"') - assert '[1]' in str(e) + q('til "asd"', debug=True) + assert 'type' in str(e) + assert q('{[x] til x}', 10, debug=True).py() == list(range(10)) 
with pytest.raises(kx.QError) as e: - q('{[x] til x}', b'asd') - assert '[1]' in str(e) + q('{til x}', b'asd', debug=True) + assert 'type' in str(e) + assert b'{til x}' == q('.pykx_test.cache').py()[1][1][-1] + assert q('{[x; y] .[mavg; (x; til y)]}', 3, 10, debug=True).py() ==\ [0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] with pytest.raises(kx.QError) as e: - q('{[x; y] .[mavg; (x; til y)]}', 3, b'asd') - assert '[1]' in str(e) + q('{[x; y] .[mavg; (x; til y)]}', 3, b'asd', debug=True) + assert 'type' in str(e) + assert b'{[x; y] .[mavg; (x; til y)]}' == q('.pykx_test.cache').py()[1][1][-1] + q('.Q.sbt:.pykx_test.cache_sbt') + with kx.SecureQConnection(port=q_port) as q: + q('.pykx_test.cache_sbt:.Q.sbt') + q('.Q.sbt:{.pykx_test.cache:y;x y}[.Q.sbt]') assert q('til 10', debug=True).py() == list(range(10)) with pytest.raises(kx.QError) as e: q('til "asd"') - assert '[1]' in str(e) + assert 'type' in str(e) + assert q('{[x] til x}', 10, debug=True).py() == list(range(10)) with pytest.raises(kx.QError) as e: - q('{[x] til x}', b'asd') - assert '[1]' in str(e) + q('{til x}', b'asd', debug=True) + assert 'type' in str(e) + assert b'{til x}' == q('.pykx_test.cache').py()[1][1][-1] + assert q('{[x; y] .[mavg; (x; til y)]}', 3, 10, debug=True).py() ==\ [0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] + with pytest.raises(kx.QError) as e: + q('{[x; y] .[mavg; (x; til y)]}', 3, b'asd', debug=True) + assert 'type' in str(e) + assert b'{[x; y] .[mavg; (x; til y)]}' == q('.pykx_test.cache').py()[1][1][-1] + + q('.Q.sbt:.pykx_test.cache_sbt') + + +@pytest.mark.isolate +def test_debug_kwarg_global(q_port): + os.environ['PYKX_QDEBUG'] = 'True' + import pykx as kx + with kx.SyncQConnection(port=q_port) as q: + q('.pykx_test.cache_sbt:.Q.sbt') + q('.Q.sbt:{.pykx_test.cache:y;x y}[.Q.sbt]') + assert q('til 10').py() == list(range(10)) + with pytest.raises(kx.QError) as e: + q('til "asd"') + assert 'type' in str(e) + + assert q('{[x] til x}', 10).py() == list(range(10)) + 
with pytest.raises(kx.QError) as e: + q('{til x}', b'asd') + assert 'type' in str(e) + assert b'{til x}' == q('.pykx_test.cache')[1][1][-1].py() + + assert q('{[x; y] .[mavg; (x; til y)]}', 3, 10).py() ==\ + [0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] with pytest.raises(kx.QError) as e: q('{[x; y] .[mavg; (x; til y)]}', 3, b'asd') - assert '[1]' in str(e) + assert 'type' in str(e) + assert b'{[x; y] .[mavg; (x; til y)]}' == q('.pykx_test.cache')[1][1][-1].py() + q('.Q.sbt:.pykx_test.cache_sbt') + + with kx.SecureQConnection(port=q_port) as q: + q('.pykx_test.cache_sbt:.Q.sbt') + q('.Q.sbt:{.pykx_test.cache:y;x y}[.Q.sbt]') + assert q('til 10').py() == list(range(10)) + with pytest.raises(kx.QError) as e: + q('til "asd"') + assert 'type' in str(e) + + assert q('{[x] til x}', 10, debug=True).py() == list(range(10)) + with pytest.raises(kx.QError) as e: + q('{til x}', b'asd') + assert 'type' in str(e) + assert b'{til x}' == q('.pykx_test.cache')[1][1][-1].py() + + assert q('{[x; y] .[mavg; (x; til y)]}', 3, 10).py() ==\ + [0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] + with pytest.raises(kx.QError) as e: + q('{[x; y] .[mavg; (x; til y)]}', 3, b'asd') + assert 'type' in str(e) + assert b'{[x; y] .[mavg; (x; til y)]}' == q('.pykx_test.cache')[1][1][-1].py() + q('.Q.sbt:.pykx_test.cache_sbt') @pytest.mark.asyncio @pytest.mark.unlicensed async def test_debug_kwarg_async(kx, q_port): async with kx.AsyncQConnection(port=q_port) as q: + q('.pykx_test.cache_sbt:.Q.sbt') + q('.Q.sbt:{.pykx_test.cache:y;x y}[.Q.sbt]') assert (await q('til 10', debug=True)).py() == list(range(10)) with pytest.raises(kx.QError) as e: await q('til "asd"') - assert '[1]' in str(e) + assert 'type' in str(e) + assert (await q('{[x] til x}', 10, debug=True)).py() == list(range(10)) with pytest.raises(kx.QError) as e: - await q('{[x] til x}', b'asd') - assert '[1]' in str(e) + await q('{til x}', b'asd', debug=True) + assert 'type' in str(e) + assert b'{til x}' == (await 
q('.pykx_test.cache')).py()[1][1][-1] + + assert (await q('{[x; y] .[mavg; (x; til y)]}', 3, 10, debug=True)).py()\ + == [0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] + with pytest.raises(kx.QError) as e: + await q('{[x; y] .[mavg; (x; til y)]}', 3, b'asd', debug=True) + assert 'type' in str(e) + assert b'{[x; y] .[mavg; (x; til y)]}' == (await q('.pykx_test.cache')).py()[1][1][-1] + + q('.Q.sbt:.pykx_test.cache_sbt') + + +@pytest.mark.isolate +@pytest.mark.asyncio +async def test_debug_kwarg_async_global(q_port): + import os + os.environ['PYKX_QDEBUG'] = 'True' + import pykx as kx + async with kx.AsyncQConnection(port=q_port) as q: + q('.pykx_test.cache_sbt:.Q.sbt') + q('.Q.sbt:{.pykx_test.cache:y;x y}[.Q.sbt]') + assert (await q('til 10')).py() == list(range(10)) + with pytest.raises(kx.QError) as e: + await q('til "asd"') + assert 'type' in str(e) + + assert (await q('{[x] til x}', 10)).py() == list(range(10)) + with pytest.raises(kx.QError) as e: + await q('{til x}', b'asd') + assert 'type' in str(e) + assert b'{til x}' == (await q('.pykx_test.cache'))[1][1][-1].py() + assert (await q('{[x; y] .[mavg; (x; til y)]}', 3, 10, debug=True)).py()\ == [0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] with pytest.raises(kx.QError) as e: await q('{[x; y] .[mavg; (x; til y)]}', 3, b'asd') - assert '[1]' in str(e) + assert 'type' in str(e) + assert b'{[x; y] .[mavg; (x; til y)]}' == (await q('.pykx_test.cache'))[1][1][-1].py() + q('.Q.sbt:.pykx_test.cache_sbt') @pytest.mark.embedded @@ -723,3 +825,161 @@ async def test_debug_kwarg_embedded(kx, q): with pytest.raises(kx.QError) as e: q('{[x; y] .[mavg; (x; til y)]}', 3, b'asd') assert '[1]' in str(e) + + +@pytest.mark.unlicensed +def test_SyncQConnection_reconnect(kx): + q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip() + proc = subprocess.Popen( + [q_exe_path, '-p', '15001'], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT + ) + time.sleep(2) + + conn = 
kx.QConnection(port=15001, reconnection_attempts=1) + + assert conn('til 20').py() == list(range(20)) + proc.kill() + time.sleep(2) + with pytest.raises(BaseException): + conn('til 5') + + proc = subprocess.Popen( + [q_exe_path, '-p', '15001'], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT + ) + time.sleep(2) + assert conn('til 10').py() == list(range(10)) + proc.kill() + time.sleep(2) + + +@pytest.mark.unlicensed +def test_SecureQConnection_reconnect(kx): + q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip() + proc = subprocess.Popen( + [q_exe_path, '-p', '15002'], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT + ) + time.sleep(2) + + conn = kx.SecureQConnection(port=15002, reconnection_attempts=1) + + assert conn('til 20').py() == list(range(20)) + proc.kill() + time.sleep(2) + with pytest.raises(BaseException): + conn('til 5') + + proc = subprocess.Popen( + [q_exe_path, '-p', '15002'], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT + ) + time.sleep(2) + assert conn('til 10').py() == list(range(10)) + proc.kill() + time.sleep(2) + + +@pytest.mark.asyncio +@pytest.mark.unlicensed +async def test_AsyncQConnection_reconnect(kx): + q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip() + proc = subprocess.Popen( + [q_exe_path, '-p', '15003'], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT + ) + time.sleep(2) + + conn = await kx.AsyncQConnection( + port=15003, + reconnection_attempts=1, + event_loop=asyncio.get_event_loop() + ) + + assert (await conn('til 20')).py() == list(range(20)) + proc.kill() + time.sleep(2) + with pytest.raises(BaseException): + await conn('til 5') + + proc = subprocess.Popen( + [q_exe_path, '-p', '15003'], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT + ) + time.sleep(2) + assert (await conn('10?`a`b`c`d')).py() is None + assert (await conn('til 10')).py() == list(range(10)) + fut = conn('{t:.z.p;while[.z.p raised for 
column a" + + with pytest.raises(Exception) as err_info: + kx.schema.builder({'a': kx.CharVector, 'b': kx.IntAtom}) + assert "Error: setting column to 'CharVector' is ambiguous" in str(err_info.value) diff --git a/tests/test_sql.py b/tests/test_sql.py index 38254c3..01da07e 100644 --- a/tests/test_sql.py +++ b/tests/test_sql.py @@ -161,3 +161,13 @@ def test_sql_get_input_values(q, kx): assert q.sql.get_input_types(p1) == ['FloatAtom/FloatVector', 'LongAtom/LongVector'] assert q.sql.get_input_types(p2) == ['SymbolAtom/SymbolVector', 'FloatAtom/FloatVector'] + + +@pytest.mark.embedded +def test_sql_string_col(q): + q('t:([] optid:1 2 3;Market:`a`b`CBOE;date:3#2023.11.14;Symbol:("a";"b";"odMP=20"))') + qres = q('''select optid,Market,Symbol from t + where date = 2023.11.14,Market=`CBOE,Symbol like "odMP=20"''') + sqlres = q("""s)select optid,Market,Symbol from t + where date = 2023.11.14 and Market IN ('CBOE') and Symbol LIKE ('odMP=20');""") + assert q('~', qres, sqlres) diff --git a/tests/test_toq.py b/tests/test_toq.py index 5d744ee..51b6e68 100644 --- a/tests/test_toq.py +++ b/tests/test_toq.py @@ -382,6 +382,28 @@ def test_from_datetime64(kx): kx.DatetimeAtom(d) +@pytest.mark.unlicensed +@pytest.mark.nep49 +def test_from_datetime64_smsus(kx): + d = np.array(['2020-09-08T07:06:05.000004'], dtype='datetime64[us]') + + kd = kx.K(d) + assert isinstance(kd, kx.TimestampVector) + assert (kd.np() == d.astype(np.dtype('datetime64[ns]'))).all() + + d = np.array(['2020-09-08T07:06:05.004'], dtype='datetime64[ms]') + + kd = kx.K(d) + assert isinstance(kd, kx.TimestampVector) + assert (kd.np() == d.astype(np.dtype('datetime64[ns]'))).all() + + d = np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]') + + kd = kx.K(d) + assert isinstance(kd, kx.TimestampVector) + assert (kd.np() == d.astype(np.dtype('datetime64[ns]'))).all() + + @pytest.mark.unlicensed @pytest.mark.nep49 def test_from_timedelta64(kx): @@ -1109,11 +1131,11 @@ def test_from_pandas_categorical(q, kx, pd): 
@pytest.mark.nep49 def test_toq_pd_tabular_ktype(q, kx): df = pd.DataFrame.from_dict({'x': [1, 2], 'y': ['a', 'b']}) - assert kx.toq(df).dtypes['type'].py() == [b'kx.LongAtom', b'kx.SymbolAtom'] + assert kx.toq(df).dtypes['datatypes'].py() == [b'kx.LongAtom', b'kx.SymbolAtom'] kval = {'x': kx.FloatVector} - assert kx.toq(df, ktype=kval).dtypes['type'].py() == [b'kx.FloatAtom', b'kx.SymbolAtom'] + assert kx.toq(df, ktype=kval).dtypes['datatypes'].py() == [b'kx.FloatAtom', b'kx.SymbolAtom'] kval = {'x': kx.FloatVector, 'y': kx.CharVector} - assert kx.toq(df, ktype=kval).dtypes['type'].py() == [b'kx.FloatAtom', b'kx.CharVector'] + assert kx.toq(df, ktype=kval).dtypes['datatypes'].py() == [b'kx.FloatAtom', b'kx.CharVector'] with pytest.raises(ValueError, match="Column name passed in dictionary not present in df table"): # noqa: E501 kx.toq(df, ktype={'x1': kx.FloatVector}) with pytest.raises(kx.QError, match="Not supported:.*"): @@ -1124,11 +1146,11 @@ def test_toq_pd_tabular_ktype(q, kx): def test_toq_pa_tabular_ktype(q, kx, pa): pdtab = pd.DataFrame.from_dict({'x': [1, 2], 'y': ['a', 'b']}) df = pa.Table.from_pandas(pdtab) - assert kx.toq(df).dtypes['type'].py() == [b'kx.LongAtom', b'kx.SymbolAtom'] + assert kx.toq(df).dtypes['datatypes'].py() == [b'kx.LongAtom', b'kx.SymbolAtom'] kval = {'x': kx.FloatVector} - assert kx.toq(df, ktype=kval).dtypes['type'].py() == [b'kx.FloatAtom', b'kx.SymbolAtom'] + assert kx.toq(df, ktype=kval).dtypes['datatypes'].py() == [b'kx.FloatAtom', b'kx.SymbolAtom'] kval = {'x': kx.FloatVector, 'y': kx.CharVector} - assert kx.toq(df, ktype=kval).dtypes['type'].py() == [b'kx.FloatAtom', b'kx.CharVector'] + assert kx.toq(df, ktype=kval).dtypes['datatypes'].py() == [b'kx.FloatAtom', b'kx.CharVector'] with pytest.raises(ValueError, match="Column name passed in dictionary not present in df table"): # noqa: E501 kx.toq(df, ktype={'x1': kx.FloatVector}) with pytest.raises(kx.QError, match="Not supported:.*"): diff --git a/tests/test_wrappers.py 
b/tests/test_wrappers.py index 0047c40..0587c04 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -16,6 +16,7 @@ # Do not import Pandas, PyArrow, or PyKX here - use the pd/pa/kx fixtures instead! import numpy as np +import pandas as pd import pytest import pytz from packaging import version @@ -1453,7 +1454,7 @@ def test_has_null_and_has_inf(self, q): assert not q('0xdeadbeef').has_infs def f(type_code, zero): - v = q(f'v:v where (not &[;]. v=/:(neg z;z:0W{type_code})) & not null v:100?{zero};v') + v = q(f'v:v where (not any v=/:(neg z;z:0W{type_code})) & not null v:100?{zero};v') assert not v.has_nulls assert not v.has_infs assert q(f'@[v;-3?50;:;0N{type_code}]').has_nulls @@ -1467,10 +1468,16 @@ def f(type_code, zero): for type_code, zero in types: f(type_code, zero) - def test_np_timestampvector_nulls(self, q, kx): + def test_np_timestampvector_nulls(self, kx): assert kx.q('0Np').py() is None assert kx.q('enlist 0Np').py() == [kx.TimestampAtom(kx.q('0Np'))] + @pytest.mark.unlicensed + def test_np_timestampvector_nulls_IPC(self, kx, q_port): + with kx.QConnection(port=q_port) as conn: + r = conn('([] t:2#0Np)').py() + assert r['t'][0].py() is None + class Test_List: v = '(0b;"G"$"00000000-0000-0000-0000-000000000001";0x02;3h;4i;5j;6e;7f)' @@ -3560,6 +3567,31 @@ def test_attributes_table(kx, q): tab.parted(['x', 'x1']) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +@pytest.mark.licensed +def test_foreign_call(kx, q): + isf = q('.pykx.util.isf') + assert not q('.pykx.util.isf[1b]').py() + assert not q('.pykx.util.isf', True).py() + assert not isf(True).py() + + repr = kx.q('.pykx.util.repr') + with pytest.raises(kx.QError) as err: + q('.pykx.util.repr[1b;1b]') + assert 'Expected a foreign' in str(err.value) + + with pytest.raises(kx.QError) as err: + q('.pykx.util.repr', True, True) + assert 'Expected a foreign' in str(err.value) + + with pytest.raises(kx.QError) as err: + 
repr(True, True) + assert 'Expected a foreign' in str(err.value) + + @pytest.mark.licensed def test_attributes_keyed_table(kx, q): tab = q('([til 10] x1: 10 + til 10)') @@ -4066,3 +4098,119 @@ def test_repr_html(kx, q): q('system"l ."') tab = q('partitionedTab') assert (6, 49, 105) == checkHTML(tab) + + +@pytest.mark.unlicensed +@pytest.mark.xfail(reason="as_arrow functionality currently awaiting introduction", strict=False) +def test_pyarrow_pandas_ci_only(q): + if os.getenv('CI'): + with pytest.raises(NotImplementedError): + q('get`:a set (' + '(1 2;3 4);' + '`time`price`vol!(2022.03.29D16:45:14.880819;1.;100i);' + '([]a:1 2;b:("ab";"cd")))' + ).pd(as_arrow=True) + + +@pytest.mark.unlicensed +@pytest.mark.xfail(reason="as_arrow functionality currently awaiting introduction", strict=False) +@pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards") +def test_pyarrow_pandas_all_ipc(kx, q_port): + with kx.QConnection(port=q_port) as q: + def gen_q_datatypes_table(q, table_name: str, num_rows: int = 100) -> str: + query = '{@[;0;string]x#/:prd[x]?/:("pdmnuvtbhijef"$\\:0)}' + t = q(query, q('enlist', num_rows)) + if q('{any(raze null x),raze(v,neg v:0W 2147483647 32767)=\\:raze"j"$x:5_x}', t): + t = q(query, q('enlist', num_rows)) + t = q(f'enlist[({num_rows}?.Q.a)],', t) + q[table_name] = t + q(f'{table_name}[1]: prd[enlist[{num_rows}]]?/:("p"$\\:0)') + return table_name + + gen_q_datatypes_table(q, 'tab', 100) + for vec in q('tab'): + assert 'pyarrow' in str(vec.pd(as_arrow=True)) + q('tab: flip (`a`b`c`d`e`f`g`h`i`j`k`l`m`n)!(tab)') + + cols = q('cols tab').py() + dfa = q('tab').pd(as_arrow=True) + for c in cols: + assert 'pyarrow' in str(dfa[c].dtype) + q('tab: (til 100)!(tab)') + + with pytest.raises(NotImplementedError): + q('10?0Ng').pd(as_arrow=True) + + with pytest.raises(NotImplementedError): + q('0Nm').pd(as_arrow=True) + + with pytest.raises(NotImplementedError): + q('0Nu').pd(as_arrow=True) + + +@pytest.mark.unlicensed 
+@pytest.mark.xfail(reason="as_arrow functionality currently awaiting introduction", strict=False) +@pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards") +def test_pyarrow_pandas_all(q): + def gen_q_datatypes_table(q, table_name: str, num_rows: int = 100) -> str: + query = '{@[;0;string]x#/:prd[x]?/:("pdmnuvtbhijef"$\\:0)}' + t = q(query, q('enlist', num_rows)) + if q('{any(raze null x),raze(v,neg v:0W 2147483647 32767)=\\:raze"j"$x:5_x}', t): + t = q(query, q('enlist', num_rows)) + t = q(f'enlist[({num_rows}?.Q.a)],', t) + q[table_name] = t + q(f'{table_name}[1]: prd[enlist[{num_rows}]]?/:("p"$\\:0)') + return table_name + + gen_q_datatypes_table(q, 'tab', 100) + for vec in q('tab'): + assert 'pyarrow' in str(vec.pd(as_arrow=True)) + q('tab: flip (`a`b`c`d`e`f`g`h`i`j`k`l`m`n)!(tab)') + + cols = q('cols tab').py() + dfa = q('tab').pd(as_arrow=True) + for c in cols: + assert 'pyarrow' in str(dfa[c].dtype) + q('tab: (til 100)!(tab)') + + with pytest.raises(NotImplementedError): + q('10?0Ng').pd(as_arrow=True) + + with pytest.raises(NotImplementedError): + q('`u$v:6#u:`abc`xyz`hmm').pd(as_arrow=True) + + with pytest.raises(NotImplementedError): + q('0Nm').pd(as_arrow=True) + + with pytest.raises(NotImplementedError): + q('0Nu').pd(as_arrow=True) + + +@pytest.mark.embedded +@pytest.mark.xfail(reason="as_arrow functionality currently awaiting introduction", strict=False) +@pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards") +def test_pyarrow_pandas_table_roundtrip(kx): + kx.q('gen_data:{@[;0;string]x#/:prd[x]?/:(`6;`6;0Ng),("bxhijefpdnuvt"$\\:0)}') + kx.q('gen_names:{"dset_",/:x,/:string til count y}') + kx.q('dset_1D:gen_data[enlist 50]') + kx.q('one_tab: flip(`$gen_names["1_tab";dset_1D])!dset_1D') + + tab = kx.q['one_tab'] + tab2 = kx.toq(tab.pd(as_arrow=True)) + + for x in tab.keys(): + assert isinstance(tab2[x], type(tab[x])) + if x == 'dset_1_tab12': + assert all([x < 1000 for x in 
(tab[x]._values - tab2[x]._values).np()[0].astype(int)]) # noqa + else: + assert (tab[x]._values == tab2[x]._values).all() + + +@pytest.mark.embedded +@pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards") +@pytest.mark.xfail(reason="as_arrow functionality currently awaiting introduction", strict=False) +def test_pyarrow_pandas_timedeltas(kx): + tds = kx.toq(kx.q(''' + ([] a:1D 1D01 1D01:02 1D01:01:01 1D01:01:01.001 1D01:01:01.001001 1D01:01:01.001001001) + ''').pd(as_arrow=True)['a']) + assert ([-17, -17, -17, -18, -19, -16, -16] == kx.q('{type each x}', tds)).all() diff --git a/w64_install.ps1 b/w64_install.ps1 new file mode 100644 index 0000000..e248b12 --- /dev/null +++ b/w64_install.ps1 @@ -0,0 +1,15 @@ +Install-Module VcRedist -Force -AllowClobber +if (!(Test-Path -Path .\vcredist)){ + mkdir vcredist +} +$VcRedist = Get-VcList -Export All | Where-Object { $_.Release -eq '2010' -and $_.Architecture -eq 'x64' } +Save-VcRedist -Path 'vcredist' $VcRedist +Install-VcRedist -Path 'vcredist' -Silent $VcRedist +Invoke-WebRequest https://aka.ms/vs/16/release/vs_BuildTools.exe -UseBasicParsing -OutFile 'vs_BuildTools.exe' +./vs_BuildTools.exe --nocache --wait --quiet --norestart --includeRecommended --includeOptional --add Microsoft.VisualStudio.Workload.VCTools +if(Test-Path -Path .\vcpkg){ + Remove-Item -Recurse -Force .\vcpkg +} +git clone https://github.com/microsoft/vcpkg 'vcpkg' +vcpkg/bootstrap-vcpkg.bat -disableMetrics +vcpkg/vcpkg.exe install dlfcn-win32:x64-windows-static-md