Code bug fix & 1.0.1 release
PApostol committed Oct 22, 2021
1 parent 709befd commit 9286439
Showing 6 changed files with 33 additions and 21 deletions.
9 changes: 6 additions & 3 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
-## Spark-submit
-##### Latest version: 0.1.0
+### Spark-submit
 
-#### 0.1.0 (2021-10-16)
+##### 1.0.1 (2021-10-22)
+- Fix wrong return code bug in `get_code()`
+- Change argument order in `submit()`
+
+##### 1.0.0 (2021-10-16)
 - First release
21 changes: 15 additions & 6 deletions README.md
@@ -1,7 +1,7 @@
 ## Spark-submit
 
 [![PyPI version](https://badge.fury.io/py/spark-submit.svg)](https://badge.fury.io/py/spark-submit)
-[![Github All Releases](https://img.shields.io/github/downloads/PApostol/spark-submit/total.svg)]()
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/spark-submit)](https://pypi.org/project/spark-submit/)
 [![](https://img.shields.io/badge/python-3.5+-blue.svg)](https://www.python.org/downloads/)
 [![License](https://img.shields.io/badge/License-MIT-blue)](#license "Go to license section")
 [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/PApostol/spark-submit/issues)
@@ -59,7 +59,7 @@ print(app.submit_cmd)
 print(app.env_vars)
 
 # monitor state in the background every x seconds with `await_result=x`
-app.submit(use_env_vars=True, await_result=10)
+app.submit(await_result=10, use_env_vars=True)
 
 print(app.get_state()) # 'SUBMITTED'
@@ -134,6 +134,15 @@ spark_args = {
 main_file = 's3a://mybucket/file.jar'
 app = SparkJob(main_file, **spark_args)
 ```
+
+#### Testing
+
+You can do some simple testing with local mode Spark after cloning the repo:
+
+`python tests/run_test.py`
+
+Note the additional requirements for running the tests: `pip install -r tests/requirements.txt`
+
 #### Additional methods
 
 `spark_submit.system_info()`: Collects Spark related system information, such as versions of spark-submit, Scala, Java, Python and OS
@@ -146,8 +155,8 @@ app = SparkJob(main_file, **spark_args)
 
 ### License
 
-Released under [MIT](/LICENSE) by [@PApostol](https://github.com/PApostol)
+Released under [MIT](/LICENSE) by [@PApostol](https://github.com/PApostol).
 
-- You can freely modify and reuse
-- The original license must be included with copies of this software
-- Please link back to this repo if you use a significant portion of the source code
+- You can freely modify and reuse.
+- The original license must be included with copies of this software.
+- Please link back to this repo if you use a significant portion of the source code.
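
Note on usage: the Testing section added above assumes a cloned repo and a local Spark installation. For context, a rough local-mode sketch of the API the README documents; the application path is a hypothetical placeholder, and the `master` keyword follows the README's `spark_args` pattern:

```python
from spark_submit import SparkJob

# Hypothetical local application path; the README example uses 's3a://mybucket/file.jar'.
app = SparkJob('/path/to/pi.py', master='local[*]')

# In local (client) mode submit() blocks, and per the sparkjob.py diff below
# the driver state is set to 'FINISHED' once spark-submit returns.
app.submit()

print(app.get_state())  # 'FINISHED'
print(app.get_code())   # 0 on success; -1 only if no return code was captured
```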
2 changes: 1 addition & 1 deletion setup.py
@@ -19,7 +19,7 @@
     license = about['__license__'],
     url = about['__url__'],
     description = about['__description__'],
-    long_description_content_type="text/markdown",
+    long_description_content_type = 'text/markdown',
     long_description = readme,
     packages = find_packages(),
     include_package_data = True,
2 changes: 1 addition & 1 deletion spark_submit/__info__.py
@@ -11,7 +11,7 @@
 __author_email__ = 'foo@bar.com'
 __maintainer__ = 'PApostol'
 __license__ = 'MIT'
-__version__ = '1.0.0'
+__version__ = '1.0.1'
 __description__ = 'Python manager for spark-submit jobs'
 __url__ = 'https://github.com/PApostol/spark-submit'
 __bugtrack_url__ = f'{__url__}/issues'
18 changes: 9 additions & 9 deletions spark_submit/sparkjob.py
@@ -116,16 +116,16 @@ def _check_submit(self) -> None:
         self.submit_response['driver_state'] = driver_state[0]
 
 
-    def submit(self, use_env_vars: bool=False, await_result: int=0) -> None:
+    def submit(self, await_result: int=0, use_env_vars: bool=False) -> None:
         """Submits the current Spark job to Spark master
 
         Parameters
-            use_env_vars (bool): whether the environment variables obtained should be used (default: False)
             await_result (int): how often to poll for the Spark driver state in a background thread (default: 0, don't monitor in a background thread)
+            use_env_vars (bool): whether the environment variables obtained should be used (default: False)
 
         Returns:
             None
-        """
+        """
         env_vars = ''
         if use_env_vars:
             for env_var, val in self.env_vars.items():
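
Note on the reordered signature: positional callers are affected by the swap above, while keyword callers are not. A minimal sketch (the script name is a hypothetical placeholder):

```python
from spark_submit import SparkJob

app = SparkJob('main.py')  # hypothetical placeholder application file

# Old order: submit(use_env_vars=False, await_result=0)
# New order: submit(await_result=0, use_env_vars=False)
# A positional call such as app.submit(True, 10) now binds True to
# await_result and 10 to use_env_vars, so keyword arguments are safer:
app.submit(await_result=10, use_env_vars=True)
```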
@@ -146,7 +146,7 @@ def submit(self, use_env_vars: bool=False, await_result: int=0) -> None:
             self.submit_response['driver_state'] = 'ERROR'
             raise SparkSubmitError(f'{output}\nReturn code: {code}')
 
-        elif self.spark_args['deploy_mode']=='client':
+        elif self.spark_args['deploy_mode'] == 'client':
             self.submit_response['driver_state'] = 'FINISHED'
 
         else:
@@ -165,7 +165,7 @@ def get_state(self) -> str:
 
         Returns:
             str: Spark job driver state
-        """
+        """
         self._check_submit()
         return self.submit_response['driver_state']

@@ -175,7 +175,7 @@ def get_output(self) -> str:
 
         Returns:
             str: spark-submit stdout
-        """
+        """
         return self.submit_response['output']


@@ -184,16 +184,16 @@ def get_code(self) -> int:
 
         Returns:
             int: spark-submit return code (returns -1 if no code)
-        """
-        return self.submit_response['code'] if self.submit_response['code'] else -1
+        """
+        return self.submit_response['code'] if isinstance(self.submit_response['code'], int) else -1
 
 
     def kill(self) -> None:
         """Kills the running Spark job (cluster mode only)
 
         Returns:
             None
-        """
+        """
         if self.submit_response['driver_state'] in end_sates:
             logging.warning('Spark job "{0}" has concluded with state {1} and cannot be killed.'.format(self.spark_args['name'], self.submit_response['driver_state']))
         elif self.submit_response['submission_id']:
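The `get_code()` change above is the changelog's "wrong return code" fix: a successful spark-submit exits with code 0, which is falsy in Python, so the old truthiness test reported -1 ("no code") for successful jobs. A standalone sketch of the before/after logic:

```python
def old_get_code(code):
    # Pre-1.0.1 logic: falls back to -1 whenever `code` is falsy, including 0.
    return code if code else -1

def new_get_code(code):
    # 1.0.1 logic: falls back to -1 only when no integer code was captured.
    return code if isinstance(code, int) else -1

assert old_get_code(0) == -1     # the bug: success (0) misreported as "no code"
assert new_get_code(0) == 0      # the fix: 0 is a valid return code
assert new_get_code(None) == -1  # a missing code still maps to -1
```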
2 changes: 1 addition & 1 deletion spark_submit/system.py
@@ -28,7 +28,7 @@ def system_info() -> str:
 
     Returns:
         str: system information
-    """
+    """
     spark_bin = os.environ.get('SPARK_HOME', os.path.expanduser('~/spark_home')) + '/bin/spark-submit'
     cmd = spark_bin + ' --version'
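For context, `system_info()` is the helper documented in the README's Additional methods section. A minimal usage sketch, assuming spark-submit resolves via `SPARK_HOME` (or the `~/spark_home` fallback shown above):

```python
from spark_submit import system_info

# Prints versions of spark-submit, Scala, Java, Python and the OS.
print(system_info())
```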
