From 1125801ac8c98221db369d1b4324cad01be54f6d Mon Sep 17 00:00:00 2001
From: frbattid
Date: Wed, 8 Jun 2016 15:42:36 +0200
Subject: [PATCH 1/4] [cosmos-tidoop-api][CHANGES_NEXT_RELEASE] Update

---
 CHANGES_NEXT_RELEASE | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGES_NEXT_RELEASE b/CHANGES_NEXT_RELEASE
index 6ff7ac5..15745df 100644
--- a/CHANGES_NEXT_RELEASE
+++ b/CHANGES_NEXT_RELEASE
@@ -9,3 +9,4 @@
 - [cosmos] [HARDENING] Fix the base path for readthedocks in mkdocs.yml (#164)
 - [cosmos] [HARDENING] Update the Quick Start Guide with regards to the new FIWARE Lab clusters (#167)
 - [cosmos] [HARDENING] Add Hive clients to resources folder (#168)
+- [cosmos-tidoop-api] [BUG] Run Java jars located in HDFS (#172)

From 3815eb0ea87e7c05cfe1aad6bfc7e0cf7c1fa7c5 Mon Sep 17 00:00:00 2001
From: frbattid
Date: Wed, 8 Jun 2016 15:43:39 +0200
Subject: [PATCH 2/4] [cosmos-tidoop-api] Fix MR Jobs based on HDFS jars

---
 cosmos-tidoop-api/src/cmd_runner.js | 59 +++++++++++++++++++----------
 cosmos-tidoop-api/src/tidoop_api.js | 13 +++++--
 2 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/cosmos-tidoop-api/src/cmd_runner.js b/cosmos-tidoop-api/src/cmd_runner.js
index bd47ff0..ac80d43 100644
--- a/cosmos-tidoop-api/src/cmd_runner.js
+++ b/cosmos-tidoop-api/src/cmd_runner.js
@@ -26,30 +26,47 @@
 // Module dependencies
 var spawn = require('child_process').spawn;
 
-function runHadoopJar(userId, jar, className, jarPath, input, output, callback) {
-    var params = ['-u', userId, 'hadoop', 'jar', jar, className, '-libjars', jarPath, input, output];
+function runHadoopJar(userId, jarName, jarInHDFS, className, libJarsName, libJarsInHDFS, input, output, callback) {
+    // Copy the jar from the HDFS user space
+    var params = ['-u', userId, 'hadoop', 'fs', '-copyToLocal', jarInHDFS, '/home/' + userId + '/' + jarName];
     var command = spawn('sudo', params);
-    var jobId = null;
 
-    // This function catches the stderr as it is being produced (console logs are printed in the stderr). At the moment
-    // of receiving the line containing the job ID, get it and return with no error (no error means the job could be
-    // run, independently of the final result of the job)
-    command.stderr.on('data', function (data) {
-        var dataStr = data.toString();
-        var magicString = 'Submitting tokens for job: ';
-        var indexOfJobId = dataStr.indexOf(magicString);
-
-        if(indexOfJobId >= 0) {
-            jobId = dataStr.substring(indexOfJobId + magicString.length, indexOfJobId + magicString.length + 22);
-            return callback(null, jobId);
-        } // if
-    });
+    command.on('close', function(code) {
+        // Copy the libjar from the HDFS user space
+        var params = ['-u', userId, 'hadoop', 'fs', '-copyToLocal', libJarsInHDFS, '/home/' + userId + '/' + libJarsName];
+        var command = spawn('sudo', params);
+
+        command.on('close', function(code) {
+            // Run the MR job
+            var params = ['-u', userId, 'hadoop', 'jar', '/home/' + userId + '/' + jarName, className, '-libjars', '/home/' + userId + '/' + libJarsName, input, output];
+            var command = spawn('sudo', params);
+            var jobId = null;
+
+            // This function catches the stderr as it is being produced (console logs are printed in the stderr). At the
+            // moment of receiving the line containing the job ID, get it and return with no error (no error means the
+            // job could be run, independently of the final result of the job)
+            command.stderr.on('data', function (data) {
+                var dataStr = data.toString();
+                var magicString = 'Submitting tokens for job: ';
+                var indexOfJobId = dataStr.indexOf(magicString);
+
+                if(indexOfJobId >= 0) {
+                    jobId = dataStr.substring(indexOfJobId + magicString.length, indexOfJobId + magicString.length + 22);
+                    var params = ['-u', userId, 'rm', '/home/' + userId + '/' + jarName];
+                    var command = spawn('sudo', params);
+                    var params = ['-u', userId, 'rm', '/home/' + userId + '/' + libJarsName];
+                    var command = spawn('sudo', params);
+                    return callback(null, jobId);
+                } // if
+            });
 
-    // This function catches the moment the command finishes. Return the error code if the job ID was never got
-    command.on('close', function (code) {
-        if (jobId === null) {
-            return callback(code, null);
-        } // if
+            // This function catches the moment the command finishes. Return the error code if the job ID was never got
+            command.on('close', function (code) {
+                if (jobId === null) {
+                    return callback(code, null);
+                } // if
+            });
+        });
     });
 } // runHadoopJar

diff --git a/cosmos-tidoop-api/src/tidoop_api.js b/cosmos-tidoop-api/src/tidoop_api.js
index 39780ab..8dae60d 100644
--- a/cosmos-tidoop-api/src/tidoop_api.js
+++ b/cosmos-tidoop-api/src/tidoop_api.js
@@ -55,9 +55,15 @@ server.route({
     path: '/tidoop/v1/user/{userId}/jobs',
     handler: function (request, reply) {
         var userId = request.params.userId;
-        var jar = request.payload.jar;
+        var jarInHDFS = 'hdfs://' + config.storage_cluster.namenode_host + ':'
+            + config.storage_cluster.namenode_ipc_port + '/user/' + userId + '/' + request.payload.jar;
+        var splits = request.payload.jar.split("/");
+        var jarName = splits[splits.length - 1];
         var className = request.payload.class_name;
-        var libJars = request.payload.lib_jars;
+        var libJarsInHDFS = 'hdfs://' + config.storage_cluster.namenode_host + ':'
+            + config.storage_cluster.namenode_ipc_port + '/user/' + userId + '/' + request.payload.lib_jars;
+        var splits = request.payload.lib_jars.split("/");
+        var libJarsName = splits[splits.length - 1];
         var input = 'hdfs://' + config.storage_cluster.namenode_host + ':' + config.storage_cluster.namenode_ipc_port
             + '/user/' + userId + '/' + request.payload.input;
         var output = 'hdfs://' + config.storage_cluster.namenode_host + ':' + config.storage_cluster.namenode_ipc_port
@@ -65,7 +71,8 @@
 
         logger.info('Request: POST /tidoop/v1/user/' + userId + '/jobs ' + request.payload);
 
-        cmdRunner.runHadoopJar(userId, jar, className, libJars, input, output, function(error, result) {
+        cmdRunner.runHadoopJar(userId, jarName, jarInHDFS, className, libJarsName, libJarsInHDFS, input, output,
+            function(error, result) {
             if (error && error >= 0) {
                 var response = '{"success":"false","error":' + error + '}';
                 logger.info(response);

From 044d10bc20445a4988c2a3f72edd97291b0a016c Mon Sep 17 00:00:00 2001
From: frbattid
Date: Wed, 8 Jun 2016 15:55:54 +0200
Subject: [PATCH 3/4] [cosmos][doc][quick_start_guide_new.md] Update regarding how to run jars at HDFS

---
 doc/manuals/quick_start_guide_new.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/manuals/quick_start_guide_new.md b/doc/manuals/quick_start_guide_new.md
index 813747f..b241724 100644
--- a/doc/manuals/quick_start_guide_new.md
+++ b/doc/manuals/quick_start_guide_new.md
@@ -166,13 +166,13 @@ Coming soon.
 [Top](#top)
 
 ###Step 5: Run your first MapReduce job
-Several pre-loaded MapReduce examples can be found in every Hadoop distribution, typically in a Java `-jar` file called `hadoop-mapreduce-examples.jar`. In this case, the Computing Endpoint owns that file at:
+Several ready-made MapReduce examples can be found in every Hadoop distribution, typically in a Java `.jar` file called `hadoop-mapreduce-examples.jar`. This file is copied to each user's HDFS space in FIWARE Lab, specifically under the `jars/` folder, so the `frb` user should find it at:
 
-    /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar
+    hdfs:///user/frb/jars/hadoop-mapreduce-examples.jar
 
-For instance, you can run the Word Count example (this is also know as the "hello world" of Hadoop) by typing:
+Thus, you can run the Word Count example (this is also known as the "hello world" of Hadoop) by typing:
 
-    $ curl -X POST "http://computing.cosmos.lab.fiware.org:12000/tidoop/v1/user/frb/jobs" -d '{"jar":"/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar","class_name":"wordcount","lib_jars":"/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar","input":"testdir","output":"testoutput"}' -H "Content-Type: application/json" -H "X-Auth-Token: 3azH09G1PdaGmgBNODLOtxy52f5a00"
+    $ curl -X POST "http://computing.cosmos.lab.fiware.org:12000/tidoop/v1/user/frb/jobs" -d '{"jar":"jars/hadoop-mapreduce-examples.jar","class_name":"wordcount","lib_jars":"jars/hadoop-mapreduce-examples.jar","input":"testdir","output":"testoutput"}' -H "Content-Type: application/json" -H "X-Auth-Token: 3azH09G1PdaGmgBNODLOtxy52f5a00"
     {"success":"true","job_id": "job_1460639183882_0001"}
 
 As you can see, another REST API has been used, in this case the Tidoop REST API in the Computing Endpoint. The API also allows you to check the status of the job:

From 37bc57f30b6070679c41e86e8fc38cc9ad5098f3 Mon Sep 17 00:00:00 2001
From: frbattid
Date: Wed, 8 Jun 2016 15:57:23 +0200
Subject: [PATCH 4/4] [cosmos][mkdocs.yml] Update

---
 mkdocs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mkdocs.yml b/mkdocs.yml
index 11ba4d1..6cd9a16 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -10,7 +10,7 @@ theme: readthedocs
 extra_css: ["https://fiware.org/style/fiware_readthedocs.css"]
 pages:
     - Home: index.md
-    - 'Quick Start Guide': 'quick_start_guide.md'
+    - 'Quick Start Guide': 'quick_start_guide_new.md'
    - 'Installation and Administration manual':
        - 'Introduction': 'installation_and_administration_manual/introduction.md'
        - 'Batch processing: Some words about Cosmos and its ecosystem': 'installation_and_administration_manual/batch/some_words_about_cosmos_and_ecosystem.md'
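
For reference, below is a minimal standalone sketch of the copy/run/clean-up chain that the new runHadoopJar in cmd_runner.js implements. The sudoAs helper, the runHadoopJarChecked name and the exit-code checks on the two -copyToLocal steps are illustrative additions, not part of the patch; they show one way a non-zero exit code from a copy could be propagated to the callback.

// Minimal sketch: chain the two HDFS-to-local copies and the job submission,
// checking the exit code of each copy before moving on
var spawn = require('child_process').spawn;

// Run 'sudo -u <userId> <args...>' and report the process exit code
function sudoAs(userId, args, callback) {
    var command = spawn('sudo', ['-u', userId].concat(args));
    command.on('close', callback);
} // sudoAs

function runHadoopJarChecked(userId, jarName, jarInHDFS, className, libJarsName, libJarsInHDFS, input, output,
    callback) {
    var localJar = '/home/' + userId + '/' + jarName;
    var localLibJars = '/home/' + userId + '/' + libJarsName;

    // Copy the jar from the HDFS user space, failing fast on a non-zero exit code
    sudoAs(userId, ['hadoop', 'fs', '-copyToLocal', jarInHDFS, localJar], function(code) {
        if (code !== 0) {
            return callback(code, null);
        } // if

        // Copy the libjars the same way
        sudoAs(userId, ['hadoop', 'fs', '-copyToLocal', libJarsInHDFS, localLibJars], function(code) {
            if (code !== 0) {
                return callback(code, null);
            } // if

            // Run the MR job, watching the stderr for the job ID as the patch does
            var command = spawn('sudo', ['-u', userId, 'hadoop', 'jar', localJar, className,
                '-libjars', localLibJars, input, output]);
            var jobId = null;

            command.stderr.on('data', function(data) {
                var dataStr = data.toString();
                var magicString = 'Submitting tokens for job: ';
                var indexOfJobId = dataStr.indexOf(magicString);

                if (indexOfJobId >= 0) {
                    jobId = dataStr.substring(indexOfJobId + magicString.length,
                        indexOfJobId + magicString.length + 22);
                    // Remove the local copies once the job has been submitted
                    sudoAs(userId, ['rm', localJar], function() {});
                    sudoAs(userId, ['rm', localLibJars], function() {});
                    return callback(null, jobId);
                } // if
            });

            // Return the error code if the job ID was never got
            command.on('close', function(code) {
                if (jobId === null) {
                    return callback(code, null);
                } // if
            });
        });
    });
} // runHadoopJarChecked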