Merge pull request #173 from telefonicaid/bug/172_run_jars_in_hdfs
bug/172_run_jars_in_hdfs
pcoello25 committed Jun 9, 2016
2 parents 917ecf3 + 37bc57f commit 5ad2ad2
Showing 5 changed files with 54 additions and 29 deletions.
1 change: 1 addition & 0 deletions CHANGES_NEXT_RELEASE
@@ -9,3 +9,4 @@
 - [cosmos] [HARDENING] Fix the base path for readthedocks in mkdocs.yml (#164)
 - [cosmos] [HARDENING] Update the Quick Start Guide with regards to the new FIWARE Lab clusters (#167)
 - [cosmos] [HARDENING] Add Hive clients to resources folder (#168)
+- [cosmos-tidoop-api] [BUG] Run Java jars located in HDFS (#172)
59 changes: 38 additions & 21 deletions cosmos-tidoop-api/src/cmd_runner.js
@@ -26,30 +26,47 @@
 // Module dependencies
 var spawn = require('child_process').spawn;
 
-function runHadoopJar(userId, jar, className, jarPath, input, output, callback) {
-    var params = ['-u', userId, 'hadoop', 'jar', jar, className, '-libjars', jarPath, input, output];
+function runHadoopJar(userId, jarName, jarInHDFS, className, libJarsName, libJarsInHDFS, input, output, callback) {
+    // Copy the jar from the HDFS user space
+    var params = ['-u', userId, 'hadoop', 'fs', '-copyToLocal', jarInHDFS, '/home/' + userId + '/' + jarName];
     var command = spawn('sudo', params);
-    var jobId = null;
 
-    // This function catches the stderr as it is being produced (console logs are printed in the stderr). At the moment
-    // of receiving the line containing the job ID, get it and return with no error (no error means the job could be
-    // run, independently of the final result of the job)
-    command.stderr.on('data', function (data) {
-        var dataStr = data.toString();
-        var magicString = 'Submitting tokens for job: ';
-        var indexOfJobId = dataStr.indexOf(magicString);
-
-        if(indexOfJobId >= 0) {
-            jobId = dataStr.substring(indexOfJobId + magicString.length, indexOfJobId + magicString.length + 22);
-            return callback(null, jobId);
-        } // if
-    });
+    command.on('close', function(code) {
+        // Copy the libjar from the HDFS user space
+        var params = ['-u', userId, 'hadoop', 'fs', '-copyToLocal', libJarsInHDFS, '/home/' + userId + '/' + libJarsName];
+        var command = spawn('sudo', params);
+
+        command.on('close', function(code) {
+            // Run the MR job
+            var params = ['-u', userId, 'hadoop', 'jar', '/home/' + userId + '/' + jarName, className, '-libjars', '/home/' + userId + '/' + libJarsName, input, output];
+            var command = spawn('sudo', params);
+            var jobId = null;
+
+            // This function catches the stderr as it is being produced (console logs are printed in the stderr). At the
+            // moment of receiving the line containing the job ID, get it and return with no error (no error means the
+            // job could be run, independently of the final result of the job)
+            command.stderr.on('data', function (data) {
+                var dataStr = data.toString();
+                var magicString = 'Submitting tokens for job: ';
+                var indexOfJobId = dataStr.indexOf(magicString);
+
+                if(indexOfJobId >= 0) {
+                    jobId = dataStr.substring(indexOfJobId + magicString.length, indexOfJobId + magicString.length + 22);
+                    var params = ['-u', userId, 'rm', '/home/' + userId + '/' + jarName];
+                    var command = spawn('sudo', params);
+                    var params = ['-u', userId, 'rm', '/home/' + userId + '/' + libJarsName];
+                    var command = spawn('sudo', params);
+                    return callback(null, jobId);
+                } // if
+            });
 
-    // This function catches the moment the command finishes. Return the error code if the job ID was never got
-    command.on('close', function (code) {
-        if (jobId === null) {
-            return callback(code, null);
-        } // if
+            // This function catches the moment the command finishes. Return the error code if the job ID was never got
+            command.on('close', function (code) {
+                if (jobId === null) {
+                    return callback(code, null);
+                } // if
+            });
+        });
     });
 } // runHadoopJar
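To make the nesting easier to follow, here is a minimal sketch of the copy, run and clean-up sequence the new `runHadoopJar` chains through 'close' events. The `runAsUser` helper and the `runHadoopJarSketch` wrapper are hypothetical, not part of the repository, and the sketch omits the stderr parsing that extracts the job ID for the callback:

    var spawn = require('child_process').spawn;

    // Hypothetical helper: run a command impersonating the given user, as
    // 'sudo -u <userId> <cmd...>', and invoke next() when the process exits
    function runAsUser(userId, args, next) {
        spawn('sudo', ['-u', userId].concat(args)).on('close', next);
    }

    function runHadoopJarSketch(userId, jarName, jarInHDFS, className, libJarsName, libJarsInHDFS, input, output) {
        var localJar = '/home/' + userId + '/' + jarName;
        var localLibJars = '/home/' + userId + '/' + libJarsName;

        // 1. Copy the jar and the libjars from the HDFS user space to local disk
        runAsUser(userId, ['hadoop', 'fs', '-copyToLocal', jarInHDFS, localJar], function() {
            runAsUser(userId, ['hadoop', 'fs', '-copyToLocal', libJarsInHDFS, localLibJars], function() {
                // 2. Submit the MapReduce job using the local copies
                runAsUser(userId, ['hadoop', 'jar', localJar, className, '-libjars', localLibJars, input, output],
                    function() {
                        // 3. Remove the local copies (the real code does this as soon
                        //    as the job ID shows up in the job's stderr)
                        runAsUser(userId, ['rm', localJar], function() {});
                        runAsUser(userId, ['rm', localLibJars], function() {});
                    });
            });
        });
    } // runHadoopJarSketch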

13 changes: 10 additions & 3 deletions cosmos-tidoop-api/src/tidoop_api.js
@@ -55,17 +55,24 @@ server.route({
     path: '/tidoop/v1/user/{userId}/jobs',
     handler: function (request, reply) {
         var userId = request.params.userId;
-        var jar = request.payload.jar;
+        var jarInHDFS = 'hdfs://' + config.storage_cluster.namenode_host + ':'
+            + config.storage_cluster.namenode_ipc_port + '/user/' + userId + '/' + request.payload.jar;
+        var splits = request.payload.jar.split("/");
+        var jarName = splits[splits.length - 1];
         var className = request.payload.class_name;
-        var libJars = request.payload.lib_jars;
+        var libJarsInHDFS = 'hdfs://' + config.storage_cluster.namenode_host + ':'
+            + config.storage_cluster.namenode_ipc_port + '/user/' + userId + '/' + request.payload.lib_jars;
+        var splits = request.payload.lib_jars.split("/");
+        var libJarsName = splits[splits.length - 1];
         var input = 'hdfs://' + config.storage_cluster.namenode_host + ':' + config.storage_cluster.namenode_ipc_port
             + '/user/' + userId + '/' + request.payload.input;
         var output = 'hdfs://' + config.storage_cluster.namenode_host + ':' + config.storage_cluster.namenode_ipc_port
             + '/user/' + userId + '/' + request.payload.output;
 
         logger.info('Request: POST /tidoop/v1/user/' + userId + '/jobs ' + request.payload);
 
-        cmdRunner.runHadoopJar(userId, jar, className, libJars, input, output, function(error, result) {
+        cmdRunner.runHadoopJar(userId, jarName, jarInHDFS, className, libJarsName, libJarsInHDFS, input, output,
+            function(error, result) {
             if (error && error >= 0) {
                 var response = '{"success":"false","error":' + error + '}';
                 logger.info(response);
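As an illustration of the new path handling (all values below are invented; `config.storage_cluster.*` stands for the fields read from tidoop's configuration), the handler now derives both a full HDFS URI and a bare file name from the relative path carried in the payload:

    // Illustration only: invented values
    var namenodeHost = 'storagecluster.example.org';   // config.storage_cluster.namenode_host
    var namenodeIpcPort = 8020;                        // config.storage_cluster.namenode_ipc_port
    var userId = 'frb';
    var payloadJar = 'jars/hadoop-mapreduce-examples.jar';

    var jarInHDFS = 'hdfs://' + namenodeHost + ':' + namenodeIpcPort + '/user/' + userId + '/' + payloadJar;
    // -> 'hdfs://storagecluster.example.org:8020/user/frb/jars/hadoop-mapreduce-examples.jar'

    var splits = payloadJar.split('/');
    var jarName = splits[splits.length - 1];
    // -> 'hadoop-mapreduce-examples.jar'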
8 changes: 4 additions & 4 deletions doc/manuals/quick_start_guide_new.md
@@ -166,13 +166,13 @@ Coming soon.
 [Top](#top)
 
 ###<a name="section3.5"></a>Step 5: Run your first MapReduce job
-Several pre-loaded MapReduce examples can be found in every Hadoop distribution, typically in a Java `-jar` file called `hadoop-mapreduce-examples.jar`. In this case, the <i>Computing Endpoint</i> owns that file at:
+Several already developed MapReduce examples can be found in every Hadoop distribution, typically in a Java `.jar` file called `hadoop-mapreduce-examples.jar`. This file is copied to the HDFS space a user owns in FIWARE Lab, specifically under the `jars/` folder, so the `frb` user should have it copied to:
 
-    /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar
+    hdfs:///user/frb/jars/hadoop-mapreduce-examples.jar
 
-For instance, you can run the <i>Word Count</i> example (this is also know as the "hello world" of Hadoop) by typing:
+Thus, you can run the <i>Word Count</i> example (this is also known as the "hello world" of Hadoop) by typing:
 
-    $ curl -X POST "http://computing.cosmos.lab.fiware.org:12000/tidoop/v1/user/frb/jobs" -d '{"jar":"/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar","class_name":"wordcount","lib_jars":"/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar","input":"testdir","output":"testoutput"}' -H "Content-Type: application/json" -H "X-Auth-Token: 3azH09G1PdaGmgBNODLOtxy52f5a00"
+    $ curl -X POST "http://computing.cosmos.lab.fiware.org:12000/tidoop/v1/user/frb/jobs" -d '{"jar":"jars/hadoop-mapreduce-examples.jar","class_name":"wordcount","lib_jars":"jars/hadoop-mapreduce-examples.jar","input":"testdir","output":"testoutput"}' -H "Content-Type: application/json" -H "X-Auth-Token: 3azH09G1PdaGmgBNODLOtxy52f5a00"
     {"success":"true","job_id": "job_1460639183882_0001"}
 
 As you can see, another REST API has been used, in this case the Tidoop REST API in the <i>Computing Endpoint</i>. The API allows you to check the status of the job as well:
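For instance, a query along the following lines (a sketch reusing the same URL pattern and the job ID returned above; the exact response fields may vary) would retrieve the job status:

    $ curl -X GET "http://computing.cosmos.lab.fiware.org:12000/tidoop/v1/user/frb/jobs/job_1460639183882_0001" -H "X-Auth-Token: 3azH09G1PdaGmgBNODLOtxy52f5a00"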
2 changes: 1 addition & 1 deletion mkdocs.yml
@@ -10,7 +10,7 @@ theme: readthedocs
 extra_css: ["https://fiware.org/style/fiware_readthedocs.css"]
 pages:
     - Home: index.md
-    - 'Quick Start Guide': 'quick_start_guide.md'
+    - 'Quick Start Guide': 'quick_start_guide_new.md'
     - 'Installation and Administration manual':
         - 'Introduction': 'installation_and_administration_manual/introduction.md'
         - 'Batch processing: Some words about Cosmos and its ecosystem': 'installation_and_administration_manual/batch/some_words_about_cosmos_and_ecosystem.md'
