Skip to content

Commit

Permalink
update fasttext to remove deprecated message
Browse files Browse the repository at this point in the history
  • Loading branch information
pommedeterresautee committed Oct 27, 2019
1 parent eaa4821 commit 7c1c7cd
Show file tree
Hide file tree
Showing 23 changed files with 900 additions and 182 deletions.
4 changes: 2 additions & 2 deletions CRAN-RELEASE
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
This package was submitted to CRAN on 2019-04-14.
Once it is accepted, delete this file and tag the release (commit f935b7df52).
This package was submitted to CRAN on 2019-05-30.
Once it is accepted, delete this file and tag the release (commit eaa4821769).
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: fastrtext
Type: Package
Title: 'fastText' Wrapper for Text Classification and Word Representation
Version: 0.3.2
Date: 2019-04-14
Version: 0.3.4
Date: 2019-10-27
Authors@R: c(person("Michaël", "Benesty", role = c("aut", "cre", "cph"), email = "michael@benesty.fr"),
person("Facebook, Inc", role = c("cph"), email = "bojanowski@fb.com"))
Maintainer: Michaël Benesty <michael@benesty.fr>
Expand Down
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# 0.3.3 ()

# 0.3.3 (10/27/19)

* remove deprecated code to fix CRAN warnings
* update to the latest fastText version
* support one-vs-all loss
* fewer macros (removes the ability to stop training with CTRL+C)

# 0.3.2 (10.04.19)

Expand Down
7 changes: 7 additions & 0 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## Version 0.3.4

fix deprecated error messages.

## Version 0.3.3
small update

## Version 0.3.2
Following Cran e-mail (from Prof Brian Ripley), remove strip-debug in Makevars file.
Now the size of the package is > 10Mb and it generates a Warning regarding its size.
Expand Down
2 changes: 1 addition & 1 deletion src/Makevars
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ PKG_CPPFLAGS = -pthread -include r_compliance.h -I$(PKGROOT)
# pthread is used for multithreading by fastText
PKG_LIBS = -pthread

OBJECTS = add_prefix.o r_compliance.o $(PKGROOT)/args.o $(PKGROOT)/matrix.o $(PKGROOT)/dictionary.o $(PKGROOT)/loss.o $(PKGROOT)/productquantizer.o $(PKGROOT)/densematrix.o $(PKGROOT)/quantmatrix.o $(PKGROOT)/vector.o $(PKGROOT)/model.o $(PKGROOT)/utils.o $(PKGROOT)/meter.o $(PKGROOT)/fasttext.o $(PKGROOT)/main.o fastrtext.o RcppExports.o
OBJECTS = add_prefix.o r_compliance.o $(PKGROOT)/autotune.o $(PKGROOT)/args.o $(PKGROOT)/matrix.o $(PKGROOT)/dictionary.o $(PKGROOT)/loss.o $(PKGROOT)/productquantizer.o $(PKGROOT)/densematrix.o $(PKGROOT)/quantmatrix.o $(PKGROOT)/vector.o $(PKGROOT)/model.o $(PKGROOT)/utils.o $(PKGROOT)/meter.o $(PKGROOT)/fasttext.o $(PKGROOT)/main.o fastrtext.o RcppExports.o

# Reduce the size of the compiled library by removing unneeded debug information
# Need to check if we are on Linux and if strip is installed
Expand Down
2 changes: 1 addition & 1 deletion src/fastrtext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ class fastrtext{
case loss_name::ova:
return "one-vs-all";
default:
stop("Unrecognized loss (ns / hs / softmax) name!");
stop("Unrecognized loss (ns / hs / softmax / ova / one-vs-all) name!");
}
}

Expand Down
127 changes: 124 additions & 3 deletions src/fasttext/args.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include <iostream>
#include <stdexcept>
#include <unordered_map>

namespace fasttext {

Expand All @@ -36,12 +37,19 @@ Args::Args() {
verbose = 2;
pretrainedVectors = "";
saveOutput = false;
seed = 0;

qout = false;
retrain = false;
qnorm = false;
cutoff = 0;
dsub = 2;

autotuneValidationFile = "";
autotuneMetric = "f1";
autotunePredictions = 1;
autotuneDuration = 60 * 5; // 5 minutes
autotuneModelSize = "";
}

std::string Args::lossToString(loss_name ln) const {
Expand Down Expand Up @@ -78,6 +86,16 @@ std::string Args::modelToString(model_name mn) const {
return "Unknown model name!"; // should never happen
}

// Maps an autotune metric enumerator to its textual name.
// Falls back to a diagnostic string when `mn` is not one of the declared
// enumerators (should never happen).
std::string Args::metricToString(metric_name mn) const {
  if (mn == metric_name::f1score) {
    return "f1score";
  }
  if (mn == metric_name::labelf1score) {
    return "labelf1score";
  }
  return "Unknown metric name!"; // should never happen
}

void Args::parseArgs(const std::vector<std::string>& args) {
std::string command(args[1]);
if (command == "supervised") {
Expand All @@ -97,6 +115,8 @@ void Args::parseArgs(const std::vector<std::string>& args) {
exit(EXIT_FAILURE);
}
try {
setManual(args[ai].substr(1));

if (args[ai] == "-h") {
std::cerr << "Here is the help! Usage:" << std::endl;
printHelp();
Expand Down Expand Up @@ -157,6 +177,8 @@ void Args::parseArgs(const std::vector<std::string>& args) {
} else if (args[ai] == "-saveOutput") {
saveOutput = true;
ai--;
} else if (args[ai] == "-seed") {
seed = std::stoi(args.at(ai + 1));
} else if (args[ai] == "-qnorm") {
qnorm = true;
ai--;
Expand All @@ -170,12 +192,24 @@ void Args::parseArgs(const std::vector<std::string>& args) {
cutoff = std::stoi(args.at(ai + 1));
} else if (args[ai] == "-dsub") {
dsub = std::stoi(args.at(ai + 1));
} else if (args[ai] == "-autotune-validation") {
autotuneValidationFile = std::string(args.at(ai + 1));
} else if (args[ai] == "-autotune-metric") {
autotuneMetric = std::string(args.at(ai + 1));
getAutotuneMetric(); // throws exception if not able to parse
getAutotuneMetricLabel(); // throws exception if not able to parse
} else if (args[ai] == "-autotune-predictions") {
autotunePredictions = std::stoi(args.at(ai + 1));
} else if (args[ai] == "-autotune-duration") {
autotuneDuration = std::stoi(args.at(ai + 1));
} else if (args[ai] == "-autotune-modelsize") {
autotuneModelSize = std::string(args.at(ai + 1));
} else {
std::cerr << "Unknown argument: " << args[ai] << std::endl;
printHelp();
exit(EXIT_FAILURE);
}
} catch (std::out_of_range&) {
} catch (std::out_of_range) {
std::cerr << args[ai] << " is missing an argument" << std::endl;
printHelp();
exit(EXIT_FAILURE);
Expand All @@ -195,6 +229,7 @@ void Args::printHelp() {
printBasicHelp();
printDictionaryHelp();
printTrainingHelp();
printAutotuneHelp();
printQuantizationHelp();
}

Expand Down Expand Up @@ -235,11 +270,27 @@ void Args::printTrainingHelp() {
<< " -neg number of negatives sampled [" << neg << "]\n"
<< " -loss loss function {ns, hs, softmax, one-vs-all} ["
<< lossToString(loss) << "]\n"
<< " -thread number of threads [" << thread << "]\n"
<< " -thread number of threads (set to 1 to ensure reproducible results) ["
<< thread << "]\n"
<< " -pretrainedVectors pretrained word vectors for supervised learning ["
<< pretrainedVectors << "]\n"
<< " -saveOutput whether output params should be saved ["
<< boolToString(saveOutput) << "]\n";
<< boolToString(saveOutput) << "]\n"
<< " -seed random generator seed [" << seed << "]\n";
}

// Writes the autotune section of the command-line help to std::cerr.
// The bracketed value printed for each option is the current value of the
// corresponding member.
void Args::printAutotuneHelp() {
  std::ostream& err = std::cerr;

  err << "\nThe following arguments are for autotune:\n";
  err << " -autotune-validation validation file to be used for evaluation\n";

  err << " -autotune-metric metric objective {f1, f1:labelname} ["
      << autotuneMetric << "]\n";

  err << " -autotune-predictions number of predictions used for evaluation ["
      << autotunePredictions << "]\n";

  err << " -autotune-duration maximum duration in seconds ["
      << autotuneDuration << "]\n";

  err << " -autotune-modelsize constraint model file size ["
      << autotuneModelSize << "] (empty = do not quantize)\n";
}

void Args::printQuantizationHelp() {
Expand Down Expand Up @@ -317,4 +368,74 @@ void Args::dump(std::ostream& out) const {
<< " " << t << std::endl;
}

bool Args::hasAutotune() const {
return !autotuneValidationFile.empty();
}

// Returns true when `argName` was previously recorded through setManual().
bool Args::isManual(const std::string& argName) const {
  return manualArgs_.find(argName) != manualArgs_.end();
}

// Records `argName` in the set of manually supplied arguments.
// Recording the same name twice has no further effect.
void Args::setManual(const std::string& argName) {
  manualArgs_.insert(argName);
}

// Interprets the `autotuneMetric` string.
//   "f1"          -> metric_name::f1score
//   "f1:<suffix>" -> metric_name::labelf1score (the suffix is not checked here)
// Throws std::runtime_error for any other value.
metric_name Args::getAutotuneMetric() const {
  static const char kLabelPrefix[] = "f1:";

  // Equivalent to comparing the first three characters against the prefix.
  const bool hasLabelPrefix = autotuneMetric.compare(0, 3, kLabelPrefix) == 0;
  if (hasLabelPrefix) {
    return metric_name::labelf1score;
  }
  if (autotuneMetric == "f1") {
    return metric_name::f1score;
  }
  throw std::runtime_error("Unknown metric : " + autotuneMetric);
}

// Returns the label part of an "f1:<label>" autotune metric.
// For any metric that is not a per-label F1 score the result is empty.
// Throws std::runtime_error when the metric is "f1:" with nothing after the
// colon, and propagates the exception raised by getAutotuneMetric() for an
// unknown metric.
std::string Args::getAutotuneMetricLabel() const {
  if (getAutotuneMetric() != metric_name::labelf1score) {
    return std::string();
  }

  // Skip the three-character "f1:" prefix.
  std::string labelName = autotuneMetric.substr(3);
  if (labelName.empty()) {
    throw std::runtime_error("Empty metric label : " + autotuneMetric);
  }
  return labelName;
}

// Parses `autotuneModelSize` into a number of bytes.
//
// Accepted form: a decimal integer optionally followed by one unit suffix
// (k/K = 1e3, m/M = 1e6, g/G = 1e9), e.g. "500", "2M", "1g".
//
// Returns Args::kUnlimitedModelSize (converted to int64_t) when the string is
// empty, otherwise the numeric value multiplied by the unit.
//
// Throws std::invalid_argument when the numeric part is missing, contains
// trailing non-numeric characters, or does not fit in a `long`.
//
// NOTE(review): a leading minus sign is accepted by std::stol and is not
// rejected here; confirm whether negative sizes should be an error.
int64_t Args::getAutotuneModelSize() const {
  std::string modelSize = autotuneModelSize;
  if (modelSize.empty()) {
    return Args::kUnlimitedModelSize;
  }

  const std::unordered_map<char, int> units = {
      {'k', 1000},
      {'K', 1000},
      {'m', 1000000},
      {'M', 1000000},
      {'g', 1000000000},
      {'G', 1000000000},
  };

  // Strip a recognised unit suffix, remembering its multiplier.
  uint64_t multiplier = 1;
  const auto unit = units.find(modelSize.back());
  if (unit != units.end()) {
    multiplier = unit->second;
    modelSize.pop_back();
  }

  uint64_t size = 0;
  size_t nonNumericCharacter = 0;
  bool parseError = false;
  try {
    size = std::stol(modelSize, &nonNumericCharacter);
  } catch (const std::invalid_argument&) {
    // No digits at all (this also covers a bare unit such as "k").
    parseError = true;
  } catch (const std::out_of_range&) {
    // The number does not fit in a long; report it like any other bad input
    // instead of letting the bare "stol" exception escape.
    parseError = true;
  }
  // Anything left after the digits (e.g. "12x3") is also a parse error.
  if (!parseError && nonNumericCharacter != modelSize.size()) {
    parseError = true;
  }
  if (parseError) {
    throw std::invalid_argument(
        "Unable to parse model size " + autotuneModelSize);
  }

  return size * multiplier;
}

} // namespace fasttext
22 changes: 21 additions & 1 deletion src/fasttext/args.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,21 @@
#include <istream>
#include <ostream>
#include <string>
#include <unordered_set>
#include <vector>

namespace fasttext {

// Model kinds selectable for training; underlying values start at 1.
enum class model_name : int { cbow = 1, sg, sup };
// Loss functions; underlying values start at 1. `ova` is the one-vs-all loss.
enum class loss_name : int { hs = 1, ns, softmax, ova };
// Autotune objectives: `f1score` corresponds to the "f1" metric string and
// `labelf1score` to the "f1:<label>" form (see Args::getAutotuneMetric).
enum class metric_name : int { f1score = 1, labelf1score };

class Args {
protected:
std::string lossToString(loss_name) const;
std::string boolToString(bool) const;
std::string modelToString(model_name) const;
std::string metricToString(metric_name) const;
std::unordered_set<std::string> manualArgs_;

public:
Args();
Expand All @@ -48,21 +51,38 @@ class Args {
int verbose;
std::string pretrainedVectors;
bool saveOutput;
int seed;

bool qout;
bool retrain;
bool qnorm;
size_t cutoff;
size_t dsub;

std::string autotuneValidationFile;
std::string autotuneMetric;
int autotunePredictions;
int autotuneDuration;
std::string autotuneModelSize;

void parseArgs(const std::vector<std::string>& args);
void printHelp();
void printBasicHelp();
void printDictionaryHelp();
void printTrainingHelp();
void printAutotuneHelp();
void printQuantizationHelp();
void save(std::ostream&);
void load(std::istream&);
void dump(std::ostream&) const;
bool hasAutotune() const;
bool isManual(const std::string& argName) const;
void setManual(const std::string& argName);
std::string lossToString(loss_name) const;
metric_name getAutotuneMetric() const;
std::string getAutotuneMetricLabel() const;
int64_t getAutotuneModelSize() const;

static constexpr double kUnlimitedModelSize = -1.0;
};
} // namespace fasttext
Loading

0 comments on commit 7c1c7cd

Please sign in to comment.