diff --git a/ortools/sat/clause.cc b/ortools/sat/clause.cc
index 6cdbae8c588..f68931cbc0a 100644
--- a/ortools/sat/clause.cc
+++ b/ortools/sat/clause.cc
@@ -18,6 +18,7 @@
 #include <cstdint>
 #include <deque>
 #include <memory>
+#include <queue>
 #include <string>
 #include <utility>
 #include <vector>
@@ -550,8 +551,8 @@ bool BinaryImplicationGraph::AddBinaryClauseDuringSearch(Literal a, Literal b) {
   return true;
 }
 
-bool BinaryImplicationGraph::AddAtMostOne(
-    absl::Span<const Literal> at_most_one) {
+bool BinaryImplicationGraph::AddAtMostOne(absl::Span<const Literal> at_most_one,
+                                          int expansion_size) {
   CHECK_EQ(trail_->CurrentDecisionLevel(), 0);
   if (at_most_one.size() <= 1) return true;
@@ -564,7 +565,7 @@ bool BinaryImplicationGraph::AddAtMostOne(
   at_most_one_buffer_.push_back(Literal(kNoLiteralIndex));
 
   is_dag_ = false;
-  return CleanUpAndAddAtMostOnes(base_index);
+  return CleanUpAndAddAtMostOnes(base_index, expansion_size);
 }
 
 // TODO(user): remove duplication with
@@ -584,7 +585,8 @@ bool BinaryImplicationGraph::FixLiteral(Literal true_literal) {
 // This works by doing a linear scan on the at_most_one_buffer_ and
 // cleaning/copying the at most ones on the fly to the beginning of the same
 // buffer.
-bool BinaryImplicationGraph::CleanUpAndAddAtMostOnes(const int base_index) {
+bool BinaryImplicationGraph::CleanUpAndAddAtMostOnes(const int base_index,
+                                                     int expansion_size) {
   const VariablesAssignment& assignment = trail_->Assignment();
   int local_end = base_index;
   const int buffer_size = at_most_one_buffer_.size();
@@ -675,7 +677,7 @@ bool BinaryImplicationGraph::CleanUpAndAddAtMostOnes(const int base_index) {
 
     // We expand small sizes into implications.
     // TODO(user): Investigate what the best threshold is.
-    if (at_most_one.size() < 10) {
+    if (at_most_one.size() < expansion_size) {
      // Note that this automatically skips sizes 0 and 1.
      for (const Literal a : at_most_one) {
        for (const Literal b : at_most_one) {
@@ -1859,6 +1861,88 @@ BinaryImplicationGraph::GenerateAtMostOnesWithLargeWeight(
   return tmp_cuts_;
 }
 
+// TODO(user): Use deterministic limit.
+// TODO(user): Explore the graph instead of just looking at full amo, especially
+// since we expand small ones.
+std::vector<absl::Span<const Literal>>
+BinaryImplicationGraph::HeuristicAmoPartition(std::vector<Literal>* literals) {
+  std::vector<absl::Span<const Literal>> result;
+
+  absl::StrongVector<LiteralIndex, bool> to_consider(implications_.size(),
+                                                     false);
+  for (const Literal l : *literals) to_consider[l.Index()] = true;
+
+  // Priority queue of (intersection_size, start_of_amo).
+  std::priority_queue<std::pair<int, int>> pq;
+
+  // Compute, for each at most one that might overlap, its overlapping size,
+  // and store its start in the at_most_one_buffer_.
+  //
+  // This is in O(num_literals in amo).
+  absl::flat_hash_set<int> explored_amo;
+  for (const Literal l : *literals) {
+    if (l.Index() >= at_most_ones_.size()) continue;
+    for (const int start : at_most_ones_[l.Index()]) {
+      const auto [_, inserted] = explored_amo.insert(start);
+      if (!inserted) continue;
+
+      int intersection_size = 0;
+      for (int i = start;; ++i) {
+        const Literal l = at_most_one_buffer_[i];
+        if (l.Index() == kNoLiteralIndex) break;
+        if (to_consider[l.Index()]) ++intersection_size;
+      }
+      if (intersection_size > 1) {
+        pq.push({intersection_size, start});
+      }
+
+      // Abort early if we are done.
+      if (intersection_size == literals->size()) break;
+    }
+  }
+
+  // Consume AMOs, update sizes.
+  int num_processed = 0;
+  while (!pq.empty()) {
+    const auto [old_size, start] = pq.top();
+    pq.pop();
+
+    // Recompute size.
+    int intersection_size = 0;
+    for (int i = start;; ++i) {
+      const Literal l = at_most_one_buffer_[i];
+      if (l.Index() == kNoLiteralIndex) break;
+      if (to_consider[l.Index()]) ++intersection_size;
+    }
+    if (intersection_size != old_size) {
+      // Re-add with the new size.
+      if (intersection_size > 1) {
+        pq.push({intersection_size, start});
+      }
+      continue;
+    }
+
+    // Mark the literals of that at most one as false.
+    for (int i = start;; ++i) {
+      const Literal l = at_most_one_buffer_[i];
+      if (l.Index() == kNoLiteralIndex) break;
+      to_consider[l.Index()] = false;
+    }
+
+    // Extract the intersection by moving it into
+    // [num_processed, num_processed + intersection_size).
+    const int span_start = num_processed;
+    for (int i = num_processed; i < literals->size(); ++i) {
+      if (to_consider[(*literals)[i].Index()]) continue;
+      std::swap((*literals)[num_processed], (*literals)[i]);
+      ++num_processed;
+    }
+    result.push_back(absl::MakeSpan(literals->data() + span_start,
+                                    num_processed - span_start));
+  }
+  return result;
+}
+
 void BinaryImplicationGraph::MarkDescendants(Literal root) {
   bfs_stack_.resize(implications_.size());
   auto* const stack = bfs_stack_.data();
diff --git a/ortools/sat/clause.h b/ortools/sat/clause.h
index 15ad05988f3..f3a35c054dd 100644
--- a/ortools/sat/clause.h
+++ b/ortools/sat/clause.h
@@ -509,7 +509,10 @@ class BinaryImplicationGraph : public SatPropagator {
   // TODO(user): Our algorithm could generalize easily to at_most_ones + a list
   // of literals that will be false if one of the literals in the amo is at
   // one. It is a way to merge common lists of implications.
-  ABSL_MUST_USE_RESULT bool AddAtMostOne(absl::Span<const Literal> at_most_one);
+  //
+  // If the final AMO size is smaller than "expansion_size" we fully expand it.
+  ABSL_MUST_USE_RESULT bool AddAtMostOne(absl::Span<const Literal> at_most_one,
+                                         int expansion_size = 10);
 
   // Uses the binary implication graph to minimize the given conflict by
   // removing literals that imply others. The idea is that if a and b are two
@@ -610,6 +613,13 @@ class BinaryImplicationGraph : public SatPropagator {
       const std::vector<Literal>& literals,
       const std::vector<double>& lp_values);
 
+  // Heuristically identifies "at most ones" among the given literals, swaps
+  // them around and returns these amos as spans inside the literals vector.
+  //
+  // TODO(user): Add a limit to make sure this does not take too much time.
+  std::vector<absl::Span<const Literal>> HeuristicAmoPartition(
+      std::vector<Literal>* literals);
+
   // Number of literals propagated by this class (including conflicts).
   int64_t num_propagations() const { return num_propagations_; }
 
@@ -760,7 +770,9 @@
   // at_most_one_buffer_. This replaces literals by their representative,
   // removes fixed literals and deals with duplicates. Returns false iff the
   // model is UNSAT.
-  bool CleanUpAndAddAtMostOnes(int base_index);
+  //
+  // If the final AMO size is smaller than "expansion_size" we fully expand it.
+  bool CleanUpAndAddAtMostOnes(int base_index, int expansion_size = 10);
 
   mutable StatsGroup stats_;
   TimeLimit* time_limit_;
diff --git a/ortools/sat/cp_model_expand.cc b/ortools/sat/cp_model_expand.cc
index 3802aadb3d5..d27772d5122 100644
--- a/ortools/sat/cp_model_expand.cc
+++ b/ortools/sat/cp_model_expand.cc
@@ -1275,27 +1275,36 @@ bool ReduceTableInPresenceOfUniqueVariableWithCosts(
   // This comes from the WCSP literature. Basically, if by fixing a variable to
   // a value, we have only tuples with a non-zero cost, we can subtract the
   // minimum cost of these tuples and transfer it to the variable cost.
-  for (int var_index = 0; var_index < new_vars.size(); ++var_index) {
-    absl::flat_hash_map<int64_t, int64_t> value_to_min_cost;
-    const int num_tuples = tuples->size();
-    for (int i = 0; i < num_tuples; ++i) {
-      const int64_t v = (*tuples)[i][var_index];
-      const int64_t cost = (*tuples)[i].back();
-      auto insert = value_to_min_cost.insert({v, cost});
-      if (!insert.second) {
-        insert.first->second = std::min(insert.first->second, cost);
+  //
+  // TODO(user): Doing this before table compression can prevent good
+  // compression. We should probably exploit this during compression to make
+  // sure we compress as much as possible, and once compressed, do it again. Or
+  // do it in a more general IP setting when one literal implies that a set of
+  // literals with >0 cost are in EXO. We can transfer the min of their cost to
+  // that Boolean.
+  if (/*DISABLES CODE*/ (false)) {
+    for (int var_index = 0; var_index < new_vars.size(); ++var_index) {
+      absl::flat_hash_map<int64_t, int64_t> value_to_min_cost;
+      const int num_tuples = tuples->size();
+      for (int i = 0; i < num_tuples; ++i) {
+        const int64_t v = (*tuples)[i][var_index];
+        const int64_t cost = (*tuples)[i].back();
+        auto insert = value_to_min_cost.insert({v, cost});
+        if (!insert.second) {
+          insert.first->second = std::min(insert.first->second, cost);
+        }
+      }
+      for (int i = 0; i < num_tuples; ++i) {
+        const int64_t v = (*tuples)[i][var_index];
+        (*tuples)[i].back() -= value_to_min_cost.at(v);
+      }
+      for (const auto entry : value_to_min_cost) {
+        if (entry.second == 0) continue;
+        context->UpdateRuleStats("table: transferred cost to encoding");
+        const int value_literal = context->GetOrCreateVarValueEncoding(
+            new_vars[var_index], entry.first);
+        context->AddLiteralToObjective(value_literal, entry.second);
       }
-    }
-    for (int i = 0; i < num_tuples; ++i) {
-      const int64_t v = (*tuples)[i][var_index];
-      (*tuples)[i].back() -= value_to_min_cost.at(v);
-    }
-    for (const auto entry : value_to_min_cost) {
-      if (entry.second == 0) continue;
-      context->UpdateRuleStats("table: transferred cost to encoding");
-      const int value_literal = context->GetOrCreateVarValueEncoding(
-          new_vars[var_index], entry.first);
-      context->AddLiteralToObjective(value_literal, entry.second);
     }
   }
diff --git a/ortools/sat/encoding.cc b/ortools/sat/encoding.cc
index a8305973c38..e40f5510229 100644
--- a/ortools/sat/encoding.cc
+++ b/ortools/sat/encoding.cc
@@ -68,6 +68,33 @@ void EncodingNode::InitializeFullNode(int n, EncodingNode* a, EncodingNode* b,
   for_sorting_ = first_var_index;
 }
 
+void EncodingNode::InitializeAmoNode(absl::Span<EncodingNode*> nodes,
+                                     SatSolver* solver) {
+  CHECK_GE(nodes.size(), 2);
+  CHECK(literals_.empty()) << "Already initialized";
+  const BooleanVariable var = solver->NewBooleanVariable();
+  const Literal new_literal(var, true);
+  literals_.push_back(new_literal);
+  child_a_ = nullptr;
+  child_b_ = nullptr;
+  lb_ = 0;
+  depth_ = 0;
+  for_sorting_ = var;
+  std::vector<Literal> clause{new_literal.Negated()};
+  for (const EncodingNode* node : nodes) {
+    // node_lit => new_lit.
+    clause.push_back(node->literals_[0]);
+    solver->AddBinaryClause(node->literals_[0].Negated(), new_literal);
+    lb_ += node->lb_;
+    depth_ = std::max(node->depth_ + 1, depth_);
+    for_sorting_ = std::min(for_sorting_, node->for_sorting_);
+  }
+
+  // If new_literal is true then one of the literals must be true.
+  // Note that this is not needed for correctness though.
+  solver->AddProblemClause(clause);
+}
+
 void EncodingNode::InitializeLazyNode(EncodingNode* a, EncodingNode* b,
                                       SatSolver* solver) {
   CHECK(literals_.empty()) << "Already initialized";
@@ -552,7 +579,8 @@ Coefficient MaxNodeWeightSmallerThan(const std::vector<EncodingNode*>& nodes,
 
 bool ProcessCore(const std::vector<Literal>& core, Coefficient min_weight,
                  std::deque<EncodingNode>* repository,
-                 std::vector<EncodingNode*>* nodes, SatSolver* solver) {
+                 std::vector<EncodingNode*>* nodes, SatSolver* solver,
+                 BinaryImplicationGraph* implications) {
   // Backtrack to be able to add new constraints.
   solver->ResetToLevelZero();
   if (core.size() == 1) {
@@ -593,6 +621,69 @@ bool ProcessCore(const std::vector<Literal>& core, Coefficient min_weight,
     ++new_node_index;
   }
   nodes->resize(new_node_index);
+
+  // Amongst the nodes to merge, if many are leaf nodes in an "at most one"
+  // relationship, it is super advantageous to exploit it during merging as
+  // we can regroup all nodes from an at most one in a single new node with a
+  // depth of 1.
+  if (implications != nullptr) {
+    std::vector<Literal> leaves;
+    absl::flat_hash_map<LiteralIndex, int> node_indices;
+    for (int i = 0; i < to_merge.size(); ++i) {
+      const EncodingNode& node = *to_merge[i];
+      if (node.depth() != 0) continue;
+      if (node.size() != 1) continue;
+      if (node.ub() != node.lb() + 1) continue;
+      if (node_indices.contains(node.literal(0).Index())) continue;
+      node_indices[node.literal(0).Index()] = i;
+      leaves.push_back(node.literal(0));
+    }
+
+    int num_in_decompo = 0;
+    const std::vector<absl::Span<const Literal>> decomposition =
+        implications->HeuristicAmoPartition(&leaves);
+    for (const auto amo : decomposition) {
+      num_in_decompo += amo.size();
+
+      // Extract Amo nodes and set them to nullptr in to_merge.
+      std::vector<EncodingNode*> amo_nodes;
+      for (const Literal l : amo) {
+        const int index = node_indices.at(l.Index());
+        amo_nodes.push_back(to_merge[index]);
+        to_merge[index] = nullptr;
+      }
+
+      // Create the new node with proper constraints and weight.
+      repository->push_back(EncodingNode());
+      EncodingNode* n = &repository->back();
+      n->InitializeAmoNode(amo_nodes, solver);
+      n->set_weight(min_weight);
+      to_merge.push_back(n);
+    }
+    VLOG(2) << "Detected " << decomposition.size() << " AMO on "
+            << num_in_decompo << "/" << leaves.size() << " core literals";
+
+    // Clean-up to_merge.
+    int new_size = 0;
+    for (EncodingNode* node : to_merge) {
+      if (node == nullptr) continue;
+      to_merge[new_size++] = node;
+    }
+    to_merge.resize(new_size);
+
+    if (to_merge.size() == 1) {
+      // Increase the bound in this case.
+      //
+      // TODO(user): No need to create this literal + implications in the first
+      // place. Moreover we actually have an "exactly one" in this case and if
+      // the amo used was larger, we can just set these extra literals to
+      // false. Note that since this is a core, the solver already learned that
+      // at least one of these literals must be true. We also explicitly added
+      // this clause in InitializeAmoNode() above.
+      return solver->AddUnitClause(to_merge[0]->literal(0));
+    }
+  }
+
   nodes->push_back(LazyMergeAllNodeWithPQAndIncreaseLb(min_weight, to_merge,
                                                        solver, repository));
   return !solver->ModelIsUnsat();
diff --git a/ortools/sat/encoding.h b/ortools/sat/encoding.h
index 3501de150e6..37bbb60c0b3 100644
--- a/ortools/sat/encoding.h
+++ b/ortools/sat/encoding.h
@@ -81,6 +81,12 @@ class EncodingNode {
   void InitializeLazyCoreNode(Coefficient weight, EncodingNode* a,
                               EncodingNode* b);
 
+  // If we know that all the literals[0] of the given nodes are in "at most one"
+  // relationship, we can create a node that is the sum of them with a simple
+  // encoding.
+  // This does create linking implications.
+  void InitializeAmoNode(absl::Span<EncodingNode*> nodes,
+                         SatSolver* solver);
+
   // Returns a literal with the meaning 'this node number is > i'.
   // The given i must be in [lb_, current_ub).
   Literal GreaterThan(int i) const { return literal(i - lb_); }
@@ -147,7 +153,7 @@ class EncodingNode {
   int ub_ = 1;
   BooleanVariable for_sorting_;
 
-  // The weight is only applies for literal >= this lb.
+  // The weight is only applied for literals >= this lb.
   int weight_lb_ = 0;
   Coefficient weight_;
@@ -225,9 +231,13 @@ Coefficient MaxNodeWeightSmallerThan(const std::vector<EncodingNode*>& nodes,
 
 // Updates the encoding using the given core. The literals in the core must
 // match the order in nodes. Returns false if the model becomes infeasible.
+//
+// If "implications" is not null, we try to exploit at_most_ones between
+// literals of the core for a better Boolean encoding.
 bool ProcessCore(const std::vector<Literal>& core, Coefficient min_weight,
                  std::deque<EncodingNode>* repository,
-                 std::vector<EncodingNode*>* nodes, SatSolver* solver);
+                 std::vector<EncodingNode*>* nodes, SatSolver* solver,
+                 BinaryImplicationGraph* implications = nullptr);
 
 // There is more than one way to create new assumptions and encode the
 // information from this core. This is slightly different from ProcessCore() and
diff --git a/ortools/sat/optimization.cc b/ortools/sat/optimization.cc
index eb454bd99ca..95e54954a2f 100644
--- a/ortools/sat/optimization.cc
+++ b/ortools/sat/optimization.cc
@@ -1070,7 +1070,7 @@ SatSolver::Status SolveWithCardinalityEncodingAndCore(
       stratified_lower_bound = min_weight;
     }
 
-    ProcessCore(core, min_weight, &repository, &nodes, solver);
+    ProcessCore(core, min_weight, &repository, &nodes, solver, nullptr);
     max_depth = std::max(max_depth, nodes.back()->depth());
   }
 }
@@ -1587,10 +1587,12 @@ SatSolver::Status CoreBasedOptimizer::OptimizeWithSatEncoding(
   Coefficient lower_bound(0);
 
   // This is used by the "stratified" approach.
-  // TODO(user): Take into account parameters.
   Coefficient stratified_lower_bound(0);
-  for (EncodingNode* n : nodes) {
-    stratified_lower_bound = std::max(stratified_lower_bound, n->weight());
+  if (parameters_->max_sat_stratification() !=
+      SatParameters::STRATIFICATION_NONE) {
+    for (EncodingNode* n : nodes) {
+      stratified_lower_bound = std::max(stratified_lower_bound, n->weight());
+    }
   }
 
   // Start the algorithm.
@@ -1633,6 +1635,10 @@ SatSolver::Status CoreBasedOptimizer::OptimizeWithSatEncoding(
           new_obj_lb, integer_trail_->LevelZeroUpperBound(objective_var_));
     }
 
+    VLOG(2) << "[Core] #nodes " << nodes.size()
+            << " #assumptions:" << assumptions.size()
+            << " stratification:" << stratified_lower_bound;
+
     // Solve under the assumptions.
     //
     // TODO(user): Find multiple cores like in the "main" algorithm.
@@ -1668,7 +1674,8 @@ SatSolver::Status CoreBasedOptimizer::OptimizeWithSatEncoding(
 
     // We only count an iter when we found a core.
     ++iter;
-    if (!ProcessCore(core, min_weight, &repository, &nodes, sat_solver_)) {
+    if (!ProcessCore(core, min_weight, &repository, &nodes, sat_solver_,
+                     implications_)) {
       return SatSolver::INFEASIBLE;
     }
     max_depth = std::max(max_depth, nodes.back()->depth());
diff --git a/ortools/sat/sat_cnf_reader.h b/ortools/sat/sat_cnf_reader.h
index 7a8223d8347..9ec3746909f 100644
--- a/ortools/sat/sat_cnf_reader.h
+++ b/ortools/sat/sat_cnf_reader.h
@@ -14,6 +14,7 @@
 #ifndef OR_TOOLS_SAT_SAT_CNF_READER_H_
 #define OR_TOOLS_SAT_SAT_CNF_READER_H_
 
+#include <algorithm>
 #include <cstdint>
 #include <string>
 #include <vector>
@@ -34,16 +35,34 @@
 namespace operations_research {
 namespace sat {
 
-struct LinearBooleanProblemWrapper {
-  explicit LinearBooleanProblemWrapper(LinearBooleanProblem* p) : problem(p) {}
-
-  void SetNumVariables(int num) { problem->set_num_variables(num); }
-  void SetOriginalNumVariables(int num) {
-    problem->set_original_num_variables(num);
+// This implements the implicit contract needed by the SatCnfReader class.
+class LinearBooleanProblemWrapper {
+ public:
+  explicit LinearBooleanProblemWrapper(LinearBooleanProblem* p) : problem_(p) {}
+
+  // In the new 2022 .wcnf format, we don't know the number of variables
+  // beforehand (no header). So when this is called (after all the constraints
+  // have been added), we need to re-index the slacks so that they are after
+  // the variables of the original problem.
+  void SetSizeAndPostprocess(int num_variables, int num_slacks) {
+    problem_->set_num_variables(num_variables + num_slacks);
+    problem_->set_original_num_variables(num_variables);
+    for (const int c : to_postprocess_) {
+      auto* literals = problem_->mutable_constraints(c)->mutable_literals();
+      const int last_index = literals->size() - 1;
+      const int last = (*literals)[last_index];
+      (*literals)[last_index] =
+          last >= 0 ? last + num_variables : last - num_variables;
+    }
   }
 
-  void AddConstraint(absl::Span<const int> clause) {
-    LinearBooleanConstraint* constraint = problem->add_constraints();
+  // If last_is_slack is true, then the last literal is assumed to be a slack
+  // with index in [-num_slacks, num_slacks]. We will re-index it at the end in
+  // SetSizeAndPostprocess().
+  void AddConstraint(absl::Span<const int> clause, bool last_is_slack = false) {
+    if (last_is_slack)
+      to_postprocess_.push_back(problem_->constraints().size());
+    LinearBooleanConstraint* constraint = problem_->add_constraints();
     constraint->mutable_literals()->Reserve(clause.size());
     constraint->mutable_coefficients()->Reserve(clause.size());
     constraint->set_lower_bound(1);
@@ -55,40 +74,49 @@
 
   void AddObjectiveTerm(int literal, int64_t value) {
     CHECK_GE(literal, 0) << "Negative literal not supported.";
-    problem->mutable_objective()->add_literals(literal);
-    problem->mutable_objective()->add_coefficients(value);
+    problem_->mutable_objective()->add_literals(literal);
+    problem_->mutable_objective()->add_coefficients(value);
   }
 
   void SetObjectiveOffset(int64_t offset) {
-    problem->mutable_objective()->set_offset(offset);
+    problem_->mutable_objective()->set_offset(offset);
   }
 
-  LinearBooleanProblem* problem;
+ private:
+  LinearBooleanProblem* problem_;
+  std::vector<int> to_postprocess_;
 };
 
-struct CpModelProtoWrapper {
-  explicit CpModelProtoWrapper(CpModelProto* p) : problem(p) {}
+// This implements the implicit contract needed by the SatCnfReader class.
+class CpModelProtoWrapper {
+ public:
+  explicit CpModelProtoWrapper(CpModelProto* p) : problem_(p) {}
 
-  void SetNumVariables(int num) {
-    for (int i = 0; i < num; ++i) {
-      IntegerVariableProto* variable = problem->add_variables();
+  void SetSizeAndPostprocess(int num_variables, int num_slacks) {
+    for (int i = 0; i < num_variables + num_slacks; ++i) {
+      IntegerVariableProto* variable = problem_->add_variables();
       variable->add_domain(0);
       variable->add_domain(1);
     }
+    for (const int c : to_postprocess_) {
+      auto* literals = problem_->mutable_constraints(c)
+                           ->mutable_bool_or()
+                           ->mutable_literals();
+      const int last_index = literals->size() - 1;
+      const int last = (*literals)[last_index];
+      (*literals)[last_index] =
+          last >= 0 ? last + num_variables : last - num_variables;
+    }
   }
 
-  // TODO(user): Not supported. This is only used for displaying a wcnf
-  // solution in cnf format, so it is not useful internally. Instead of adding
-  // another field, we could use the variables names or the search heuristics
-  // to encode this info.
-  void SetOriginalNumVariables(int /*num*/) {}
-
   int LiteralToRef(int signed_value) {
     return signed_value > 0 ? signed_value - 1 : signed_value;
   }
 
-  void AddConstraint(absl::Span<const int> clause) {
-    auto* constraint = problem->add_constraints()->mutable_bool_or();
+  void AddConstraint(absl::Span<const int> clause, bool last_is_slack = false) {
+    if (last_is_slack)
+      to_postprocess_.push_back(problem_->constraints().size());
+    auto* constraint = problem_->add_constraints()->mutable_bool_or();
     constraint->mutable_literals()->Reserve(clause.size());
     for (const int literal : clause) {
       constraint->add_literals(LiteralToRef(literal));
@@ -97,15 +125,17 @@
 
   void AddObjectiveTerm(int literal, int64_t value) {
     CHECK_GE(literal, 0) << "Negative literal not supported.";
-    problem->mutable_objective()->add_vars(LiteralToRef(literal));
-    problem->mutable_objective()->add_coeffs(value);
+    problem_->mutable_objective()->add_vars(LiteralToRef(literal));
+    problem_->mutable_objective()->add_coeffs(value);
   }
 
   void SetObjectiveOffset(int64_t offset) {
-    problem->mutable_objective()->set_offset(offset);
+    problem_->mutable_objective()->set_offset(offset);
  }
 
-  CpModelProto* problem;
+ private:
+  CpModelProto* problem_;
+  std::vector<int> to_postprocess_;
 };
 
 // This class loads a file in cnf file format into a SatProblem.
@@ -140,9 +170,10 @@ class SatCnfReader {
  private:
  template <typename Problem>
  bool LoadInternal(const std::string& filename, Problem* problem) {
-    positive_literal_to_weight_.clear();
-    objective_offset_ = 0;
     is_wcnf_ = false;
+    objective_offset_ = 0;
+    positive_literal_to_weight_.clear();
+
     end_marker_seen_ = false;
     hard_weight_ = 0;
     num_skipped_soft_clauses_ = 0;
     num_singleton_soft_clauses_ = 0;
     num_added_clauses_ = 0;
     num_slack_variables_ = 0;
 
+    num_variables_ = 0;
+    num_clauses_ = 0;
+    actual_num_variables_ = 0;
+
     int num_lines = 0;
     for (const std::string& line : FileLines(filename)) {
       ++num_lines;
     if (num_lines == 0) {
       LOG(FATAL) << "File '" << filename << "' is empty or can't be read.";
     }
-    problem->SetOriginalNumVariables(num_variables_);
-    problem->SetNumVariables(num_variables_ + num_slack_variables_);
+
+    if (num_variables_ > 0 && num_variables_ != actual_num_variables_) {
+      LOG(ERROR) << "Wrong number of variables! Expected:" << num_variables_
+                 << " Seen:" << actual_num_variables_;
+    }
+
+    problem->SetSizeAndPostprocess(actual_num_variables_, num_slack_variables_);
 
     // Fill the objective.
     if (!positive_literal_to_weight_.empty()) {
@@ -168,14 +208,21 @@
         problem->AddObjectiveTerm(p.first, p.second);
       }
     }
+    for (const std::pair<int, int64_t> p : slack_literal_to_weight_) {
+      if (p.second != 0) {
+        problem->AddObjectiveTerm(actual_num_variables_ + p.first, p.second);
+      }
+    }
     problem->SetObjectiveOffset(objective_offset_);
   }
 
-    if (num_clauses_ != num_added_clauses_ + num_singleton_soft_clauses_ +
-                            num_skipped_soft_clauses_) {
-      LOG(ERROR) << "Wrong number of clauses. " << num_clauses_ << " "
-                 << num_added_clauses_;
-      return false;
+    // Some files from the max-sat competition seem to have the wrong number of
+    // clauses!? I checked manually, so we still parse them with best effort.
+    const int total_seen = num_added_clauses_ + num_singleton_soft_clauses_ +
+                           num_skipped_soft_clauses_;
+    if (num_clauses_ > 0 && num_clauses_ != total_seen) {
+      LOG(ERROR) << "Wrong number of clauses! Expected:" << num_clauses_
+                 << " Seen:" << total_seen;
     }
     return true;
   }
@@ -222,6 +269,11 @@
       return;
     }
 
+    // The new wcnf format does not have a "p" header line anymore.
+    if (num_variables_ == 0) {
+      is_wcnf_ = true;
+    }
+
     static const char kWordDelimiters[] = " ";
     auto splitter = absl::StrSplit(line, kWordDelimiters, absl::SkipEmpty());
@@ -231,23 +283,35 @@
     bool first = true;
     bool end_marker_seen = false;
     for (const absl::string_view word : splitter) {
-      int64_t signed_value;
-      CHECK(absl::SimpleAtoi(word, &signed_value));
       if (first && is_wcnf_) {
-        // Mathematically, a soft clause of weight 0 can be removed.
-        if (signed_value == 0) {
-          ++num_skipped_soft_clauses_;
-          return;
-        }
-        weight = signed_value;
-      } else {
-        if (signed_value == 0) {
-          end_marker_seen = true;
-          break;  // end of clause.
+        first = false;
+        if (word == "h") {
+          // Hard clause in the new 2022 format.
+          // Note that hard_weight_ == 0 here.
+          weight = hard_weight_;
+        } else {
+          CHECK(absl::SimpleAtoi(word, &weight));
+          CHECK_GE(weight, 0);
+
+          // A soft clause of weight 0 can be removed.
+          if (weight == 0) {
+            ++num_skipped_soft_clauses_;
+            return;
+          }
         }
-        tmp_clause_.push_back(signed_value);
+        continue;
       }
-      first = false;
+
+      int signed_value;
+      CHECK(absl::SimpleAtoi(word, &signed_value));
+      if (signed_value == 0) {
+        end_marker_seen = true;
+        break;  // end of clause.
+      }
+
+      actual_num_variables_ = std::max(actual_num_variables_,
+                                       std::max(signed_value, -signed_value));
+      tmp_clause_.push_back(signed_value);
     }
     if (!end_marker_seen) return;
@@ -271,26 +335,20 @@
     } else {
       // The +1 is because a positive literal is the same as the 1-based
       // variable index.
-      const int slack_literal = num_variables_ + num_slack_variables_ + 1;
-      ++num_slack_variables_;
+      const int slack_literal = ++num_slack_variables_;
+      slack_literal_to_weight_[slack_literal] += weight;
 
       tmp_clause_.push_back(slack_literal);
 
       ++num_added_clauses_;
-      problem->AddConstraint(tmp_clause_);
-
-      if (slack_literal > 0) {
-        positive_literal_to_weight_[slack_literal] += weight;
-      } else {
-        positive_literal_to_weight_[-slack_literal] -= weight;
-        objective_offset_ += weight;
-      }
+      problem->AddConstraint(tmp_clause_, /*last_is_slack=*/true);
 
       if (wcnf_use_strong_slack_) {
         // Add the binary implications slack_literal true => all the other
         // clause literals are false.
         for (int i = 0; i + 1 < tmp_clause_.size(); ++i) {
-          problem->AddConstraint({-slack_literal, -tmp_clause_[i]});
+          problem->AddConstraint({-tmp_clause_[i], -slack_literal},
+                                 /*last_is_slack=*/true);
         }
       }
     }
@@ -300,22 +358,24 @@
   bool interpret_cnf_as_max_sat_;
   const bool wcnf_use_strong_slack_;
 
-  int num_clauses_;
-  int num_variables_;
+  int num_clauses_ = 0;
+  int num_variables_ = 0;
+  int actual_num_variables_ = 0;
 
   // Temporary storage for ProcessNewLine().
   std::vector<absl::string_view> words_;
 
   // We store the objective in a map because we want the variables to appear
   // only once in the LinearObjective proto.
-  absl::btree_map<int, int64_t> positive_literal_to_weight_;
   int64_t objective_offset_;
+  absl::btree_map<int, int64_t> positive_literal_to_weight_;
+  absl::btree_map<int, int64_t> slack_literal_to_weight_;
 
   // Used for the wcnf format.
   bool is_wcnf_;
 
   // Some files have text after %. This indicates if we have seen the '%'.
   bool end_marker_seen_;
 
-  int64_t hard_weight_;
+  int64_t hard_weight_ = 0;
 
   int num_slack_variables_;
   int num_skipped_soft_clauses_;
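
Note on BinaryImplicationGraph::HeuristicAmoPartition() above: the priority queue
holds possibly stale intersection sizes, and a popped entry is only trusted when
its recomputed size matches the cached one; otherwise it is rescored and pushed
back. The standalone program below is a minimal sketch of that same greedy,
lazily-updated scheme, under simplifying assumptions: plain ints stand in for
Literal, an explicit list of AMOs stands in for at_most_one_buffer_, and the
name GreedyAmoPartition is ours, not part of OR-Tools.

#include <algorithm>
#include <cstdio>
#include <queue>
#include <utility>
#include <vector>

// Greedily extracts groups: repeatedly pop the AMO with the largest cached
// intersection with the not-yet-consumed literals; stale entries are rescored
// and re-pushed instead of being used.
std::vector<std::vector<int>> GreedyAmoPartition(
    std::vector<int>* literals, const std::vector<std::vector<int>>& amos) {
  int max_lit = 0;
  for (const int l : *literals) max_lit = std::max(max_lit, l);
  for (const std::vector<int>& amo : amos) {
    for (const int l : amo) max_lit = std::max(max_lit, l);
  }
  std::vector<bool> to_consider(max_lit + 1, false);
  for (const int l : *literals) to_consider[l] = true;

  // Score an AMO: how many not-yet-consumed literals it covers.
  const auto score = [&](int a) {
    int size = 0;
    for (const int l : amos[a]) size += to_consider[l] ? 1 : 0;
    return size;
  };

  // (intersection_size, amo_index), largest size first.
  std::priority_queue<std::pair<int, int>> pq;
  for (int a = 0; a < static_cast<int>(amos.size()); ++a) {
    const int size = score(a);
    if (size > 1) pq.push({size, a});
  }

  std::vector<std::vector<int>> result;
  int num_processed = 0;
  while (!pq.empty()) {
    const auto [old_size, a] = pq.top();
    pq.pop();
    const int size = score(a);
    if (size != old_size) {  // Stale entry: lazy update.
      if (size > 1) pq.push({size, a});
      continue;
    }
    // Consume this AMO and move the covered literals to the processed prefix.
    for (const int l : amos[a]) to_consider[l] = false;
    const int start = num_processed;
    for (int i = num_processed; i < static_cast<int>(literals->size()); ++i) {
      if (to_consider[(*literals)[i]]) continue;
      std::swap((*literals)[num_processed++], (*literals)[i]);
    }
    result.emplace_back(literals->begin() + start,
                        literals->begin() + num_processed);
  }
  return result;
}

int main() {
  std::vector<int> core = {1, 2, 3, 4, 5};
  const std::vector<std::vector<int>> amos = {{1, 2, 3}, {3, 4}, {4, 5}};
  for (const std::vector<int>& group : GreedyAmoPartition(&core, amos)) {
    for (const int l : group) std::printf("%d ", l);
    std::printf("\n");
  }
  return 0;
}

Running this prints the two groups {1, 2, 3} and {4, 5}; the entry for {3, 4}
is lazily rescored to size 1 once literal 3 has been consumed, and dropped.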
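
Note on EncodingNode::InitializeAmoNode(): since the children's literals are
known to be mutually exclusive, a single fresh literal y can stand for their
sum l1 + ... + lk. The sketch below just prints the clauses involved in DIMACS
conventions (positive ints are variables); the helper name and the example
variables are ours, for illustration only.

#include <cstdio>
#include <vector>

// Links a fresh literal y to an at-most-one group l1..lk:
//   li => y                for each i (k binary clauses), and
//   y  => (l1 v ... v lk)  (one long clause, redundant but helps learning).
// Because at most one li can be true, y is exactly the sum l1 + ... + lk.
void EmitAmoNodeClauses(const std::vector<int>& amo, int y) {
  std::vector<int> long_clause = {-y};
  for (const int li : amo) {
    std::printf("%d %d 0\n", -li, y);  // li => y.
    long_clause.push_back(li);
  }
  for (const int lit : long_clause) std::printf("%d ", lit);
  std::printf("0\n");  // y => (l1 v ... v lk), not needed for correctness.
}

int main() {
  // Three mutually exclusive literals 1, 2, 3; y is the fresh variable 4.
  EmitAmoNodeClauses({1, 2, 3}, 4);
  return 0;
}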
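
Note on the sat_cnf_reader.h changes: the 2022 wcnf format drops the
"p wcnf <#vars> <#clauses> <top>" header and marks hard clauses with a leading
"h", so the number of original variables is only known after the whole file has
been read. A soft clause (weight w, literals c) becomes the hard clause (c v s)
with a fresh slack s carrying objective weight w; slacks are numbered
1..num_slacks while parsing and shifted past the original variables afterwards,
which is what SetSizeAndPostprocess() does above. The sketch below shows only
that final re-indexing step; the Clause struct and function names are ours, not
the OR-Tools API.

#include <cstdio>
#include <vector>

// A parsed hard clause; if last_is_slack is true, its last literal is a
// provisional slack index in [-num_slacks, num_slacks].
struct Clause {
  std::vector<int> literals;
  bool last_is_slack = false;
};

// Shifts every provisional slack so that slacks come after the original
// variables: last >= 0 ? last + num_variables : last - num_variables.
void ShiftSlacks(std::vector<Clause>* clauses, int num_variables) {
  for (Clause& clause : *clauses) {
    if (!clause.last_is_slack) continue;
    int& last = clause.literals.back();
    last = last >= 0 ? last + num_variables : last - num_variables;
  }
}

int main() {
  // Soft clause "2 1 -3 0" parsed with provisional slack +1 appended.
  std::vector<Clause> clauses = {{{1, -3, 1}, /*last_is_slack=*/true}};
  ShiftSlacks(&clauses, /*num_variables=*/3);  // Slack 1 becomes variable 4.
  for (const int lit : clauses[0].literals) std::printf("%d ", lit);
  std::printf("0\n");  // Prints: 1 -3 4 0
  return 0;
}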