-
Notifications
You must be signed in to change notification settings - Fork 1
/
sorter.cc
89 lines (81 loc) · 2.72 KB
/
sorter.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// Copyright 2010 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ---
// Author: Roberto Bayardo
#include "sorter.h"
#include <stdio.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include "basic-types.h"
#include "data-source-iterator.h"
#include "set-properties.h"
namespace google_extremal_sets {
SetPropertiesCompareFunctor compare_set_properties;
SetPropertiesCardinalityCompareFunctor compare_set_properties_cardinality;
bool Sort(
DataSourceIterator* data, const char* output_path, bool by_cardinality) {
FILE* output_file = fopen(output_path, "wb");
if (!output_file) {
std::cerr << "; Could not open output file for writing: "
<< output_path << "\n";
return false;
}
uint32_t set_id;
std::vector<uint32_t> itemset;
std::vector<SetProperties*> sort_us;
int result;
std::cerr << "; Reading data..." << std::endl;
while ((result = data->Next(&set_id, &itemset)) == 1) {
SetProperties* set = SetProperties::Create(set_id, itemset);
// Check for itemsets that are not properly sorted / contain
// duplicate items.
bool not_sorted = false;
for (uint32_t i = 0; i < set->size - 1; ++i) {
if (set->item[i] >= set->item[i + 1]) {
not_sorted = true;
break;
}
}
if (not_sorted) {
std::cerr << "; WARNING: Skipping invalid set. " << *set << '\n';
SetProperties::Delete(set);
} else {
sort_us.push_back(SetProperties::Create(set_id, itemset));
}
}
if (result < 0)
return false;
std::cerr << "; Sorting ("
<< (by_cardinality ? "by cardinality" : "lexicographic")
<< ") ..." << std::endl;
if (by_cardinality)
sort(sort_us.begin(), sort_us.end(), compare_set_properties_cardinality);
else
sort(sort_us.begin(), sort_us.end(), compare_set_properties);
std::cerr
<< "; Writing " << sort_us.size() << " itemsets to file..." << std::endl;
for (uint32_t i = 0; i < sort_us.size(); ++i) {
SetProperties* set = sort_us[i];
if (!fwrite(set, sizeof(uint32_t), 2 + set->size, output_file)) {
// TODO: fix mem leak
return false;
}
SetProperties::Delete(set);
}
if (fclose(output_file))
return false;
return true;
}
} // namespace google_extremal_sets