-
Notifications
You must be signed in to change notification settings - Fork 3
/
sim.c
210 lines (185 loc) · 5.38 KB
/
sim.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/* This file is part of the software similarity tester SIM.
Written by Dick Grune, Vrije Universiteit, Amsterdam.
$Id: sim.c,v 2.32 2012-11-28 20:49:52 Gebruiker Exp $
*/
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "system.par"
#include "settings.par"
#include "sim.h"
#include "options.h"
#include "newargs.h"
#include "token.h"
#include "language.h"
#include "error.h"
#include "text.h"
#include "runs.h"
#include "hash.h"
#include "compare.h"
#include "pass1.h"
#include "pass2.h"
#include "pass3.h"
#include "percentages.h"
#include "stream.h"
#include "lang.h"
#include "Malloc.h"
/* command-line parameters: the values the program works with.
   main() in this file replaces the defaults when the corresponding option
   is present on the command line.
*/
size_t Min_Run_Size = DEFAULT_MIN_RUN_SIZE;	/* set from the -r argument */
int Page_Width = DEFAULT_PAGE_WIDTH;	/* set from the -w argument */
int Threshold_Percentage = 1; /* minimum percentage to show; set from the -t argument, forced back to 1 by -P */
FILE *Output_File;	/* main() sets this to stdout, or to the file named by -o */
FILE *Debug_File;	/* main() sets this to stdout */
/* and their string values, for language files that define their own parameters
*/
const char *token_name = "token";	/* NOTE(review): not used in this file; presumably the word used for the unit of comparison -- confirm against the language files */
const char *min_run_string;	/* text of the -r argument; registered in optlist */
const char *threshold_string;	/* text of the -t argument; registered in optlist */
const char *progname; /* for error reporting; main() sets it to argv[0] */
static const char *page_width_string;	/* text of the -w argument; registered in optlist */
static const char *output_name; /* for reporting; text of the -o argument, tested for null in main() */
/* The table of command-line options, handed to do_options() by main().
   Each entry gives, in order: the option character, a description text,
   a one-character argument kind, and the address of the string variable
   that is to receive the option's argument (0 when there is none).
   NOTE(review): struct option is declared outside this file (presumably in
   options.h); judging from the entries, kind 'N' marks a numeric argument,
   'F' a file name argument, and ' ' a plain flag -- confirm there.
   The all-zero entry closes the list.
*/
static const struct option optlist[] = {
{'r', "minimum run size", 'N', &min_run_string},
{'w', "page width", 'N', &page_width_string},
{'f', "function-like forms only", ' ', 0},
{'F', "keep function identifiers in tact", ' ', 0},
{'d', "use diff format for output", ' ', 0},
{'T', "terse output", ' ', 0},
{'n', "display headings only", ' ', 0},
{'p', "use percentage format for output", ' ', 0},
{'P', "use percentage format, showing all combinations", ' ', 0},
{'t', "threshold level of percentage to show", 'N', &threshold_string},
{'e', "compare each file to each file separately", ' ', 0},
{'s', "do not compare a file to itself", ' ', 0},
{'S', "compare new files to old files only", ' ', 0},
{'R', "recurse into subdirectories", ' ', 0},
{'i', "read arguments (file names) from standard input", ' ', 0},
{'o', "write output to file F", 'F', &output_name},
{'M', "show memory usage info", ' ', 0},
{'-', "lexical scan output only", ' ', 0},
{0, 0, 0, 0}
};
/* Performs one complete read-and-compare round over the files named in
   (argc, argv): reads the input files, builds the forward references,
   compares the files, and releases the forward references again.
   The parameter round is passed on unchanged to Read_Input_Files(); main()
   calls this routine with round 1 and, in percentage mode, again with
   round 2.
*/
static void
read_and_compare_files(int argc, const char **argv, int round) {
Read_Input_Files(argc, argv, round);
/* the forward references exist only for the duration of the comparison */
Make_Forward_References();
Compare_Files();
Free_Forward_References();
}
/* Tells whether the argument s is the marker "/" that divides the new
   files from the old files in the argument list.
   Returns 1 if s is exactly the one-character string "/", 0 otherwise.
*/
int
is_new_old_separator(const char *s) {
	/* the second test is reached only when s has at least one character */
	return s[0] == '/' && s[1] == '\0';
}
/* Reverses, in place, the order of the new-file names in argv, i.e. the
   entries that precede the new/old separator, or all argc entries when
   there is no separator. The separator and whatever follows it stay
   where they are.
*/
static void
reverse_new_input_files(int argc, const char *argv[]) {
	int n_new = 0;		/* number of names before the separator */
	int lo;
	int hi;

	/* count the new files */
	while (n_new < argc && !is_new_old_separator(argv[n_new])) {
		n_new++;
	}

	/* exchange the names pairwise, working inwards from both ends */
	for (lo = 0, hi = n_new - 1; lo < hi; lo++, hi--) {
		const char *held = argv[hi];

		argv[hi] = argv[lo];
		argv[lo] = held;
	}
}
/* Converts the text s to a decimal integer in the range 1..max.
   Returns 1 and stores the value in *result if s consists of a valid
   number in that range and nothing else; returns 0, leaving *result
   untouched, if s contains no digits, has trailing characters, does not
   fit in a long, or lies outside 1..max.
*/
static int
parse_positive_long(const char *s, long max, long *result) {
	char *end;
	long val;

	errno = 0;
	val = strtol(s, &end, 10);
	if (end == s || *end != '\0') return 0;	/* no number, or garbage */
	if (errno == ERANGE) return 0;		/* too large for a long */
	if (val < 1 || val > max) return 0;
	*result = val;
	return 1;
}

/* The main program of sim.
   Processes the options in optlist, converts and checks the arguments of
   -r, -w, -t and -o, determines the list of input files (from the command
   line, from standard input with -i, extended by -R), and then does one
   of three things: print the lexical scan only (option '-'), show
   percentages (-p, -P), or show runs (the default).
   Numeric option arguments must be plain positive decimal numbers; text
   such as "10x" or "-5" is rejected through fatal().
   Returns 0 on normal completion.
*/
int
main(int argc, const char *argv[]) {
	long val = 0;

	/* Save program name */
	progname = argv[0];
	argv++, argc--;				/* and skip it */

	/* Set the default output and debug streams */
	Output_File = stdout;
	Debug_File = stdout;

	/* Get command line options */
	{	int nop = do_options(progname, optlist, argc, argv);
		argc -= nop, argv += nop;	/* and skip them */
	}

	/* Treat the value options */
	if (min_run_string) {
		if (parse_positive_long(min_run_string, LONG_MAX, &val)) {
			Min_Run_Size = (size_t)val;
		} else {
			fatal("bad or zero run size; form is: -r N");
		}
	}
	if (page_width_string) {
		if (parse_positive_long(page_width_string, INT_MAX, &val)) {
			Page_Width = (int)val;
		} else {
			fatal("bad or zero page width; form is: -w N");
		}
	}
	if (threshold_string) {
		if (parse_positive_long(threshold_string, 100, &val)) {
			Threshold_Percentage = (int)val;
		} else {
			fatal("threshold must be between 1 and 100");
		}
	}
	if (output_name) {
		Output_File = fopen(output_name, "w");
		if (Output_File == NULL) {
			enum { MSG_SIZE = 500 };
			char msg[MSG_SIZE];

			/* snprintf always null-terminates; an overlong name
			   is merely truncated in the message
			*/
			snprintf(msg, sizeof msg, "cannot open output file %s",
				output_name);
			fatal(msg);
			/*NOTREACHED*/
		}
	}

	/* -P is -p with all combinations shown, hence the lowest threshold */
	if (is_set_option('P')) {
		Threshold_Percentage = 1;
		set_option('p');
	}
	/* percentage format implies -e and -s */
	if (is_set_option('p')) {
		set_option('e');
		set_option('s');
	}

	/* Treat the input-determining options */
	if (is_set_option('i')) {
		/* read input file names from standard input */
		if (argc != 0)
			fatal("-i option conflicts with file arguments");
		get_new_std_input_args(&argc, &argv);
	}
	if (is_set_option('R')) {
		get_new_recursive_args(&argc, &argv);
	}
	/* (argc, argv) now represents new_file* [ / old_file*] */

	/* Here the real work starts */
	Init_Language();

	if (is_set_option('-')) {
		/* Just the lexical scan */
		/* this loop relies on argv ending in a null pointer */
		while (argv[0]) {
			const char *arg = argv[0];
			if (!is_new_old_separator(arg)) {
				Print_Stream(arg);
			}
			argv++;
		}
	}
	else if (is_set_option('p')) {
		/* Show percentages */
		/*	To compute the percentages fairly, the input files are
			read twice, once in command line order, and once with
			the new files in reverse order.
		*/
		read_and_compare_files(argc, argv, 1);
		reverse_new_input_files(argc, argv);
		read_and_compare_files(argc, argv, 2);
		Show_Percentages();
	} else {
		/* Show runs */
		read_and_compare_files(argc, argv, 1);
		Retrieve_Runs();
		Show_Runs();
	}

	if (is_set_option('M')) {
		/*	It is not trivial to plug the leaks, because data
			structures point to each other, and have to be freed
			in the proper order. But it is not impossible either.
			To do, perhaps.
		*/
		ReportMemoryLeaks(stderr);
	}

	return 0;
}