-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathicecast-soundexchange
executable file
·390 lines (294 loc) · 11.6 KB
/
icecast-soundexchange
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
#!/usr/bin/perl
use 5.012; # so readdir assigns to $_ in a lone while test
use strict;
no strict "refs";
use Data::Dumper;
use Time::Local;
use Time::Piece;
use Date::Parse;
use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
use Getopt::Long;
Getopt::Long::Configure ("bundling");
use Pod::Usage;
use constant ICECASTLOGDIR => $ENV{ICECASTLOGDIR} || q(/var/log/icecast);
use constant OUTPUTDIR => $ENV{OUTPUTDIR} || q(/var/tmp);
use constant DEFAULT_MOUNTPOINTS => ('', 'high', 'low',);
use constant ONE_DAY => 60 * 60 * 24;
# Pattern is DD/Mon/YYYY
use constant DATE_PATTERN => qr(\d{2}/[A-Z][a-z]{2}/\d{4});
sub getLogFiles($$$$);
sub inDateRange($$$$);
sub mon($$);
sub main();
main;
1;
sub main() {
my $retVal = undef;
my ($result, $logDirectory, $startDate, $endDate, $outputDir, $zipFile, $verbose, $debug, $help, $man) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
my (@logContents) = ();
my @mountpoints = undef;
my $zip = undef;
$result = GetOptions (
"start-date|s=s" => \$startDate,
"end-date|e=s" => \$endDate,
"log-directory|l=s" => \$logDirectory,
"output-dir|o=s" => \$outputDir,
"mount-points|m=s" => \@mountpoints,
"zip-file|z=s" => \$zipFile,
"verbose|v+" => \$verbose,
"debug|d+" => \$debug,
"help|h" => \$help,
"man|M" => \$man,
);
die "GetOptions returned " . $result . ". Stopped" if (! $result);
pod2usage(1) if (defined($help) && $help);
pod2usage(-verbose => 2) if (defined($man) && $man);
if (! $startDate || ! $endDate) {
say STDERR "Please specify a starting date and an ending date for this report.";
}
$verbose = $debug if ($debug);
if ($#mountpoints == 1) {
@mountpoints = split(/, */, join(',', @mountpoints));
} else {
@mountpoints = DEFAULT_MOUNTPOINTS;
}
$logDirectory = $logDirectory || ICECASTLOGDIR;
if ($zipFile) {
$zip = Archive::Zip->new();
}
my $mountpointRE = join('|', @mountpoints);
$mountpointRE =~ s,\|,,;
my $start = str2time($startDate);
my $end = str2time($endDate);
$end += ONE_DAY;
say STDERR "Start date: ", $startDate, ": ", $start, ", End date: ", $endDate, ": ", $end if ($verbose);
# TODO: finish this (as of 2018-09-21)
my $logLines = getLogFiles($startDate, $endDate, $logDirectory, $verbose);
$outputDir = $outputDir || OUTPUTDIR;
#207.190.123.196 - - [05/Mar/2013:22:09:36 -0600] "GET /high HTTP/1.0" 200 134755 "-" "WinampMPEG/5.63, Ultravox/2.1" 4
#166.228.136.249 - - [05/Mar/2013:22:11:55 -0600] "GET /high HTTP/1.1" 200 1002080 "-" "BREW-Applet/0x010950A6 (BREW/3.1.5.189; DeviceId: 1388; Lang: en)" 92
foreach my $logLine (sort {
# Log line item: day month year hour minute second
# Array elements: 0 1 2 3 4 5
my @a = $a =~ qr(\d+\.\d+\.\d+\.\d+\s+-\s+-\s+\[(\d{2})/(\w{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2})\s+);
my @b = $b =~ qr(\d+\.\d+\.\d+\.\d+\s+-\s+-\s+\[(\d{2})/(\w{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2})\s+);
$a[2] <=> $b[2] ||
mon($a[1], 0) <=> mon($b[1], 0) ||
$a[0] <=> $b[0] ||
$a[3] <=> $b[3] ||
$a[4] <=> $b[4] ||
$a[5] <=> $b[5]
} @$logLines) {
next unless ($logLine =~ m#get\s+/($mountpointRE)\s+#i);
say STDERR $logLine if ($debug > 1);
# Bug Alert: the sub-expression "(\d{2})00" represents the
# timezone offset in the date/time field. It currently assumes
# an integral number of hours offset from GMT (UTC).
if ($logLine =~ m#^(\d+\.\d+\.\d+\.\d+)\s+-\s+-\s+\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2})\s+[+-](\d{2})00\]\s+"get\s+/($mountpointRE) http/\d\.\d"\s+(\d{3})\s+(\d+)\s+"([^"]+)"\s+"(.*)"\s+(\d+)$#i) {
my $ipAddress = $1;
my ($date, $hour, $minute, $second) = split(/:/, $2);
my $tzOffset = $3;
my $loggedMountPoint = $4;
my $statusCode = $5;
my $byteCount = $6;
my $referer = $7;
my $client = $8;
my $duration = $9;
my ($mday, $mon, $year) = split(/\//, $date);
say STDERR "ipAddress: $ipAddress + referer: $referer + client: $client" if ($debug);
# This time is actually the time the user disconnected
# from the stream. We need to calculate the time at
# which they connected to the stream by subtracting
# the duration from this 'end time'
my $time = timelocal($second, $minute, $hour, $mday, mon($mon, $verbose), $year);
$time -= $duration;
my $connectTime = gmtime($time);
next unless ($connectTime >= $start && $connectTime <= $end);
say STDERR
$ipAddress, " ",
$connectTime->ymd, " ",
$connectTime->hms, " ",
$loggedMountPoint, " ", $duration, " ", $statusCode,
" ->", $referer, "<- ->", $client, "<-"
if ($verbose > 2);
my $entry = sprintf
"%s\t%s\t%s\t%s\t%d\t%03d\t%s/%s",
$ipAddress,
$connectTime->ymd,
$connectTime->hms,
$loggedMountPoint,
$duration,
$statusCode,
$referer,
$client
;
# collect the entries per-mountpoint
push(@$loggedMountPoint, $entry);
} else {
say STDERR "Regex match failed for logLine:\n\t", $logLine;
}
}
# write per-mountpoint files with all collected entries
for my $mount (@mountpoints) {
next unless ($mount);
# Stash the per-mountpoint files into the zip archive if so requested.
if ($zipFile) {
# Note the <Tab> characters in the string.
my $zipString = join("\r\n", ("IPAddress Date Time Stream Duration Status Referrer", @$mount, ''));
my $stringMember = $zip->addString($zipString, "$mount.txt");
$stringMember->desiredCompressionMethod(COMPRESSION_DEFLATED);
say STDERR "Added $mount.txt to '$zipFile'." if ($verbose);
} else {
open(OUT, ">", "$outputDir/$mount.txt") || die "Cannot open $outputDir/$mount.txt for writing ($!). Stopped";
# Again, note the <Tab> characters in the string.
print OUT "IPAddress Date Time Stream Duration Status Referrer\r\n";
foreach my $line (@$mount) {
print OUT $line, "\r\n";
}
close(OUT);
}
}
if ($zipFile) {
unless ($zip->writeToFileNamed($zipFile) == AZ_OK) {
die "Error writing to Zip archive '$zipFile'. Stopped";
}
}
}
# Get the contents of the log files that contain entries in the range
# of our start and end dates.
# Returns an array ref containing all relevant log file entries.
sub getLogFiles($$$$) {
my $startDate = shift;
my $endDate = shift;
my $logDirectory = shift;
my $verbose = shift;
my @logFileNames = ();
my @validLogLines = ();
# strptime() returns this array: ($ss,$mm,$hh,$day,$month,$year,$zone)
my @start = strptime($startDate);
my $startYYYYMMDD = sprintf("%04d%02d%02d", $start[5] + 1900, $start[4] + 1, $start[3]);
my @end = strptime($endDate);
my $endYYYYMMDD = sprintf("%04d%02d%02d", $end[5] + 1900, $end[4] + 1, $end[3]);
opendir(my $dirH, $logDirectory) ||
die "Cannot open icecast log directory '$logDirectory'. Stopped";
while (readdir $dirH) {
next unless (/^access/);
my $lf = undef;
my $lfDate = undef;
# Log filename may end with a timestamp suffix, or simply
# ".log". Skip log files with timestamps outside our date
# range. We always want to examine "access.log".
($lfDate = $_) =~ s/^access\.log(\.(\d{8})_\d+)*/$2/;
next if ($lfDate && ($lfDate < $startYYYYMMDD || $lfDate > $endYYYYMMDD));
say STDERR "Looking at $_" if ($verbose);
open(LF, $logDirectory . "/" . $_) ||
die "Cannot open " . $logDirectory . "/" . $_ . " for reading. Stopped";
my @lines = <LF>;
close(LF);
say STDERR $_, " has ", $#lines, " lines" if ($verbose > 1);
if (inDateRange(\@lines, $startDate, $endDate, $verbose)) {
push(@validLogLines, @lines);
}
}
closedir $dirH;
say STDERR Dumper @logFileNames if ($verbose);
say STDERR "Found ", $#validLogLines, " log lines." if ($verbose);
\@validLogLines;
}
# Determine whether the list of lines is within the startDate and
# endDate range.
# Returns true if so, otherwise false.
sub inDateRange($$$$) {
my $lines = shift;
my $startDate = shift;
my $endDate = shift;
my $verbose = shift;
my $s = str2time($startDate);
my $e = str2time($endDate);
$e += ONE_DAY;
foreach my $line (@$lines) {
my ($j1, $dateTime, $j2) = split(/[\[\]]/, $line);
my $lineDate = str2time($dateTime);
say STDERR "$lineDate in date range $s - $e" if (($verbose > 1) && ($lineDate >= $s && $lineDate <= $e));
return 1 if ($lineDate >= $s && $lineDate <= $e);
}
0;
}
# Return the zero-based decimal month number for the month name
# passed.
sub mon($$) {
my $monthName = shift;
my $verbose = shift;
my $months = {
Jan => 0,
Feb => 1,
Mar => 2,
Apr => 3,
May => 4,
Jun => 5,
Jul => 6,
Aug => 7,
Sep => 8,
Oct => 9,
Nov => 10,
Dec => 11,
};
$months->{$monthName};
}
__END__;
=head1 NAME
B<icecast-soundexchange> - munge icecast logs into NPR soundexchange format
=head1 SYNOPSIS
icecast-soundexchange [ --help ] [ --man ] [ --debug ] [ --verbose ]
[ --log-directory (-l) <directory-path> ]
[ --mount-points (-m) <comma-separated-list> ]
[ --output-directory (-o) <full-path-to-a-directory> ]
[ --zip-file (-z) <zip-file> ]
--start-date <start-date> --end-date <end-date>
=head1 DESCRIPTION
B<icecast-soundexchange> reads icecast access log files and writes tab-separated fields into (DOS-format) text files, one per icecast mount-point.
B<--start-date> and B<--end-date> are required.
By default B<icecast-soundexchange> searches for log files in the directory I</var/log/icecast>. You may set and export the environment variable I<ICECASTLOGDIR> to a directory containing your icecast log files or use the B<--log-directory> option to specify that directory.
See
http://digitalservices.npr.org/post/soundexchange-streaming-file-format-standard-announced
for details of the file format.
=head1 OPTIONS
Long options consist of (possibly hyphenated) English words and begin with I<-->; short options consist of a single letter and begin with a single hyphen I<->.
=over 8
=item B<--start-date (-s) E<lt>start-dateE<gt>>
(REQUIRED) specify the starting date for the log messages.
Format: DD/Mon/YYYY where Mon is the 3-letter abbreviation for the month name
=item B<--end-date (-e) E<lt>end-dateE<gt>>
(REQUIRED) specify the ending date for the log messages.
Format: DD/Mon/YYYY where Mon is the 3-letter abbreviation for the month name
=item B<--help (-h)>
show a brief usage message
=item B<--man (-M)>
show the complete manual page for this command
=item B<--debug (-d)>
launch B<icecast-soundexchange> in verbose debugging mode. Specify
multiple times for more debugging.
=item B<--verbose (-v)>
launch B<icecast-soundexchange> in verbose mode (not quite as verbose as B<--debug>)
=item B<--mount-points (-m) E<lt>comma-separated-listE<gt>>
Specify one or more mount points for which to report. All mount points
must match actual mount points provided by the Icecast server.
=item B<--log-directory (-l) E<lt>directory-nameE<gt>>
name the directory path that contains the Icecast log files..
The default is in the perl constant ICECASTLOGDIR (I</var/log/icecast>).
=item B<--zip-file (-z) E<lt>zip-filenameE<gt>>
name the Zip archive file in which to place the output files. There is
no default.
=item B<--output-directory (-o) E<lt>full-path-to-a-directoryE<gt>>
Specify the directory name for the output files. This option is
mutually exclusive with B<--zip-file>. That is, you may either request
individual files named for the mount points, I<or> you may request a
zip archive containing all the mount-point files.
=back
=head1 SEE ALSO
=over 8
=item perl(1)
=item icecast(1)
=item http://digitalservices.npr.org/post/soundexchange-streaming-file-format-standard-announced
=back
=cut