-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurlbst
executable file
·707 lines (643 loc) · 19.8 KB
/
urlbst
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
#! /usr/bin/perl -w
#
# Usage: ./urlbst.pl [--eprint] [--doi] [--pubmed]
# [--nohyperlinks] [--inlinelinks] [--hypertex] [--hyperref]
# [input-file [output-file]]
# If either input-file or output-file is omitted, they are replaced by
# stdin or stdout respectively.
#
# See http://purl.org/nxg/dist/urlbst for documentation
#
# $Id$
$version = '0.6-4';
($progname = $0) =~ s/.*\///;
$mymarker = "% $progname";
$mymarkerend = "% ...$progname to here";
$myurl = 'http://purl.org/nxg/dist/urlbst';
$infile = '-';
$outfile = '-';
$addeprints = 0; # if true (nonzero) we add support for eprints
$eprintprefix = 'arXiv:'; # make these settable with --eprint? syntax?
$eprinturl = 'http://arxiv.org/abs/';
$adddoiresolver = 0;
$doiprefix = 'doi:';
$doiurl = 'http://dx.doi.org/';
$addpubmedresolver = 0;
$pubmedprefix = 'PMID:';
$pubmedurl = 'http://www.ncbi.nlm.nih.gov/pubmed/';
$makehref = 0;
$availablestring = "Available from: ";
$inlinelinks = 0;
$Usage = "$progname [--eprint] [--doi] [--pubmed]\n [--nohyperlinks] [--inlinelinks] [--hypertex] [--hyperref]\n [--help] [input-file [output-file]]";
while ($#ARGV >= 0) {
if ($ARGV[0] eq '--eprint') {
$addeprints = 1;
} elsif ($ARGV[0] eq '--doi') {
$adddoiresolver = 1;
} elsif ($ARGV[0] eq '--pubmed') {
$addpubmedresolver = 1;
} elsif ($ARGV[0] eq '--nohyperlinks') {
$makehref = 0;
} elsif ($ARGV[0] eq '--hypertex') {
$makehref = 1;
} elsif ($ARGV[0] eq '--hyperref') {
$makehref = 2;
} elsif ($ARGV[0] eq '--inlinelinks') {
$inlinelinks = 1;
} elsif ($ARGV[0] eq '--help') {
print <<EOD;
urlbst version $version
Usage: $Usage
--eprint: include support for `eprint' fields
--doi: include support for `doi' field
--pubmed: include support for `pubmed' field
--nohyperlinks do not include active links anywhere
--inlinelinks add hyperlinks to entry titles
--hypertex: include HyperTeX-style hyperlink support
--hyperref: include {hyperref}-style hyperlink support
(generally better)
--help: print this help
Input and output files may be given as `-' (default) to indicate stdin/out
EOD
exit(0);
} elsif ($ARGV[0] =~ /^-/) {
die "Unrecognised option $ARGV[0]: Usage: $Usage\n";
} elsif ($infile eq '-') {
$infile = $ARGV[0];
} elsif ($outfile eq '-') {
$outfile = $ARGV[0];
} else {
die "Usage: $Usage\n";
}
shift(@ARGV);
}
if ($inlinelinks && $makehref == 0) {
print <<'EOD';
Warning: --inlinelinks and --nohyperlinks were both specified (possibly
implicitly). That combination makes no sense, so I'll ignore
--nohyperlinks and use --hyperref instead
EOD
$makehref = 2;
}
$exitstatus = 0; # success status
open (IN, "<$infile") || die "Can't open $infile to read";
open (OUT, ">$outfile") || die "Can't open $outfile to write";
# We have to make certain assumptions about the source files, in order
# to patch them at the correct places. Specifically, we assume that
#
# - there's a function init.state.consts
#
# - ...and an output.nonnull which does the actual outputting, which
# has the `usual' interface.
#
# - we can replace
# fin.entry
# by
# new.block
# output.url % the function which formats and displays any URL
# fin.entry
#
# - there is a function which handles the `article' entry type (this
# will always be true)
#
# - there is a function output.bibitem which is called at the
# beginning of each entry type
# - ...and one called fin.entry which is called at the end
#
# If the functions format.date, format.title or new.block are not defined (the
# former is not in apalike, for example, and the last is not in the
# AMS styles), then replacements are included in the output.
#
# All these assumptions are true of the standard files and, since most
# style files derive from them more or less directly, are true of most (?)
# other style files, too.
#
# There's some rather ugly Perl down here. The parsing for
# brace-matching could probably do with being rewritten in places, to
# make it less ugly, and more robust.
print OUT "%%% Modification of BibTeX style file ", ($infile eq '-' ? '<stdin>' : $infile), "\n";
print OUT "%%% ... by $progname, version $version (marked with \"$mymarker\")\n%%% See <$myurl>\n";
print OUT "%%% Added webpage entry type, and url and lastchecked fields.\n";
print OUT "%%% Added eprint support.\n" if ($addeprints);
print OUT "%%% Added DOI support.\n" if ($adddoiresolver);
print OUT "%%% Added PUBMED support.\n" if ($addpubmedresolver);
print OUT "%%% Added HyperTeX support.\n" if ($makehref == 1);
print OUT "%%% Added hyperref support.\n" if ($makehref == 2);
print OUT "%%% Original headers follow...\n\n";
$found{initconsts} = 0;
$found{outputnonnull} = 0;
$found{article} = 0;
$found{outputbibitem} = 0;
$found{finentry} = 0;
$found{formatdate} = 0;
$found{formattitle} = 0;
$found{newblock} = 0;
while (<IN>) {
/^ *%/ && do {
# Pass commented lines unchanged
print OUT;
next;
};
/^ *ENTRY/ && do {
# Work through the list of entry types, finding what ones are there.
# If we find a URL entry there already, object, since these edits
# will mess things up.
$line = $_;
until ($line =~ /\{\s*(\w*)/) {
$line .= <IN>;
}
$bracematchtotal = 0; # reset
bracematcher($line);
$line =~ /\{\s*(\w*)/;
$found{'entry'.$1} = 1;
print OUT $line;
$line = <IN>;
until (bracematcher($line) == 0) {
# XXX deal with multiple entries on one line
($line =~ /^\s*(\w*)/) && ($found{'entry'.$1} = 1);
print OUT $line;
$line = <IN>;
}
if (defined($found{entryurl})) {
print STDERR "$progname: style file $infile already has URL entry!\n";
# print out the rest of the file, and give up
print OUT $line;
while (<IN>) {
print OUT;
}
$exitstatus = 1;
last;
} else {
print OUT " eprint $mymarker\n doi $mymarker\n pubmed $mymarker\n url $mymarker\n lastchecked $mymarker\n";
}
print OUT $line;
next;
};
/^ *FUNCTION *\{init\.state\.consts\}/ && do {
# In the init.state.consts function, add an extra set of
# constants at the beginning. Also use this as the marker for
# the place to add the init strings function.
print OUT <<EOD;
STRINGS { urlintro eprinturl eprintprefix doiprefix doiurl pubmedprefix pubmedurl openinlinelink closeinlinelink } $mymarker...
INTEGERS { hrefform inlinelinks makeinlinelink addeprints adddoiresolver addpubmedresolver }
% Following constants may be adjusted by hand, if desired
FUNCTION {init.urlbst.variables}
{
"$availablestring" 'urlintro := % prefix before URL
"$eprinturl" 'eprinturl := % prefix to make URL from eprint ref
"$eprintprefix" 'eprintprefix := % text prefix printed before eprint ref
"$doiurl" 'doiurl := % prefix to make URL from DOI
"$doiprefix" 'doiprefix := % text prefix printed before DOI ref
"$pubmedurl" 'pubmedurl := % prefix to make URL from PUBMED
"$pubmedprefix" 'pubmedprefix := % text prefix printed before PUBMED ref
#$addeprints 'addeprints := % 0=no eprints; 1=include eprints
#$adddoiresolver 'adddoiresolver := % 0=no DOI resolver; 1=include it
#$addpubmedresolver 'addpubmedresolver := % 0=no PUBMED resolver; 1=include it
#$makehref 'hrefform := % 0=no crossrefs; 1=hypertex xrefs; 2=hyperref refs
#$inlinelinks 'inlinelinks := % 0=URLs explicit; 1=URLs attached to titles
% the following are internal state variables, not config constants
#0 'makeinlinelink := % state variable managed by possibly.setup.inlinelink
"" 'openinlinelink := % ditto
"" 'closeinlinelink := % ditto
}
INTEGERS {
bracket.state
outside.brackets
open.brackets
within.brackets
close.brackets
}
$mymarkerend
EOD
$line = $_;
until ($line =~ /\{.*\}.*\{/s) {
$line .= <IN>;
}
$line =~ s/(\{.*?\}.*?\{)/$1 #0 'outside.brackets := $mymarker
#1 'open.brackets :=
#2 'within.brackets :=
#3 'close.brackets :=
/s;
print OUT $line;
$found{initconsts} = 1;
next;
};
/^ *EXECUTE *\{init\.state\.consts\}/ && do {
print OUT "EXECUTE {init.urlbst.variables}\n";
print OUT;
next;
};
/^ *FUNCTION *\{new.block\}/ && do {
$found{newblock} = 1;
};
/^ *FUNCTION *{output\.nonnull}/ && do {
print OUT "$mymarker\n";
print OUT "FUNCTION {output.nonnull.original}\n";
copy_block();
print_output_functions();
$found{outputnonnull} = 1;
next;
};
/FUNCTION *\{fin.entry\}/ && do {
# Rename fin.entry to fin.entry.original (wrapped below)
s/fin.entry/fin.entry.original/;
$found{finentry} = 1;
print OUT;
next;
};
/^ *FUNCTION *{format\.date}/ && do {
$found{formatdate} = 1;
print OUT;
next;
};
/^ *FUNCTION *{format\.title}/ && do {
# record that we found this
$found{formattitle} = 1;
print OUT;
next;
};
/^ *format\.b?title/ && do {
# interpolate a call to possibly.setup.inlinelink
print OUT " title empty\$ 'skip\$ 'possibly.setup\.inlinelink if\$ $mymarker\n";
print OUT;
next;
};
/^ *format\.vol\.num\.pages/ && do {
# interpolate a call to possibly.setup.inlinelink
s/^( *)/$1possibly.setup.inlinelink /;
s/$/$mymarker/;
print OUT;
next;
};
/^ *FUNCTION *\{article\}/ && do {
print_missing_functions();
print_webpage_def();
print OUT;
$found{article} = 1;
next;
};
/FUNCTION *\{output.bibitem\}/ && do {
# Rename output.bibitem to output.bibitem.original (wrapped below)
s/{output.bibitem\}/\{output.bibitem.original\}/;
$found{outputbibitem} = 1;
print OUT;
next;
};
print OUT;
};
if ($exitstatus == 0) {
# Skip this if we've already reported an error -- it'll only be confusing
foreach $k (keys %found) {
if ($found{$k} == 0) {
print STDERR "$progname: $infile: failed to find feature $k\n";
}
}
}
close (IN);
close (OUT);
exit $exitstatus;;
sub print_output_functions {
print OUT "$mymarker...\n";
print OUT <<'EOD';
% The following three functions are for handling inlinelink. They wrap
% a block of text which is potentially output with write$ by multiple
% other functions, so we don't know the content a priori.
% They communicate between each other using the variables makeinlinelink
% (which is true if a link should be made), and closeinlinelink (which holds
% the string which should close any current link. They can be called
% at any time, but start.inlinelink will be a no-op unless something has
% previously set makeinlinelink true, and the two ...end.inlinelink functions
% will only do their stuff if start.inlinelink has previously set
% closeinlinelink to be non-empty.
FUNCTION {possibly.setup.inlinelink}
{ makeinlinelink
{ hrefform #1 = % hypertex
{ "\special {html:<a href=" quote$ * url * quote$ * "> }{" * 'openinlinelink :=
"\special {html:</a>}" 'closeinlinelink :=
}
{ hrefform #2 = % hyperref
% the space between "} {" matters: a URL of just the right length can cause "\% newline em"
{ "\href{" url * "} {" * 'openinlinelink :=
"}" 'closeinlinelink :=
}
'skip$
if$ % hrefform #2 =
}
if$ % hrefform #1 =
#0 'makeinlinelink :=
}
'skip$
if$ % makeinlinelink
}
FUNCTION {add.inlinelink}
{ openinlinelink empty$
'skip$
{ openinlinelink swap$ * closeinlinelink *
"" 'openinlinelink :=
}
if$
}
EOD
# new.block is defined elsewhere
print OUT <<'EOD';
FUNCTION {output.nonnull}
{ % Save the thing we've been asked to output
's :=
% If the bracket-state is close.brackets, then add a close-bracket to
% what is currently at the top of the stack, and set bracket.state
% to outside.brackets
bracket.state close.brackets =
{ "]" *
outside.brackets 'bracket.state :=
}
'skip$
if$
bracket.state outside.brackets =
{ % We're outside all brackets -- this is the normal situation.
% Write out what's currently at the top of the stack, using the
% original output.nonnull function.
s
add.inlinelink
output.nonnull.original % invoke the original output.nonnull
}
{ % Still in brackets. Add open-bracket or (continuation) comma, add the
% new text (in s) to the top of the stack, and move to the close-brackets
% state, ready for next time (unless inbrackets resets it). If we come
% into this branch, then output.state is carefully undisturbed.
bracket.state open.brackets =
{ " [" * }
{ ", " * } % bracket.state will be within.brackets
if$
s *
close.brackets 'bracket.state :=
}
if$
}
% Call this function just before adding something which should be presented in
% brackets. bracket.state is handled specially within output.nonnull.
FUNCTION {inbrackets}
{ bracket.state close.brackets =
{ within.brackets 'bracket.state := } % reset the state: not open nor closed
{ open.brackets 'bracket.state := }
if$
}
FUNCTION {format.lastchecked}
{ lastchecked empty$
{ "" }
{ inbrackets "cited " lastchecked * }
if$
}
EOD
print OUT "$mymarkerend\n";
}
sub print_webpage_def {
print OUT "$mymarker...\n";
# Some of the functions below call new.block, so we need a dummy
# version, in the case where the style being edited doesn't supply
# that function.
if (! $found{newblock}) {
print OUT "FUNCTION {new.block} % dummy new.block function\n{\n % empty\n}\n\n";
$found{newblock} = 1;
}
print OUT <<'EOD';
% Functions for making hypertext links.
% In all cases, the stack has (link-text href-url)
%
% make 'null' specials
FUNCTION {make.href.null}
{
pop$
}
% make hypertex specials
FUNCTION {make.href.hypertex}
{
"\special {html:<a href=" quote$ *
swap$ * quote$ * "> }" * swap$ *
"\special {html:</a>}" *
}
% make hyperref specials
FUNCTION {make.href.hyperref}
{
"\href {" swap$ * "} {\path{" * swap$ * "}}" *
}
FUNCTION {make.href}
{ hrefform #2 =
'make.href.hyperref % hrefform = 2
{ hrefform #1 =
'make.href.hypertex % hrefform = 1
'make.href.null % hrefform = 0 (or anything else)
if$
}
if$
}
% If inlinelinks is true, then format.url should be a no-op, since it's
% (a) redundant, and (b) could end up as a link-within-a-link.
FUNCTION {format.url}
{ inlinelinks #1 = url empty$ or
{ "" }
{ hrefform #1 =
{ % special case -- add HyperTeX specials
urlintro "\url{" url * "}" * url make.href.hypertex * }
{ urlintro "\url{" * url * "}" * }
if$
}
if$
}
FUNCTION {format.eprint}
{ eprint empty$
{ "" }
{ eprintprefix eprint * eprinturl eprint * make.href }
if$
}
FUNCTION {format.doi}
{ doi empty$
{ "" }
{ doiprefix doi * doiurl doi * make.href }
if$
}
FUNCTION {format.pubmed}
{ pubmed empty$
{ "" }
{ pubmedprefix pubmed * pubmedurl pubmed * make.href }
if$
}
% Output a URL. We can't use the more normal idiom (something like
% `format.url output'), because the `inbrackets' within
% format.lastchecked applies to everything between calls to `output',
% so that `format.url format.lastchecked * output' ends up with both
% the URL and the lastchecked in brackets.
FUNCTION {output.url}
{ url empty$
'skip$
{ new.block
format.url output
format.lastchecked output
}
if$
}
FUNCTION {output.web.refs}
{
new.block
output.url
addeprints eprint empty$ not and
{ format.eprint output.nonnull }
'skip$
if$
adddoiresolver doi empty$ not and
{ format.doi output.nonnull }
'skip$
if$
addpubmedresolver pubmed empty$ not and
{ format.pubmed output.nonnull }
'skip$
if$
}
% Wrapper for output.bibitem.original.
% If the URL field is not empty, set makeinlinelink to be true,
% so that an inline link will be started at the next opportunity
FUNCTION {output.bibitem}
{ outside.brackets 'bracket.state :=
output.bibitem.original
inlinelinks url empty$ not and
{ #1 'makeinlinelink := }
{ #0 'makeinlinelink := }
if$
}
% Wrapper for fin.entry.original
FUNCTION {fin.entry}
{ output.web.refs % urlbst
makeinlinelink % ooops, it appears we didn't have a title for inlinelink
{ possibly.setup.inlinelink % add some artificial link text here, as a fallback
"[link]" output.nonnull }
'skip$
if$
bracket.state close.brackets = % urlbst
{ "]" * }
'skip$
if$
fin.entry.original
}
% Webpage entry type.
% Title and url fields required;
% author, note, year, month, and lastchecked fields optional
% See references
% ISO 690-2 http://www.nlc-bnc.ca/iso/tc46sc9/standard/690-2e.htm
% http://www.classroom.net/classroom/CitingNetResources.html
% http://neal.ctstateu.edu/history/cite.html
% http://www.cas.usf.edu/english/walker/mla.html
% for citation formats for web pages.
FUNCTION {webpage}
{ output.bibitem
author empty$
{ editor empty$
'skip$ % author and editor both optional
{ format.editors output.nonnull }
if$
}
{ editor empty$
{ format.authors output.nonnull }
{ "can't use both author and editor fields in " cite$ * warning$ }
if$
}
if$
new.block
title empty$ 'skip$ 'possibly.setup.inlinelink if$
format.title "title" output.check
inbrackets "online" output
new.block
year empty$
'skip$
{ format.date "year" output.check }
if$
% We don't need to output the URL details ('lastchecked' and 'url'),
% because fin.entry does that for us, using output.web.refs. The only
% reason we would want to put them here is if we were to decide that
% they should go in front of the rather miscellaneous information in 'note'.
new.block
note output
fin.entry
}
EOD
print OUT "$mymarkerend\n\n\n";
}
sub print_missing_functions {
# We've got to the bit of the file which handles the entry
# types, so write out the webpage entry handler. This uses
# the format.date function, which which many but not all
# bst files have (for example, apalike doesn't). So
# check that we either have found this function already, or
# add it.
if (! $found{formatdate}) {
if ($found{entrymonth}) {
print OUT <<'EOD';
FUNCTION {format.date}
{ year empty$
{ month empty$
{ "" }
{ "there's a month but no year in " cite$ * warning$
month
}
if$
}
{ month empty$
'year
{ month " " * year * }
if$
}
if$
}
EOD
} else {
print OUT <<'EOD';
FUNCTION {format.date}
{ year empty$
'skip$
{ %write$
"(" year * ")" *
}
if$
}
EOD
}
$found{formatdate} = 1;
}
# If the style file didn't supply a format.title function, then supply
# one here (the {webpage} function requires it).
if (! $found{formattitle}) {
print OUT <<'EOD';
FUNCTION {format.title}
{ title empty$
{ "" }
{ title "t" change.case$ }
if$
}
EOD
$found{formattitle} = 1;
}
}
# Utility function: Keep track of open and close braces in the string argument.
# Keep state in $bracematchtotal, return the current value.
sub bracematcher {
my $s = shift;
$s =~ s/[^\{\}]//g;
#print "s=$s\n";
foreach my $c (split (//, $s)) {
$bracematchtotal += ($c eq '{' ? 1 : -1);
}
return $bracematchtotal;
}
# Utility function: use bracematcher to copy the complete block which starts
# on or after the current line.
sub copy_block {
$bracematchtotal = 0;
# copy any leading lines which don't have braces (presumably comments)
while (defined ($line = <IN>) && ($line !~ /{/)) {
print OUT $line;
}
while (defined ($line) && bracematcher($line) > 0) {
print OUT $line;
$line = <IN>;
}
print OUT "$line\n"; # print out terminating \} (assumed
# alone on the line)
}