These_linjie_JC/thesis/Latex/Classes/urlbst

677 lines
19 KiB
Perl
Executable File

#! /usr/bin/env perl
#
# Usage: ./urlbst.pl [--eprint] [--doi]
# [--nohyperlinks] [--inlinelinks] [--hypertex] [--hyperref]
# [input-file [output-file]]
# If either input-file or output-file is omitted, they are replaced by
# stdin or stdout respectively.
#
# See http://purl.org/nxg/dist/urlbst for documentation
#
# $Id: urlbst.in,v 1.8 2007/03/26 17:21:56 norman Exp $
$version = '0.6';
($progname = $0) =~ s/.*\///;
$mymarker = "% $progname";
$mymarkerend = "% ...$progname to here";
$myurl = 'http://purl.org/nxg/dist/urlbst';
$infile = '-';
$outfile = '-';
$addeprints = 0; # if true (nonzero) we add support for eprints
$eprintprefix = 'arXiv:'; # make these settable with --eprint? syntax?
$eprinturl = 'http://arxiv.org/abs/';
$adddoiresolver = '0';
$doiprefix = 'doi:';
$doiurl = 'http://dx.doi.org/';
$makehref = 0;
$availablestring = "Available from: ";
$inlinelinks = 0;
$Usage = "$progname [--eprint] [--doi]\n [--nohyperlinks] [--inlinelinks] [--hypertex] [--hyperref]\n [--help] [input-file [output-file]]";
while ($#ARGV >= 0) {
if ($ARGV[0] eq '--eprint') {
$addeprints = 1;
} elsif ($ARGV[0] eq '--doi') {
$adddoiresolver = 1;
} elsif ($ARGV[0] eq '--nohyperlinks') {
$makehref = 0;
} elsif ($ARGV[0] eq '--hypertex') {
$makehref = 1;
} elsif ($ARGV[0] eq '--hyperref') {
$makehref = 2;
} elsif ($ARGV[0] eq '--inlinelinks') {
$inlinelinks = 1;
} elsif ($ARGV[0] eq '--help') {
print <<EOD;
urlbst version $version
Usage: $Usage
--eprint: include support for `eprint' fields
--doi: include support for `doi' field
--nohyperlinks do not include active links anywhere
--inlinelinks add hyperlinks to entry titles
--hypertex: include HyperTeX-style hyperlink support
--hyperref: include {hyperref}-style hyperlink support
(generally better)
--help: print this help
Input and output files may be given as `-' (default) to indicate stdin/out
EOD
exit(0);
} elsif ($ARGV[0] =~ /^-/) {
die "Unrecognised option $ARGV[0]: Usage: $Usage\n";
} elsif ($infile eq '-') {
$infile = $ARGV[0];
} elsif ($outfile eq '-') {
$outfile = $ARGV[0];
} else {
die "Usage: $Usage\n";
}
shift(@ARGV);
}
if ($inlinelinks && $makehref == 0) {
print <<'EOD';
Warning: --inlinelinks and --nohyperlinks were both specified (possibly
implicitly). That combination makes no sense, so I'll ignore
--nohyperlinks and use --hyperref instead
EOD
$makehref = 2;
}
$exitstatus = 0; # success status
open (IN, "<$infile") || die "Can't open $infile to read";
open (OUT, ">$outfile") || die "Can't open $outfile to write";
# We have to make certain assumptions about the source files, in order
# to patch them at the correct places. Specifically, we assume that
#
# - there's a function init.state.consts
#
# - ...and an output.nonnull which does the actual outputting, which
# has the `usual' interface.
#
# - we can replace
# fin.entry
# by
# new.block
# output.url % the function which formats and displays any URL
# fin.entry
#
# - there is a function which handles the `article' entry type (this
# will always be true)
#
# - there is a function output.bibitem which is called at the
# beginning of each entry type
# - ...and one called fin.entry which is called at the end
#
# If the functions format.date, format.title or new.block are not defined (the
# former is not in apalike, for example, and the last is not in the
# AMS styles), then replacements are included in the output.
#
# All these assumptions are true of the standard files and, since most
# style files derive from them more or less directly, are true of most (?)
# other style files, too.
#
# There's some rather ugly Perl down here. The parsing for
# brace-matching could probably do with being rewritten in places, to
# make it less ugly, and more robust.
print OUT "%%% Modification of BibTeX style file ", ($infile eq '-' ? '<stdin>' : $infile), "\n";
print OUT "%%% ... by $progname, version $version (marked with \"$mymarker\")\n%%% See <$myurl>\n";
print OUT "%%% Added webpage entry type, and url and lastchecked fields.\n";
print OUT "%%% Added eprint support.\n" if ($addeprints);
print OUT "%%% Added DOI support.\n" if ($adddoiresolver);
print OUT "%%% Added HyperTeX support.\n" if ($makehref == 1);
print OUT "%%% Added hyperref support.\n" if ($makehref == 2);
print OUT "%%% Original headers follow...\n\n";
$found{initconsts} = 0;
$found{outputnonnull} = 0;
$found{article} = 0;
$found{outputbibitem} = 0;
$found{finentry} = 0;
$found{formatdate} = 0;
$found{formattitle} = 0;
$found{newblock} = 0;
while (<IN>) {
/^ *%/ && do {
# Pass commented lines unchanged
print OUT;
next;
};
/^ *ENTRY/ && do {
# Work through the list of entry types, finding what ones are there.
# If we find a URL entry there already, object, since these edits
# will mess things up.
$line = $_;
until ($line =~ /\{\s*(\w*)/) {
$line .= <IN>;
}
$bracematchtotal = 0; # reset
bracematcher($line);
$line =~ /\{\s*(\w*)/;
$found{'entry'.$1} = 1;
print OUT $line;
$line = <IN>;
until (bracematcher($line) == 0) {
# XXX deal with multiple entries on one line
($line =~ /^\s*(\w*)/) && ($found{'entry'.$1} = 1);
print OUT $line;
$line = <IN>;
}
if (defined($found{entryurl})) {
print STDERR "$progname: style file $infile already has URL entry!\n";
# print out the rest of the file, and give up
print OUT $line;
while (<IN>) {
print OUT;
}
$exitstatus = 1;
last;
} else {
print OUT " eprint $mymarker\n doi $mymarker\n url $mymarker\n lastchecked $mymarker\n";
}
print OUT $line;
next;
};
/^ *FUNCTION *\{init\.state\.consts\}/ && do {
# In the init.state.consts function, add an extra set of
# constants at the beginning. Also use this as the marker for
# the place to add the init strings function.
print OUT <<EOD;
STRINGS { urlintro eprinturl eprintprefix doiprefix doiurl openinlinelink closeinlinelink } $mymarker...
INTEGERS { hrefform inlinelinks makeinlinelink addeprints adddoiresolver }
% Following constants may be adjusted by hand, if desired
FUNCTION {init.urlbst.variables}
{
"$availablestring" 'urlintro := % prefix before URL
"$eprinturl" 'eprinturl := % prefix to make URL from eprint ref
"$eprintprefix" 'eprintprefix := % text prefix printed before eprint ref
"$doiurl" 'doiurl := % prefix to make URL from DOI
"$doiprefix" 'doiprefix := % text prefix printed before DOI ref
#$addeprints 'addeprints := % 0=no eprints; 1=include eprints
#$adddoiresolver 'adddoiresolver := % 0=no DOI resolver; 1=include it
#$makehref 'hrefform := % 0=no crossrefs; 1=hypertex xrefs; 2=hyperref refs
#$inlinelinks 'inlinelinks := % 0=URLs explicit; 1=URLs attached to titles
% the following are internal state variables, not config constants
#0 'makeinlinelink := % state variable managed by setup.inlinelink
"" 'openinlinelink := % ditto
"" 'closeinlinelink := % ditto
}
INTEGERS {
bracket.state
outside.brackets
open.brackets
within.brackets
close.brackets
}
$mymarkerend
EOD
$line = $_;
until ($line =~ /\{.*\}.*\{/s) {
$line .= <IN>;
}
$line =~ s/(\{.*?\}.*?\{)/$1 #0 'outside.brackets := $mymarker
#1 'open.brackets :=
#2 'within.brackets :=
#3 'close.brackets :=
/s;
print OUT $line;
$found{initconsts} = 1;
next;
};
/^ *EXECUTE *\{init\.state\.consts\}/ && do {
print OUT "EXECUTE {init.urlbst.variables}\n";
print OUT;
next;
};
/^ *FUNCTION *\{new.block\}/ && do {
$found{newblock} = 1;
};
/^ *FUNCTION *{output\.nonnull}/ && do {
print OUT "$mymarker\n";
print OUT "FUNCTION {output.nonnull.original}\n";
copy_block();
print_output_functions();
$found{outputnonnull} = 1;
next;
};
/FUNCTION *\{fin.entry\}/ && do {
# Rename fin.entry to fin.entry.original (wrapped below)
s/fin.entry/fin.entry.original/;
$found{finentry} = 1;
print OUT;
next;
};
/^ *FUNCTION *{format\.date}/ && do {
$found{formatdate} = 1;
print OUT;
next;
};
/^ *FUNCTION *{format\.title}/ && do {
# record that we found this
$found{formattitle} = 1;
print OUT;
next;
};
/^ *format\.b?title/ && do {
# interpolate a call to setup.inlinelink
print OUT " title empty\$ 'skip\$ 'setup\.inlinelink if\$ $mymarker\n";
print OUT;
next;
};
/^ *FUNCTION *\{article\}/ && do {
print_missing_functions();
print_webpage_def();
print OUT;
$found{article} = 1;
next;
};
/FUNCTION *\{output.bibitem\}/ && do {
# Rename output.bibitem to output.bibitem.original (wrapped below)
s/{output.bibitem\}/\{output.bibitem.original\}/;
$found{outputbibitem} = 1;
print OUT;
next;
};
print OUT;
};
if ($exitstatus == 0) {
# Skip this if we've already reported an error -- it'll only be confusing
foreach $k (keys %found) {
if ($found{$k} == 0) {
print STDERR "$progname: $infile: failed to find feature $k\n";
}
}
}
close (IN);
close (OUT);
exit $exitstatus;;
sub print_output_functions {
print OUT "$mymarker...\n";
print OUT <<'EOD';
% The following three functions are for handling inlinelink. They wrap
% a block of text which is potentially output with write$ by multiple
% other functions, so we don't know the content a priori.
% They communicate between each other using the variables makeinlinelink
% (which is true if a link should be made), and closeinlinelink (which holds
% the string which should close any current link. They can be called
% at any time, but start.inlinelink will be a no-op unless something has
% previously set makeinlinelink true, and the two ...end.inlinelink functions
% will only do their stuff if start.inlinelink has previously set
% closeinlinelink to be non-empty.
FUNCTION {setup.inlinelink}
{ makeinlinelink
{ hrefform #1 = % hypertex
{ "\special {html:<a href=" quote$ * url * quote$ * "> }{" * 'openinlinelink :=
"\special {html:</a>}" 'closeinlinelink :=
}
{ hrefform #2 = % hyperref
{ "\href{" url * "}{" * 'openinlinelink :=
"}" 'closeinlinelink :=
}
'skip$
if$ % hrefform #2 =
}
if$ % hrefform #1 =
#0 'makeinlinelink :=
}
'skip$
if$ % makeinlinelink
}
FUNCTION {add.inlinelink}
{ openinlinelink empty$
'skip$
{ openinlinelink swap$ * closeinlinelink *
"" 'openinlinelink :=
}
if$
}
EOD
# new.block is defined elsewhere
print OUT <<'EOD';
FUNCTION {output.nonnull}
{ % Save the thing we've been asked to output
's :=
% If the bracket-state is close.brackets, then add a close-bracket to
% what is currently at the top of the stack, and set bracket.state
% to outside.brackets
bracket.state close.brackets =
{ "]" *
outside.brackets 'bracket.state :=
}
'skip$
if$
bracket.state outside.brackets =
{ % We're outside all brackets -- this is the normal situation.
% Write out what's currently at the top of the stack, using the
% original output.nonnull function.
s
add.inlinelink
output.nonnull.original % invoke the original output.nonnull
}
{ % Still in brackets. Add open-bracket or (continuation) comma, add the
% new text (in s) to the top of the stack, and move to the close-brackets
% state, ready for next time (unless inbrackets resets it). If we come
% into this branch, then output.state is carefully undisturbed.
bracket.state open.brackets =
{ " [" * }
{ ", " * } % bracket.state will be within.brackets
if$
s *
close.brackets 'bracket.state :=
}
if$
}
% Call this function just before adding something which should be presented in
% brackets. bracket.state is handled specially within output.nonnull.
FUNCTION {inbrackets}
{ bracket.state close.brackets =
{ within.brackets 'bracket.state := } % reset the state: not open nor closed
{ open.brackets 'bracket.state := }
if$
}
FUNCTION {format.lastchecked}
{ lastchecked empty$
{ "" }
{ inbrackets "cited " lastchecked * }
if$
}
EOD
print OUT "$mymarkerend\n";
}
sub print_webpage_def {
print OUT "$mymarker...\n";
# Some of the functions below call new.block, so we need a dummy
# version, in the case where the style being edited doesn't supply
# that function.
if (! $found{newblock}) {
print OUT "FUNCTION {new.block} % dummy new.block function\n{\n % empty\n}\n\n";
$found{newblock} = 1;
}
print OUT <<'EOD';
% Functions for making hypertext links.
% In all cases, the stack has (link-text href-url)
%
% make 'null' specials
FUNCTION {make.href.null}
{
pop$
}
% make hypertex specials
FUNCTION {make.href.hypertex}
{
"\special {html:<a href=" quote$ *
swap$ * quote$ * "> }" * swap$ *
"\special {html:</a>}" *
}
% make hyperref specials
FUNCTION {make.href.hyperref}
{
"\href {" swap$ * "} {\path{" * swap$ * "}}" *
}
FUNCTION {make.href}
{ hrefform #2 =
'make.href.hyperref % hrefform = 2
{ hrefform #1 =
'make.href.hypertex % hrefform = 1
'make.href.null % hrefform = 0 (or anything else)
if$
}
if$
}
% If inlinelinks is true, then format.url should be a no-op, since it's
% (a) redundant, and (b) could end up as a link-within-a-link.
FUNCTION {format.url}
{ inlinelinks #1 = url empty$ or
{ "" }
{ hrefform #1 =
{ % special case -- add HyperTeX specials
urlintro "\url{" url * "}" * url make.href.hypertex * }
{ urlintro "\url{" * url * "}" * }
if$
}
if$
}
FUNCTION {format.eprint}
{ eprint empty$
{ "" }
{ eprintprefix eprint * eprinturl eprint * make.href }
if$
}
FUNCTION {format.doi}
{ doi empty$
{ "" }
{ doiprefix doi * doiurl doi * make.href }
if$
}
% Output a URL. We can't use the more normal idiom (something like
% `format.url output'), because the `inbrackets' within
% format.lastchecked applies to everything between calls to `output',
% so that `format.url format.lastchecked * output' ends up with both
% the URL and the lastchecked in brackets.
FUNCTION {output.url}
{ url empty$
'skip$
{ new.block
format.url output
format.lastchecked output
}
if$
}
FUNCTION {output.web.refs}
{
new.block
output.url
addeprints eprint empty$ not and
{ format.eprint output.nonnull }
'skip$
if$
adddoiresolver doi empty$ not and
{ format.doi output.nonnull }
'skip$
if$
}
% Wrapper for output.bibitem.original.
% If the URL field is not empty, set makeinlinelink to be true,
% so that an inline link will be started at the next opportunity
FUNCTION {output.bibitem}
{ outside.brackets 'bracket.state :=
output.bibitem.original
inlinelinks url empty$ not and
{ #1 'makeinlinelink := }
{ #0 'makeinlinelink := }
if$
}
% Wrapper for fin.entry.original
FUNCTION {fin.entry}
{ output.web.refs % urlbst
makeinlinelink % ooops, it appears we didn't have a title for inlinelink
{ setup.inlinelink % add some artificial link text here, as a fallback
"[link]" output.nonnull }
'skip$
if$
bracket.state close.brackets = % urlbst
{ "]" * }
'skip$
if$
fin.entry.original
}
% Webpage entry type.
% Title and url fields required;
% author, note, year, month, and lastchecked fields optional
% See references
% ISO 690-2 http://www.nlc-bnc.ca/iso/tc46sc9/standard/690-2e.htm
% http://www.classroom.net/classroom/CitingNetResources.html
% http://neal.ctstateu.edu/history/cite.html
% http://www.cas.usf.edu/english/walker/mla.html
% for citation formats for web pages.
FUNCTION {webpage}
{ output.bibitem
author empty$
{ editor empty$
'skip$ % author and editor both optional
{ format.editors output.nonnull }
if$
}
{ editor empty$
{ format.authors output.nonnull }
{ "can't use both author and editor fields in " cite$ * warning$ }
if$
}
if$
new.block
title empty$ 'skip$ 'setup.inlinelink if$
format.title "title" output.check
inbrackets "online" output
new.block
year empty$
'skip$
{ format.date "year" output.check }
if$
% We don't need to output the URL details ('lastchecked' and 'url'),
% because fin.entry does that for us, using output.web.refs. The only
% reason we would want to put them here is if we were to decide that
% they should go in front of the rather miscellaneous information in 'note'.
new.block
note output
fin.entry
}
EOD
print OUT "$mymarkerend\n\n\n";
}
sub print_missing_functions {
# We've got to the bit of the file which handles the entry
# types, so write out the webpage entry handler. This uses
# the format.date function, which which many but not all
# bst files have (for example, apalike doesn't). So
# check that we either have found this function already, or
# add it.
if (! $found{formatdate}) {
if ($found{entrymonth}) {
print OUT <<'EOD';
FUNCTION {format.date}
{ year empty$
{ month empty$
{ "" }
{ "there's a month but no year in " cite$ * warning$
month
}
if$
}
{ month empty$
'year
{ month " " * year * }
if$
}
if$
}
EOD
} else {
print OUT <<'EOD';
FUNCTION {format.date}
{ year empty$
'skip$
{ %write$
"(" year * ")" *
}
if$
}
EOD
}
$found{formatdate} = 1;
}
# If the style file didn't supply a format.title function, then supply
# one here (the {webpage} function requires it).
if (! $found{formattitle}) {
print OUT <<'EOD';
FUNCTION {format.title}
{ title empty$
{ "" }
{ title "t" change.case$ }
if$
}
EOD
$found{formattitle} = 1;
}
}
# Utility function: Keep track of open and close braces in the string argument.
# Keep state in $bracematchtotal, return the current value.
sub bracematcher {
my $s = shift;
$s =~ s/[^\{\}]//g;
#print "s=$s\n";
foreach my $c (split (//, $s)) {
$bracematchtotal += ($c eq '{' ? 1 : -1);
}
return $bracematchtotal;
}
# Utility function: use bracematcher to copy the complete block which starts
# on or after the current line.
sub copy_block {
$bracematchtotal = 0;
# copy any leading lines which don't have braces (presumably comments)
while (defined ($line = <IN>) && ($line !~ /{/)) {
print OUT $line;
}
while (defined ($line) && bracematcher($line) > 0) {
print OUT $line;
$line = <IN>;
}
print OUT "$line\n"; # print out terminating \} (assumed
# alone on the line)
}