[tex-live] hyphenation exceptions for US English
Werner LEMBERG
wl at gnu.org
Fri Nov 16 09:25:21 CET 2007
> Copyright 2007 TeX Users Group.
> You may freely use, modify and/or distribute this file.
>
> I've no problem with this. Can one of you please
> add this (maybe via the script?) and post it?
Attached. The Perl version of the script is part of groff, BTW.
Werner
-------------- next part --------------
#! /usr/bin/perl
#
#
# hyphenex.pl
#
# This small filter converts a hyphenation exception log article for
# TUGBoat to a real \hyphenation block.
#
# Written by Werner Lemberg <wl at gnu.org>.
#
# Version 1.2 (2007/11/16)
#
# Public domain.
#
#
# Usage:
#
# [perl] hyphenex.pl < tugboat-article > hyphenation-exceptions
use strict;
use warnings;

# print header
#
# The header is emitted verbatim; its last line opens the \hyphenation
# block that is closed again by the trailer at the end of the script.
print "% Hyphenation exceptions for US English,\n";
print "% based on the hyphenation exception log article in TUGBoat.\n";
print "%\n";
print "% Copyright 2007 TeX Users Group.\n";
print "% You may freely use, modify and/or distribute this file.\n";
print "%\n";
print "% This is an automatically generated file. Do not edit!\n";
print "%\n";
print "% Please contact Barbara Beeton <bnb\@ams.org>\n";
print "% for corrections and omissions.\n";
print "\n";
print "\\hyphenation{\n";

# Convert the article line by line.  Input is taken from the files named
# on the command line or, if there are none, from standard input.
LINE:
while (my $line = <>) {
    # retain only lines starting with \1 ... \6 or \tabalign
    next LINE if $line !~ m/^\\(?:[123456]|tabalign)/;

    # remove final newline; chomp (unlike chop) leaves the last character
    # alone if the final input line is not terminated by a newline
    chomp $line;

    # remove all TeX commands except \1 ... \6
    $line =~ s/\\[^123456\s{]+//g;

    # remove all paired { ... }; repeated so that nested groups are
    # unwrapped as well
    1 while $line =~ s/\{(.*?)}/$1/g;

    # skip lines which now have only whitespace before `&'
    next LINE if $line =~ m/^\s*&/;

    # remove comments
    $line =~ s/%.*//;
    # remove trailing whitespace
    $line =~ s/\s*$//;
    # remove trailing `*' (used as a marker in the document)
    $line =~ s/\*$//;

    # split at whitespace: field 0 is the command, field 1 is not used
    # here, field 2 is the word to emit, fields 3 and 4 hold suffixes.
    # Fields missing on a malformed line are treated as empty strings.
    my @field = split ' ', $line;
    my ($command, undef, $word, $suffixes_a, $suffixes_b)
        = map { defined $_ ? $_ : '' } @field[0 .. 4];

    if ($command eq "\\1" || $command eq "\\4") {
        # a single word
        print " $word\n";
    }
    elsif ($command eq "\\2" || $command eq "\\5") {
        # the word itself, followed by the word with each suffix;
        # multiple suffixes are separated by commata
        print " $word\n";
        for my $suffix (split /,/, $suffixes_a) {
            print " $word$suffix\n";
        }
    }
    elsif ($command eq "\\3" || $command eq "\\6") {
        # only suffixed forms; both suffix fields may hold multiple
        # suffixes separated by commata
        for my $suffix (split /,/, "$suffixes_a,$suffixes_b") {
            print " $word$suffix\n";
        }
    }
    else {
        # for `&', split at `&' with trailing whitespace and emit the
        # text between the first and the second `&'
        my @cell = split /&\s*/, $line;
        my $entry = defined $cell[1] ? $cell[1] : '';
        print " $entry\n";
    }
}

# print trailer
print "}\n";
print "\n";
print "% EOF\n";
-------------- next part --------------
#! /bin/sh
#
#
# hyphenex.sh
#
# This small filter converts a hyphenation exception log article for
# TUGBoat to a real \hyphenation block.
#
# Written by Werner Lemberg <wl at gnu.org>.
#
# Version 1.4 (2007/11/16)
#
# Public domain.
#
#
# Usage:
#
# sh hyphenex.sh < tugboat-article > hyphenation-exceptions
# Create the temporary sed script under an unpredictable name.  A fixed
# name such as /tmp/sed.<pid> can be pre-created or symlinked by another
# local user before this script writes to it.
sed_script=`mktemp "${TMPDIR:-/tmp}/hyphenex.XXXXXX"` || exit 1

# Remove the temporary file on every exit path.  The signal trap turns
# HUP, INT, PIPE and TERM into a regular exit so that the exit trap runs.
trap 'rm -f "$sed_script"' 0
trap 'exit 1' 1 2 13 15

# Write the sed program.  The here-document delimiter is unquoted, so the
# shell turns each `\\' into `\' and each `\$' into `$' before sed sees
# the text.  Outline of the resulting sed program:
#
#   1 i\ ...          insert the header before the first input line
#   $ a\ ...          append the trailer after the last input line, so
#                     that an entry on the last line still ends up inside
#                     the \hyphenation block
#   /^\\tabalign/     for \tabalign lines print the text following the
#                     last `&', then branch to the end of the script
#   /^\\[123456]/ !d  drop every other line not starting with \1 ... \6
#   s/\\4/\\1/ ...    treat \4, \5, \6 like \1, \2, \3
#   s/\\1 .../ ...    six families of patterns (first argument bare or in
#                     braces, word in braces or bare, each with or
#                     without trailing text) expand \1 to the word, \2 to
#                     the word plus the word with one to three
#                     comma-separated suffixes, and \3 to the word with
#                     each of two suffixes
#   s/\*// ...        final cleanup of a `*' marker, \llap{...}, leftover
#                     commands and brace groups; then print the result
cat >"$sed_script" <<SED_EOF
1 i\\
% Hyphenation exceptions for US English, based on the hyphenation exception\\
% log article in TUGBoat.\\
%\\
% Copyright 2007 TeX Users Group.\\
% You may freely use, modify and/or distribute this file.\\
%\\
% This is an automatically generated file. Do not edit!\\
%\\
% Please contact Barbara Beeton <bnb@ams.org> for corrections and omissions.\\
\\
\\\\hyphenation{
\$ a\\
}\\
\\
% EOF
/^\\\\tabalign/ {
s/{}//g
s/[ ][ ]*/ /g
s/ *\\\\[^ ][^ ]*\$//
s/\\\\tabalign *[^ ][^ ]*.*& *\\(.*\\)\$/ \\1/p
b
}
/^\\\\[123456]/ !d
s/\\\\4/\\\\1/
s/\\\\5/\\\\2/
s/\\\\6/\\\\3/
s/[ ][ ]*/ /g
s/^/ /
s/\\\\1 [^ ][^ ]* {\\(.*\\)}\$/\\1/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\),\\(.*\\),\\(.*\\)}\$/\\1\\n \\1\\2\\n \\1\\3\\n \\1\\4/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\),\\(.*\\)}\$/\\1\\n \\1\\2\\n \\1\\3/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\)}\$/\\1\\n \\1\\2/
s/\\\\3 [^ ][^ ]* {\\(.*\\) \\(.*\\) \\(.*\\)}\$/\\1\\2\\n \\1\\3/
s/\\\\1 [^ ][^ ]* {\\(.*\\)} .*\$/\\1/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\),\\(.*\\),\\(.*\\)} .*\$/\\1\\n \\1\\2\\n \\1\\3\\n \\1\\4/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\),\\(.*\\)} .*\$/\\1\\n \\1\\2\\n \\1\\3/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\)} .*\$/\\1\\n \\1\\2/
s/\\\\3 [^ ][^ ]* {\\(.*\\) \\(.*\\) \\(.*\\)} .*\$/\\1\\2\\n \\1\\3/
s/\\\\1 {.*} {\\(.*\\)}\$/\\1/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\),\\(.*\\),\\(.*\\)}\$/\\1\\n \\1\\2\\n \\1\\3\\n \\1\\4/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\),\\(.*\\)}\$/\\1\\n \\1\\2\\n \\1\\3/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\)}\$/\\1\\n \\1\\2/
s/\\\\3 {.*} {\\(.*\\) \\(.*\\) \\(.*\\)}\$/\\1\\2\\n \\1\\3/
s/\\\\1 {.*} {\\(.*\\)} .*\$/\\1/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\),\\(.*\\),\\(.*\\)} .*\$/\\1\\n \\1\\2\\n \\1\\3\\n \\1\\4/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\),\\(.*\\)} .*\$/\\1\\n \\1\\2\\n \\1\\3/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\)} .*\$/\\1\\n \\1\\2/
s/\\\\3 {.*} {\\(.*\\) \\(.*\\) \\(.*\\)} .*\$/\\1\\2\\n \\1\\3/
s/\\\\1 [^ ][^ ]* \\([^ ][^ ]*\\)\$/\\1/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\),\\([^ ][^ ]*\\),\\([^ ][^ ]*\\)\$/\\1\\n \\1\\2\\n \\1\\3\\n \\1\\4/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\),\\([^ ][^ ]*\\)\$/\\1\\n \\1\\2\\n \\1\\3/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\)\$/\\1\\n \\1\\2/
s/\\\\3 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\) \\([^ ][^ ]*\\)\$/\\1\\2\\n \\1\\3/
s/\\\\1 [^ ][^ ]* \\([^ ][^ ]*\\) .*\$/\\1/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\),\\([^ ][^ ]*\\),\\([^ ][^ ]*\\) .*\$/\\1\\n \\1\\2\\n \\1\\3\\n \\1\\4/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\),\\([^ ][^ ]*\\) .*\$/\\1\\n \\1\\2\\n \\1\\3/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\) .*\$/\\1\\n \\1\\2/
s/\\\\3 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\) \\([^ ][^ ]*\\) .*\$/\\1\\2\\n \\1\\3/
s/\\*//
s/\\\\llap *{\\(.*\\)}/\\1/
s/\\\\[^ ]* //
s/\\\\[^ ][^ ]*\$//
s/{.*}//
p
SED_EOF

# Run the filter on standard input; -n suppresses sed's default output so
# that only explicitly printed entries (and the inserted header and
# trailer) appear.  The temporary script is removed by the exit trap.
sed -n -f "$sed_script"
# EOF
More information about the tex-live
mailing list