[tex-live] hyphenation exceptions for US English

Werner LEMBERG wl at gnu.org
Fri Nov 16 09:25:21 CET 2007


>     Copyright 2007 TeX Users Group.
>     You may freely use, modify and/or distribute this file.
> 
> i've no problem with this.  can one of you please
> add this (maybe via the script?) and post it?

Attached.  The Perl version of the script is part of groff, BTW.


    Werner
-------------- next part --------------
#! /usr/bin/perl
#
#
# hyphenex.pl
#
# This small filter converts a hyphenation exception log article for
# TUGBoat to a real \hyphenation block.
#
# Written by Werner Lemberg <wl at gnu.org>.
#
# Version 1.2 (2007/11/16)
#
# Public domain.
#
#
# Usage:
#
#   [perl] hyphenex.pl < tugboat-article > hyphenation-exceptions

use strict;
use warnings;

# Return the list of hyphenation patterns contributed by one input line.
#
# Lines not starting with \1 ... \6 or \tabalign yield an empty list, and
# so do lines which carry no pattern after clean-up (an entry consisting
# of blanks only would become an empty line, i.e. \par, inside of the
# generated \hyphenation block).
#
# For the numbered macros the cleaned line is split at whitespace and the
# third field is taken as the base pattern:
#
#   \1, \4   the base pattern only
#   \2, \5   the base pattern, followed by the base pattern with each
#            comma-separated suffix of the fourth field appended
#   \3, \6   the base pattern with each comma-separated suffix of the
#            fourth and fifth fields appended (the bare base pattern is
#            not emitted)
#
# For everything else (this is, \tabalign lines) the pattern is the text
# between the first `&' and the next one (or the end of the line).
sub hyphenation_entries {
  my ($line) = @_;

  # retain only lines starting with \1 ... \6 or \tabalign
  return unless $line =~ m/^\\(?:[123456]|tabalign)/;
  # remove the line terminator, if any; `chomp' (unlike `chop') leaves a
  # final line without newline intact
  chomp $line;
  # remove all TeX commands except \1 ... \6
  $line =~ s/\\[^123456\s{]+//g;
  # remove all paired { ... }, innermost first
  1 while $line =~ s/\{(.*?)\}/$1/g;
  # skip lines which now have only whitespace before `&'
  return if $line =~ m/^\s*&/;
  # remove comments
  $line =~ s/%.*//;
  # remove trailing whitespace
  $line =~ s/\s*$//;
  # remove trailing `*' (used as a marker in the document)
  $line =~ s/\*$//;

  # split at whitespace
  my @field = split(' ', $line);
  # nothing left after clean-up
  return unless @field;

  my $macro = $field[0];
  my $base = $field[2];

  if ($macro eq "\\1" || $macro eq "\\4") {
    return defined $base ? ($base) : ();
  }
  elsif ($macro eq "\\2" || $macro eq "\\5") {
    return unless defined $base;
    # handle multiple suffixes separated by commas
    my @suffix_list = defined $field[3] ? split(/,/, $field[3]) : ();
    return ($base, map { "$base$_" } @suffix_list);
  }
  elsif ($macro eq "\\3" || $macro eq "\\6") {
    return unless defined $base;
    # handle multiple suffixes separated by commas; both suffix fields
    # are joined first so that they form a single list
    my @suffix_list = split(/,/, join(',', grep { defined } @field[3, 4]));
    return map { "$base$_" } @suffix_list;
  }

  # for `&', split at `&' with trailing whitespace
  my (undef, $pattern) = split(/&\s*/, $line);
  return (defined $pattern && length $pattern) ? ($pattern) : ();
}

# Filter standard input (or the files named on the command line) and
# write the complete \hyphenation block to standard output.
sub main {
  # print header
  print
    "% Hyphenation exceptions for US English,\n",
    "% based on the hyphenation exception log article in TUGBoat.\n",
    "%\n",
    "% Copyright 2007 TeX Users Group.\n",
    "% You may freely use, modify and/or distribute this file.\n",
    "%\n",
    "% This is an automatically generated file.  Do not edit!\n",
    "%\n",
    "% Please contact Barbara Beeton <bnb\@ams.org>\n",
    "% for corrections and omissions.\n",
    "\n",
    "\\hyphenation{\n";

  while (my $line = <>) {
    print "  $_\n" for hyphenation_entries($line);
  }

  # print trailer
  print
    "}\n",
    "\n",
    "% EOF\n";
}

# run the filter only if executed as a script, not if loaded via `require'
main() unless caller;

# true value for `require'
1;
-------------- next part --------------
#! /bin/sh
#
#
# hyphenex.sh
#
# This small filter converts a hyphenation exception log article for
# TUGBoat to a real \hyphenation block.
#
# Written by Werner Lemberg <wl at gnu.org>.
#
# Version 1.4 (2007/11/16)
#
# Public domain.
#
#
# Usage:
#
#   sh hyphenex.sh < tugboat-article > hyphenation-exceptions

# The sed program is written to a temporary file.  Let `mktemp' create
# it so that its name is not predictable, and remove it on every exit
# path (including interruption by a signal).
sed_script=`mktemp "${TMPDIR:-/tmp}/hyphenex.XXXXXX"` || exit 1
trap 'rm -f "$sed_script"' 0
trap 'exit 1' 1 2 13 15

# The here-document is unquoted: the shell reduces `\\' to `\' and `\$'
# to `$' before sed sees the program.
#
# Program outline:
#   - `1 i\' emits the header before the first input line.
#   - `$ i\' emits the trailer when the last input line is reached, i.e.
#     before that line is processed.  NOTE(review): this presumably relies
#     on the last input line not being an exception entry -- confirm.
#   - \tabalign lines print the text after `&'.
#   - All other lines not starting with \1 ... \6 are deleted.
#   - \4, \5, \6 are mapped to \1, \2, \3, then a series of substitutions
#     handles the braced and unbraced argument forms with up to three
#     comma-separated suffixes.
cat >"$sed_script" <<SED_EOF
1 i\\
% Hyphenation exceptions for US English, based on the hyphenation exception\\
% log article in TUGBoat.\\
%\\
% Copyright 2007 TeX Users Group.\\
% You may freely use, modify and/or distribute this file.\\
%\\
% This is an automatically generated file.  Do not edit!\\
%\\
% Please contact Barbara Beeton <bnb@ams.org> for corrections and omissions.\\
\\
\\\\hyphenation{
\$ i\\
}\\
\\
% EOF
/^\\\\tabalign/ {
  s/{}//g
  s/[ 	][ 	]*/ /g
  s/ *\\\\[^ ][^ ]*\$//
  s/\\\\tabalign *[^ ][^ ]*.*& *\\(.*\\)\$/  \\1/p
  b
}
/^\\\\[123456]/ !d
s/\\\\4/\\\\1/
s/\\\\5/\\\\2/
s/\\\\6/\\\\3/
s/[ 	][ 	]*/ /g
s/^/  /
s/\\\\1 [^ ][^ ]* {\\(.*\\)}\$/\\1/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\),\\(.*\\),\\(.*\\)}\$/\\1\\n  \\1\\2\\n  \\1\\3\\n  \\1\\4/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\),\\(.*\\)}\$/\\1\\n  \\1\\2\\n  \\1\\3/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\)}\$/\\1\\n  \\1\\2/
s/\\\\3 [^ ][^ ]* {\\(.*\\) \\(.*\\) \\(.*\\)}\$/\\1\\2\\n  \\1\\3/
s/\\\\1 [^ ][^ ]* {\\(.*\\)} .*\$/\\1/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\),\\(.*\\),\\(.*\\)} .*\$/\\1\\n  \\1\\2\\n  \\1\\3\\n  \\1\\4/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\),\\(.*\\)} .*\$/\\1\\n  \\1\\2\\n  \\1\\3/
s/\\\\2 [^ ][^ ]* {\\(.*\\) \\(.*\\)} .*\$/\\1\\n  \\1\\2/
s/\\\\3 [^ ][^ ]* {\\(.*\\) \\(.*\\) \\(.*\\)} .*\$/\\1\\2\\n  \\1\\3/
s/\\\\1 {.*} {\\(.*\\)}\$/\\1/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\),\\(.*\\),\\(.*\\)}\$/\\1\\n  \\1\\2\\n  \\1\\3\\n  \\1\\4/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\),\\(.*\\)}\$/\\1\\n  \\1\\2\\n  \\1\\3/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\)}\$/\\1\\n  \\1\\2/
s/\\\\3 {.*} {\\(.*\\) \\(.*\\) \\(.*\\)}\$/\\1\\2\\n  \\1\\3/
s/\\\\1 {.*} {\\(.*\\)} .*\$/\\1/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\),\\(.*\\),\\(.*\\)} .*\$/\\1\\n  \\1\\2\\n  \\1\\3\\n  \\1\\4/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\),\\(.*\\)} .*\$/\\1\\n  \\1\\2\\n  \\1\\3/
s/\\\\2 {.*} {\\(.*\\) \\(.*\\)} .*\$/\\1\\n  \\1\\2/
s/\\\\3 {.*} {\\(.*\\) \\(.*\\) \\(.*\\)} .*\$/\\1\\2\\n  \\1\\3/
s/\\\\1 [^ ][^ ]* \\([^ ][^ ]*\\)\$/\\1/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\),\\([^ ][^ ]*\\),\\([^ ][^ ]*\\)\$/\\1\\n  \\1\\2\\n  \\1\\3\\n  \\1\\4/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\),\\([^ ][^ ]*\\)\$/\\1\\n  \\1\\2\\n  \\1\\3/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\)\$/\\1\\n  \\1\\2/
s/\\\\3 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\) \\([^ ][^ ]*\\)\$/\\1\\2\\n  \\1\\3/
s/\\\\1 [^ ][^ ]* \\([^ ][^ ]*\\) .*\$/\\1/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\),\\([^ ][^ ]*\\),\\([^ ][^ ]*\\) .*\$/\\1\\n  \\1\\2\\n  \\1\\3\\n  \\1\\4/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\),\\([^ ][^ ]*\\) .*\$/\\1\\n  \\1\\2\\n  \\1\\3/
s/\\\\2 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\) .*\$/\\1\\n  \\1\\2/
s/\\\\3 [^ ][^ ]* \\([^ ][^ ]*\\) \\([^ ][^ ]*\\) \\([^ ][^ ]*\\) .*\$/\\1\\2\\n  \\1\\3/
s/\\*//
s/\\\\llap *{\\(.*\\)}/\\1/
s/\\\\[^ ]* //
s/\\\\[^ ][^ ]*\$//
s/{.*}//
p
SED_EOF

# The exit status of the script is the one of sed; the temporary file is
# removed by the exit trap.
sed -n -f "$sed_script"

# EOF


More information about the tex-live mailing list