#!/usr/local/bin/perl # # Copyright (c) Nov 1997-1999 Wolfram Schneider , Berlin. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $Id: dict.cgi,v 1.28 2000/01/22 13:57:46 wosch Exp $ # # dict.cgi - Web search interface for a german <-> english dictionary # You need perl4 or higher, the agrep tool and a free # available dictionary. sub init_variables { $grep = '/home/wosch/bin/agrep-wrapper'; # must be an absolute path $grep = '/usr/local/bin/agrep'; %dict = ( 'tuc', '/home/wosch/lib/dict/ger-eng.txt', 'tub', '/home/wosch/lib/dict/eng2ger.vok', 'avon', '/home/wosch/lib/dict/avon', ); %url = ( 'tuc', 'ftp://ftp.tu-chemnitz.de/pub/Local/urz/fri/ger-eng.txt.gz', 'tub', 'http://pub.cs.tu-berlin.de/lib/dictionaries/eng2ger.vok', 'avon', 'http://www.de.freebsd.org/~wosch/src/avon-1996.gz', ); %desc = ( 'tuc', 'General german-english dictionary, TU Chemnitz', 'tub', 'General german-english dictionary, TU Berlin', 'avon', 'German phone code', ); $eightbit = 0; # dictionary is 8 bit clean # word delimiter in dictionary %delim = ('tub', '\*', 'tuc', '\s::\s', 'avon', ':'); # englisch - deutsch -> 0 # deutsch - englisch -> 1 %order = ('tuc', '1'); $daemonGif = ''; # '; $debug = 0; # visible E-Mail address, plain text $mailto = 'wosch@FreeBSD.org'; # the URL if you click at the E-Mail address (see below) $mailtoURL = 'http://wolfram.schneider.org/'; $mailtoURL = "mailto:$mailto" if !$mailtoURL; # security $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin'; $ENV{'HOME'} = '/tmp'; # strange, agrep expect a HOME directory $SIG{'ALRM'} = 'timeout'; # signal handler for timeouts $sec = 10; # timeout in seconds # default values $lang = "0"; $icase = "1"; $wholewords = "0"; $regexp = "0"; $hits = $default{'hits'} = '50'; $matches = "0"; $plang = $default{'plang'} = 'en'; $db = $default{'db'} = 'tuc'; # altavista meta keywords $meta = ''; undef $pid; } sub read_variables { $query_string = &env('QUERY_STRING'); $path_info = &env('PATH_INFO'); &decode_form($query_string, *form); $script_name = &env('SCRIPT_NAME'); $lang = $form{'lang'} if defined($form{'lang'}); $plang = $form{'plang'} if defined($form{'plang'}); $db = $form{'db'} if defined($form{'db'}); $icase = $form{'icase'} if defined($form{'icase'}); $wholewords = $form{'wholewords'} if defined($form{'wholewords'}); $regexp = $form{'regexp'} if defined($form{'regexp'}); $hits = $form{'hits'} if defined($form{'hits'}); $matches = $form{'matches'} if defined($form{'matches'}); $query = $form{'query'} if defined($form{'query'}); # best match does not work if you search only # in one language. $lang = 0 if ($matches && $lang); } sub init_lang { # main language german %de = ( 'title', 'Englisch-Deutsches Wörterbuch', 'de', 'deutsch', 'en', 'englisch', 'bln', 'berlinerisch', 'searchfor', 'Suche nach', 'start', 'Start', 'oen', 'nur englisch', 'ode', 'nur deutsch', 'oende', 'deutsch und englisch', 'lang', 'Sprache', 'book', 'Wörterbuch', 'yes', 'ja', 'no', 'nein', 'icase', 'Groß-und Kleinschreibung ignorieren', 'wword', 'Ganze Wörter', 'regexp', 'Reguläre Ausdrücke', 'hits', 'Anzahl der Treffer', '0error', 'keine Fehler', '1error', '1 Fehler', '2error', '2 Fehler', '3error', '3 Fehler', 'bestmatch', 'passend', 'cmatch', 'Anzahl der Fehler', 'abort', 'Suche abgebrochen nach', 'matches', 'Treffern', 'nf', 'Nichts gefunden für', ); # english %en = ( 'title', 'English-German dictionary', 'de', 'German', 'en', 'English', 'bln', 'Berlin', 'searchfor', 'Search for', 'start', 'Submit', 'oen', 'only English', 'ode', 'only German', 'oende', 'German and English', 'lang', 'Language', 'book', 'Dictionary', 'yes', 'yes', 'no', 'no', 'icase', 'Ignore case distinctions', 'wword', 'Whole words', 'regexp', 'Regular expressions', 'hits', 'Hits', '0error', 'no errors', '1error', '1 error', '2error', '2 error', '3error', '3 error', 'bestmatch', 'Best match', 'cmatch', 'Numbers of errors', 'abort', 'Abort search after', 'matches', 'matches', 'nf', 'Nothing found for', ); # berlin %bln = ( 'title', 'Änglisch-Deutsches Wörtabuch', 'bln', 'berlinerisch', 'no', 'nee', 'icase', 'Jroß-und Kleenschreibung ignoriern', 'wword', 'Janze Wörta', 'regexp', 'rejuläre Ausdrücke', 'abort', 'Suche abjebrochen nach', '0error', 'keene Fehler', 'nf', 'Nüschts jefunden für', ); while(($key, $val) = each %de) { $bln{"$key"} = $val if !$bln{"$key"}; $en{"$key"} = $val if !$en{"$key"}; } @langlist = ('de', 'en', 'bln'); $default{'plang'} = $langlist[0] if !grep(/^$default{'plang'}$/, @langlist); $plang = $default{'plang'} if !grep(/^$plang$/, @langlist); foreach (@langlist) { if ($plang eq $_) { eval "*cm = *$_"; last; } } } sub timeout { $| = 1; local($message) = "timeout after $sec seconds.
\nReason: "; if ($sec < 15) { $message .= "CPU limit reached\n"; } else { $message .= "Slow network\n"; } print "\n"; print $message, "

\n"; warn $message . $ENV{'QUERY_STRING'} . "\n"; &footer; &footer2; kill 15, $pid if $pid; kill 9, $pid if $pid; kill 1, $$; # kill process group exit; } sub dec { local($_) = @_; s/\+/ /g; # '+' -> space s/%(..)/pack("c",hex($1))/ge; # '%ab' -> char ab return($_); } sub header { print "Window-target: _top\n"; print "Content-type: text/html\n"; print "\n"; } # to 7 bits sub umlaut { local($_) = @_; return $_ if $eightbit; # dictionaries is 8 bit clean ;-) s/\ß/ß/g; s/ä/ae/g; s/ö/oe/g; s/ü/ue/g; s/ß/ss/g; s/Ä/Ae/g; s/Ö/Oe/g; s/Ü/Ue/g; s/\&([AOUaou])uml;/$1e/g; return $_; } sub search { local($query, @grepopt) = @_; local($counter) = 0; $| = 1; unshift (@grepopt, '-h'); # push(@grepopt, '-e'); $db = $default{'db'} if !defined($dict{"$db"}); local($dict) = $dict{"$db"}; print "$grep, @grepopt, $query, $dict\n" if $debug > 0; if (! -x $grep) { print "Alert! Binary not found\n"; return -1; } if (! -r $dict) { print "Alert! dictionary not found\n"; return -1; } if ($pid = open(C, "-|")) { } # child else { if (!exec ($grep, @grepopt, $query, $dict)) { print "Alert! Cannot fork: $!\n"; return -1; } } local($m) = 0; $m = 1 if $query =~ /^[\w\s\-]+$/; local($h) = $hits; $h = $default{'hits'} if $hits < 1 || $hits > 1000; local($delim) = $delim{"$db"}; # swap entries local($lang) = $lang; if ($order{"$db"}) { if ($lang eq "1") { $lang = "2"; } elsif ($lang eq "2") { $lang = "1"; } } # timeout for child process - agrep command vec($rin,fileno(C),1) = 1; local($nfound,$timeleft) = select($rout=$rin, undef, undef, $sec); &timeout if (!$nfound); # timeout for parent process - cgi script $sec = 60; alarm($sec); local($from, $to, $ft); while() { if ($m) { ($from, $to) = split(/$delim/); if ($lang eq "1") { if ($icase eq "1") { next if $from !~ /$query/oi; } else { next if $from !~ /$query/o; } } elsif ($lang eq "2") { if ($icase eq "1") { next if $to !~ /$query/oi; } else { next if $to !~ /$query/o; } } $_ = sprintf("%-30s \- %s", $from, $to); s/($query)/$1<\/b>/oig; } else { ($from, $to) = split(/$delim/); $_ = sprintf("%-30s \- %s", $from, $to); } print; $counter++; if ($counter > $h) { print qq([$cm{'abort'} $h $cm{'matches'}]\n); $h = 0; # flag last; } } close C; print "[$cm{'hits'} $counter]\n" if $h && $counter > 5; return $counter; } # $indent is a bit of optional data processing I put in for # formatting the data nicely when you are emailing it. # This is derived from code by Denis Howe # and Thomas A Fine sub decode_form { local($form, *data, $indent, $key, $_) = @_; foreach $_ (split(/&/, $form)) { ($key, $_) = split(/=/, $_, 2); $_ =~ s/\+/ /g; # + -> space $key =~ s/\+/ /g; # + -> space $_ =~ s/%([\da-f]{1,2})/pack(C,hex($1))/eig; # undo % escapes $key =~ s/%([\da-f]{1,2})/pack(C,hex($1))/eig; # undo % escapes $_ =~ s/[\r\n]+/\n\t/g if defined($indent); # indent data after \n $data{$key} = $_; } } # encode unknown data for use in a URL sub encode_url { local($_) = @_; s/([\000-\032\;\/\?\:\@\&\=\%\'\"\`\<\>\177-\377 ])/sprintf('%%%02x',ord($1))/eg; # s/%20/+/g; $_; } sub warn { print "$_[0]" } sub env { defined($ENV{$_[0]}) ? $ENV{$_[0]} : undef; } sub exit { exit 0 }; sub forms { $db = $default{'db'} if !defined($dict{"$db"}); print qq[ $cm{'title'} $meta

$cm{'title'} $daemonGif

]; foreach (@langlist) { next if $plang eq $_; print qq{} . qq($cm{"$_"} ); } print qq{[FAQ]
}; print "

" if $#langlist > 0; print qq(

$cm{'searchfor'}:

); print qq{\n}; print qq{\n}; local(%d); %d = (1, $cm{'oen'}, 2, $cm{'ode'}, 0, $cm{'oende'}); print qq($cm{'lang'}\n\n\n}; print qq($cm{'book'}\n
\n\n}; undef %d; %d = ('1', $cm{'yes'}, '0', $cm{'no'}); print qq($cm{'icase'}\n\n
\n\n}; print qq($cm{'wword'}\n\n\n}; print qq($cm{'regexp'}\n\n
\n\n}; print qq($cm{'hits'}\n\n\n"; undef %d; %d = ('0', $cm{'0error'}, '1', $cm{'1error'}, '2', $cm{'2error'}, '3', $cm{'3error'}, '999', $cm{'bestmatch'}, ); print qq($cm{'cmatch'}\n\n}; print qq{

\n}; print qq{
}; } sub footer { local($gif) = 'powerlogo.gif'; $gif = '../' . $gif if $path_info; print qq{ [Powered by FreeBSD] © 1997-1999 by Wolfram Schneider. All rights reserved.
Please direct questions about this service to $mailto

\n

\n}; } sub footer2 { print "\n\n\n"; } sub faq { print qq{\nFAQ\n

FAQ

  • Get the Source of this script.
  • This is not an official mirror site from any other site.
  • Dictionary search with a Shell-Script.
  • Umlaute werden bei der Eingabe als US ASCII (ae, oe, ue, ss, Ae, Oe, Ue), ISO 8859-1 Zeichen (äöüßÄÖÜ) oder als SGML tags (`&Uuml;' für `Ü') akzeptiert.

}; print qq{

Other Dictionaries / Andere Wörterbücher

\n}; foreach (qw{ http://www.yahoo.de/Geisteswissenschaften/Sprach__und_Literaturwissenschaft/Sprachen/Englisch/Woerterbuecher http://www.dictionary.com/Dir/Reference/Dictionaries/Translation/G/German/ http://www.facstaff.bucknell.edu/rbeard/diction3.html#german http://directory.netscape.com/Reference/Dictionaries/Translation/G/German/ http://dir.lycos.com/Reference/Dictionaries/Translation/G/German/}) { print qq{$_
\n}; } print qq{

Download the dictionaries

\n}; foreach (reverse sort keys %url) { print qq{

$desc{$_}

\n}; print qq{$_ => $url{$_}
\n}; } print qq{

Back to the search engine


}; } ######################### # Main # &init_variables; &read_variables; &init_lang; if ($path_info eq "/source") { print "Content-type: text/plain\n\n"; open(R, $0) || do { print "ick!\n"; &exit; }; while() { print } close R; &exit; } &header; if ($path_info eq "/faq.html") { &faq; &footer; &footer2; &exit(0); } # allow dict.cgi?word $query = $1 if (!$query && $query_string =~ /^([^=&]+)$/); # start forms &forms; # first call, nothing to search if ($query_string eq "") { &footer; &footer2; &exit(0); } $counter = 0; print qq{ lang: $lang
plang: $plang
icase: $icase
wholewords: $wholewords
regexp: $regexp
hits: $hits
matches: $matches
query: $query
db: $db
} if $debug > 0; @grepopt = (); push(@grepopt, '-i') if $icase; push(@grepopt, '-w') if $wholewords; push(@grepopt, '-' . $matches) if ($matches eq "1" || $matches eq "2" || $matches eq "3"); push(@grepopt, ('-y', '-B')) if $matches eq "999"; push(@grepopt, '-k') if !$regexp || $query =~ /^-/; $hits = 50 if $hits < 1 || $hits > 2000; if ($query) { print "
\n";
    $ret = &search(¨aut($query), @grepopt);
    print "
\n"; if (!$ret) { print qq($cm{'nf'} ``$query''\n); } print "
\n"; } &footer; &footer2; # EOF