#!/usr/bin/perl $|=1; # WBOSS, Web Based Open Source Spellchcker Version 2.5i # Copyright 2001, Joshua Cantara # This program is licensed under the GPL: http://www.gnu.org/licenses/gpl.txt # Newest version can always be found at: http://dontpokebadgers.com/spellchecker/ ##################################### # LOAD MODULES! ##################################### use strict; use CGI; use IPC::Open3; ##################################### # WHICH SPELL CHECKER TO USE? # CHANGE THIS VARIABLE IF NEEDED! ##################################### my $path = '/usr/bin/aspell'; #my $path = '/usr/local/bin/ispell'; ###################################### # SET GLOBAL VARIABLES ###################################### use vars qw(@words $wordframe $wordcount $worderror $wordignore $pageheaders); @words = (); $wordframe = ""; $wordcount = 0; $worderror = 0; $wordignore = ""; $pageheaders = qq| |; ##################################### # MAIN LOOP! ##################################### my $query = new CGI; my $string = $query->param('checkme'); my $form = $query->param('form'); my $field = $query->param('field'); my $pid; my $pwd = `pwd`; chomp $pwd; print "Content-type: text/html\n\n"; print qq||; untie *STDIN; if ($query->param('spell') eq 'check') { if (-e 'custom.dic') { $pid = open3(\*WRITER,\*READER,\*ERROR,"$path -p $pwd/custom.dic -a -S") or die "Can't open aspell!"; } else { $pid = open3(\*WRITER,\*READER,\*ERROR,"$path -a -S") or die "Can't open aspell!"; } text2words($string); checkit($form, $field); close READER; close WRITER; wait; } elsif ($query->param('Finish Checking') eq 'Finish Checking') { query2words($query); final($form, $field); } elsif ($query->param('Check Again') eq 'Check Again') { if (-e 'custom.dic') { $pid = open3(\*WRITER,\*READER,\*ERROR,"$path -p $pwd/custom.dic -a -S") or die "Can't open aspell!"; } else { $pid = open3(\*WRITER,\*READER,\*ERROR,"$path -a -S") or die "Can't open aspell!"; } query2words($query); checkit($form,$field); close READER; close WRITER; wait; } else { &asktext; } exit; ##################################### # SPLIT/JOIN THE INPUT ##################################### sub _word2label { my $word = $_[0]; my $label = '%%WORD'.$wordcount.'%%'; if ($wordignore =~ /$word/i || $word =~ /^WORD/) { return($word); } $words[$wordcount] = $word; $wordcount++; return($label); } ################################################## # FILL $WORDFRAME AND @WORDS BY INPUT SPLIT ################################################## sub text2words { my $text = $_[0]; # ignore valid contractions (due to problems with these on some systems) $wordignore = "they'll we'll you'll she'll he'll i'll "; $wordignore .= "hasn't wouldn't shouldn't didn't aren't "; $wordignore .= "couldn't doesn't hadn't wasn't weren't isn't "; $wordignore .= "we've you've they've "; $wordignore .= "can't don't shan't "; # ignore the following always $wordignore .= "http ftp nntp smtp nfs html xml mailto bsd linux gnu gpl openwebmail "; # ignore URLs foreach ($text =~ m![A-Za-z]+tp://[A-Za-z\d\.]+!ig) { $wordignore .= " $_"; } # ignore email addresses foreach ($text =~ m![A-Za-z\d]+\@[A-Za-z\d]+!ig) { $wordignore .= " $_"; } # ignore domain names foreach ($text =~ m![A-Za-z\d\.]+\.(com|org|edu|net|gov)[A-Za-z\d\.]*!ig) { $wordignore .= " $_"; } @words = (); $wordcount = 0; $wordframe = $text; ###################### #ATTN: If you have problems with international characters, disable the bottom line and enable the top one. ###################### # a-z A-Z English characters only. #$wordframe =~ s/([A-Za-z][A-Za-z\-]*[A-Za-z])|(~~[A-Za-z][A-Za-z\-]*[A-Za-z])/_word2label($1)/ge; # Extended characters, such as those with accents $wordframe =~ s/([^\W\d_][^\W\d_\-]*[^\W\d_])|(~~[^\W\d_][^\W\d_\-]*[^\W\d_])/_word2label($1)/ge; return $wordcount; } ########################################### # FILL $WORDFRAME AND @WORDS FROM CGI ########################################### sub query2words { my $q = $_[0]; my $i; @words = (); $wordcount = $q->param('wordcount'); $wordframe = CGI::unescape($q->param('wordframe')); for ($i=0; $i<$wordcount; $i++) { $words[$i] = $q->param($i) if (defined ($q->param($i))) } } ######################################### # BUILD OUTPUT FROM $WORDFRAME AND @WORDS ######################################### sub words2text { my $text = $wordframe; $text =~ s/%%WORD(\d+)%%/$words[$1]/ge; $text =~ s/~~([A-Za-z]*)/$1/ge; # covert manualfix return($text); } ############################################################## # GENERATE SPELLCHECK HTML ############################################################## sub words2html { my $html = $wordframe; my $i; # escape html codes, convert line breaks $html =~ s/&/&/g; $html =~ s//>/g; $html =~ s/\n/
/g; $html =~ s/"/"/g; $html =~ s/ ( +)/ $1/g; for ($i=0; $i<$wordcount; $i++) { my $wordhtml = ""; if ($words[$i]=~/^~~/) # check if manualfix { my $origword = substr($words[$i],2); my $len = length($origword); $wordhtml = qq|\n|; $worderror++; } else { # normal word my ($r) = spellcheck($words[$i]); if ($r->{'type'} eq 'none' || $r->{'type'} eq 'guess') { my $len = length($words[$i]); $wordhtml = qq|\n|; $worderror++; } elsif ($r->{'type'} eq 'miss') { my $sugg; $wordhtml = qq|\n|; $worderror++; } else { # type= ok, compound, root $wordhtml = qq|$words[$i]|; $wordframe =~ s/%%WORD$i%%/$words[$i]/; # remove the word symbol from wordframe } } $html =~ s/%%WORD$i%%/$wordhtml/; } return($html); } ##################################### # CHECK TEXT FOR ERRORS AND ASK FOR VERIFICATION ##################################### sub checkit { my ($formname,$fieldname) = @_; # escapedwordframe must be done after words2html() # since $wordframe may changed in words2html() my $wordshtml = words2html(); my $escapedwordframe = CGI::escape($wordframe); print qq| $pageheaders Text Checked
Verify Spell Check
Drop the boxes below down to choose a suggested replacement, keep your original, or choose "--Manually Fix--" and then "Check Again" if none of the suggestions fit what you intended. A text box appears if no suggestions were found. Retype the word, and try again.
$wordshtml
Go back to the previous page.
|; } ##################################### # LOAD FINAL CORRECTIONS AND MAKE CHANGES ##################################### sub final { my ($formname, $fieldname) = @_; my $escapedfinalstring = words2text(); # since jscript has problem in unescape doublebyte char string, # we only escape " to !QUOT! and unescape in jscript by RegExp # $escapedfinalstring=CGI::escape(words2text()); $escapedfinalstring =~ s/"/!QUOT!/g; print qq| $pageheaders Done Checking!
Corrections Are Being Made To Your Original Page
If you see this screen, please wait for it to load. It will close after the corrections have been made to your text. If an error in loading occurs you may use the links below to navigate back or start again.
Click here to check more text.
Go back to the previous page.
Click here to close this window.
|; } ##################################### # ASKS FOR TEXT TO CHECK ##################################### sub asktext { print qq| $pageheaders Spell Checker
Please Copy and Paste Text Below

Click here to close this window.
|; } ####################################### # DEBUG SUB ROUTINE # Useage: &debug(); ####################################### sub debug { my $q = new CGI; print ''; } ################################################ # SPELLCHECK SUBROUTINE! ################################################ sub spellcheck { my $pid = undef; my $word = shift(@_); my @commentary; my @results; my %types = ( # correct words: '*' => 'ok', '-' => 'compound', '+' => 'root', # misspelled words: '#' => 'none', '&' => 'miss', '?' => 'guess', ); my %modisp = ( 'root' => sub { my $h = shift; $h->{'root'} = shift; }, 'none' => sub { my $h = shift; $h->{'original'} = shift; $h->{'offset'} = shift; }, 'miss' => sub { # also used for 'guess' my $h = shift; $h->{'original'} = shift; $h->{'count'} = shift; # count will always be 0, when $c eq '?'. $h->{'offset'} = shift; my @misses = splice @_, 0, $h->{'count'}; my @guesses = @_; $h->{'misses'} = \@misses; $h->{'guesses'} = \@guesses; }, ); $modisp{'guess'} = $modisp{'miss'}; # same handler. chomp $word; $word =~ s/\r//g; $word =~ /\n/ and warn "newlines not allowed"; print WRITER "!\n"; print WRITER "^$word\n"; while () { chomp; last unless $_ gt ''; push (@commentary, $_) if substr($_,0,1) =~ /([*|-|+|#|&|?| ||])/; } for my $i (0 .. $#commentary) { my %h = ('commentary' => $commentary[$i]); my @tail; # will get stuff after a colon, if any. if ($h{'commentary'} =~ s/:\s+(.*)//) { my $tail = $1; @tail = split /, /, $tail; } my($c,@args) = split ' ', $h{'commentary'}; my $type = $types{$c} || 'unknown'; $modisp{$type} and $modisp{$type}->( \%h, @args, @tail ); $h{'type'} = $type; $h{'term'} = $h{'original'}; push @results, \%h; } return $results[0]; }