#!/usr/bin/perl
$|=1;
# WBOSS, Web Based Open Source Spellchecker Version 2.5i
# Copyright 2001, Joshua Cantara
# This program is licensed under the GPL: http://www.gnu.org/licenses/gpl.txt
# Newest version can always be found at: http://dontpokebadgers.com/spellchecker/
#####################################
# LOAD MODULES!
#####################################
use strict;
use CGI;
use IPC::Open3;
#####################################
# WHICH SPELL CHECKER TO USE?
# CHANGE THIS VARIABLE IF NEEDED!
#####################################
# Spell-checker executable used by this script; the commented line below
# is the ready-made alternative for ispell.
my $path = '/usr/bin/aspell';
#my $path = '/usr/local/bin/ispell';
######################################
# SET GLOBAL VARIABLES
######################################
# Package globals shared by the routines further down in this file.
our @words       = ();    # extracted words; the index is the number inside %%WORDn%%
our $wordframe   = "";    # the text with extracted words replaced by %%WORDn%% labels
our $wordcount   = 0;     # how many entries @words currently holds
our $worderror   = 0;     # bumped by words2html() for every word it flags
our $wordignore  = "";    # space-separated terms that _word2label() leaves alone
our $pageheaders = "\n";  # NOTE(review): not referenced in the visible part of the file
#####################################
# MAIN LOOP!
#####################################
use Cwd qw(getcwd);    # core module: find the working directory without spawning `pwd`

my $query  = CGI->new;
my $string = $query->param('checkme');    # text handed to text2words()
my $form   = $query->param('form');       # passed through to checkit()/final()
my $field  = $query->param('field');      # passed through to checkit()/final()
my $pid;
my $pwd = getcwd();
$pwd = '' unless defined $pwd;            # getcwd() yields undef when it fails
print "Content-type: text/html\n\n";
print qq||;
untie *STDIN;

# Dispatch on the submitted parameters.  The order of the tests matters:
# when several of them are present, the first matching branch wins.
if ($query->param('spell') eq 'check')
{
    # Fresh text: split it into words, then check them.
    $pid = _open_speller($path, $pwd);
    text2words($string);
    checkit($form, $field);
    _close_speller($pid);
}
elsif ($query->param('Finish Checking') eq 'Finish Checking')
{
    # No spell-checker process is started for this step.
    query2words($query);
    final($form, $field);
}
elsif ($query->param('Check Again') eq 'Check Again')
{
    # Words come back from the form instead of from raw text.
    $pid = _open_speller($path, $pwd);
    query2words($query);
    checkit($form, $field);
    _close_speller($pid);
}
else {
    asktext();
}
exit;

# Start the spell checker attached to the WRITER / READER / ERROR handles.
#   $speller - command to run; split on whitespace so options embedded in
#              the configured value keep working
#   $dir     - directory used to build the absolute path of custom.dic
# Adds "-p $dir/custom.dic" when custom.dic exists in the current directory.
# The command is given to open3() as a list so no shell interprets the path.
# Returns the child's process id; open3() raises an exception on failure.
sub _open_speller {
    my ($speller, $dir) = @_;
    my @command = split ' ', $speller;
    push @command, '-p', "$dir/custom.dic" if -e 'custom.dic';
    push @command, '-a', '-S';
    my $child = open3(\*WRITER, \*READER, \*ERROR, @command)
        or die "Can't open aspell!";
    return $child;
}

# Close the pipes opened by _open_speller() and reap that specific child,
# rather than whichever child process happens to exit first.
sub _close_speller {
    my ($child) = @_;
    close READER;
    close WRITER;
    close ERROR;
    waitpid($child, 0);
    return;
}
#####################################
# SPLIT/JOIN THE INPUT
#####################################
# Decide whether one extracted word is to be spell-checked.
#   $_[0] - the word as captured from the text
# Returns the word itself when it is to be left in place: it occurs anywhere
# inside $wordignore (case-insensitive substring match, which is also what
# lets fragments such as "hasn" from "hasn't" through) or it starts with
# "WORD".  Otherwise the word is appended to @words, $wordcount is advanced
# and the matching %%WORDn%% label is returned.
# An undefined or empty argument yields the empty string and records nothing.
sub _word2label {
    my $word = $_[0];

    # An empty pattern would make Perl silently reuse the last successful
    # pattern, so never let an empty word reach the match below.
    return '' unless defined $word && length $word;

    # \Q...\E keeps characters such as "." or "(" in the word literal, so
    # they can neither widen the match nor abort with a regex syntax error.
    if ($wordignore =~ /\Q$word\E/i || $word =~ /^WORD/)
    {
        return($word);
    }

    my $label = '%%WORD' . $wordcount . '%%';
    $words[$wordcount] = $word;
    $wordcount++;
    return($label);
}
##################################################
# FILL $WORDFRAME AND @WORDS BY INPUT SPLIT
##################################################
# Split raw text into a frame plus a word list.
#   $_[0] - the text to be checked
# Rebuilds $wordignore, resets @words and $wordcount, then stores in
# $wordframe a copy of the text in which every word accepted by
# _word2label() is replaced by its %%WORDn%% label.
# Returns the number of words collected.
sub text2words {
    my $text = $_[0];

    # ignore valid contractions (due to problems with these on some systems)
    $wordignore  = "they'll we'll you'll she'll he'll i'll ";
    $wordignore .= "hasn't wouldn't shouldn't didn't aren't ";
    $wordignore .= "couldn't doesn't hadn't wasn't weren't isn't ";
    $wordignore .= "we've you've they've ";
    $wordignore .= "can't don't shan't ";
    # ignore the following always
    $wordignore .= "http ftp nntp smtp nfs html xml mailto bsd linux gnu gpl openwebmail ";

    # Everything in the text matching one of these is appended to the ignore
    # list as a whole.  None of the patterns may contain a capturing group:
    # in list context m//g would then return only the captured part (e.g.
    # just "com") instead of the complete match.
    my @ignore_patterns = (
        qr![A-Za-z]+tp://[A-Za-z\d\.]+!i,                          # URLs
        qr![A-Za-z\d]+\@[A-Za-z\d]+!i,                             # email addresses
        qr![A-Za-z\d\.]+\.(?:com|org|edu|net|gov)[A-Za-z\d\.]*!i,  # domain names
    );
    foreach my $pattern (@ignore_patterns)
    {
        foreach my $hit ($text =~ /$pattern/g)
        {
            $wordignore .= " $hit";
        }
    }

    @words = ();
    $wordcount = 0;
    $wordframe = $text;
    ######################
    #ATTN: If you have problems with international characters, disable the bottom line and enable the top one.
    ######################
    # Only one of the two alternatives captures on any given match, so pass
    # whichever group is defined; passing $1 alone hands undef to
    # _word2label() whenever the "~~word" alternative matched.
    # a-z A-Z English characters only.
    #$wordframe =~ s/([A-Za-z][A-Za-z\-]*[A-Za-z])|(~~[A-Za-z][A-Za-z\-]*[A-Za-z])/_word2label(defined $1 ? $1 : $2)/ge;
    # Extended characters, such as those with accents
    $wordframe =~ s/([^\W\d_][^\W\d_\-]*[^\W\d_])|(~~[^\W\d_][^\W\d_\-]*[^\W\d_])/_word2label(defined $1 ? $1 : $2)/ge;
    return $wordcount;
}
###########################################
# FILL $WORDFRAME AND @WORDS FROM CGI
###########################################
# Restore the checker state from a submitted form.
#   $_[0] - query object; only its param() method is used
# Takes $wordcount from the "wordcount" parameter, $wordframe from the
# unescaped "wordframe" parameter, and fills @words from the parameters
# named 0, 1, 2, ... below $wordcount.  Slots whose parameter is absent
# are left unset.
sub query2words {
    my ($cgi) = @_;

    @words     = ();
    $wordcount = $cgi->param('wordcount');
    $wordframe = CGI::unescape($cgi->param('wordframe'));

    my $slot = 0;
    while ($slot < $wordcount)
    {
        my $submitted = $cgi->param($slot);
        $words[$slot] = $submitted if defined $submitted;
        $slot++;
    }
}
#########################################
# BUILD OUTPUT FROM $WORDFRAME AND @WORDS
#########################################
# Rebuild plain text from the frame and the word list.
# Works on a copy of $wordframe: every %%WORDn%% label is replaced by
# $words[n], then each "~~" manual-fix marker is dropped while the letters
# following it are kept.  Returns the resulting string.
sub words2text {
    my $rebuilt = $wordframe;
    $rebuilt =~ s{%%WORD(\d+)%%}{ $words[$1] }ge;
    $rebuilt =~ s{~~([A-Za-z]*)}{$1}g;    # convert manualfix
    return $rebuilt;
}
##############################################################
# GENERATE SPELLCHECK HTML
##############################################################
# Build the page fragment for the current $wordframe / @words.
# Takes no arguments; works on a copy of $wordframe and returns it after
# each %%WORDn%% label (n below $wordcount) has been replaced by per-word
# markup.
# Side effects visible here:
#   - $worderror is incremented for every word that is a manual fix
#     (starts with "~~") or whose spellcheck() result has type 'none',
#     'guess' or 'miss';
#   - for every other word, the label is replaced by the plain word in the
#     global $wordframe itself.
# spellcheck() is defined outside this part of the file; the code below
# relies only on its first return value being a hash reference with a
# 'type' key.
sub words2html {
my $html = $wordframe;
my $i;
# escape html codes, convert line breaks
# NOTE(review): as written, the replacement sides of the substitutions below
# equal their patterns or are missing, and the qq|| templates further down
# hold only a newline - the HTML looks as if it was stripped from this copy.
# Confirm against the upstream source before relying on this routine.
$html =~ s/&/&/g;
$html =~ s/</g;
$html =~ s/>/>/g;
$html =~ s/\n/
/g;
$html =~ s/"/"/g;
$html =~ s/ ( +)/ $1/g;
# One pass per collected word; $i is also the number inside the label.
for ($i=0; $i<$wordcount; $i++)
{
my $wordhtml = "";
if ($words[$i]=~/^~~/) # check if manualfix
{
# Drop the leading "~~" marker to get the word itself.
# NOTE(review): $origword and $len are not used by the visible template.
my $origword = substr($words[$i],2);
my $len = length($origword);
$wordhtml = qq|\n|;
$worderror++;
}
else { # normal word
my ($r) = spellcheck($words[$i]);
if ($r->{'type'} eq 'none' || $r->{'type'} eq 'guess')
{
# NOTE(review): $len is not used by the visible template.
my $len = length($words[$i]);
$wordhtml = qq|\n|;
$worderror++;
}
elsif ($r->{'type'} eq 'miss')
{
# NOTE(review): $sugg is declared but never assigned or used in the visible code.
my $sugg;
$wordhtml = qq|\n|;
$worderror++;
}
else { # type= ok, compound, root
$wordhtml = qq|$words[$i]|;
$wordframe =~ s/%%WORD$i%%/$words[$i]/; # remove the word symbol from wordframe
}
}
# Swap this word's label in the output for the markup chosen above.
$html =~ s/%%WORD$i%%/$wordhtml/;
}
return($html);
}
#####################################
# CHECK TEXT FOR ERRORS AND ASK FOR VERIFICATION
#####################################
sub checkit {
my ($formname,$fieldname) = @_;
# escapedwordframe must be done after words2html()
# since $wordframe may changed in words2html()
my $wordshtml = words2html();
my $escapedwordframe = CGI::escape($wordframe);
print qq|