Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F117878055
rcube_spellchecker.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Authored By
Unknown
Size
17 KB
Referenced Files
None
Subscribers
None
rcube_spellchecker.php
View Options
<?php
/*
+-----------------------------------------------------------------------+
| program/include/rcube_spellchecker.php |
| |
| This file is part of the Roundcube Webmail client |
| Copyright (C) 2011, Kolab Systems AG |
| Copyright (C) 2008-2011, The Roundcube Dev Team |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
| See the README file for a full license statement. |
| |
| PURPOSE: |
| Spellchecking using different backends |
| |
+-----------------------------------------------------------------------+
| Author: Aleksander Machniak <machniak@kolabsys.com> |
| Author: Thomas Bruederli <roundcube@gmail.com> |
+-----------------------------------------------------------------------+
*/
/**
 * Helper class for spellchecking with Googiespell and PSpell support.
 *
 * The backend is selected with the 'spellcheck_engine' config option
 * ('pspell' uses the PHP Pspell extension, any other value uses the
 * Googiespell-compatible HTTP service).
 *
 * Every entry of the internal match list is an array where:
 *   [0] - misspelled word (pspell) or the matched XML element (googie)
 *   [1] - character offset of the word in the checked text
 *   [2] - length of the word (in characters)
 *   [4] - suggestions: array (pspell) or tab-separated string (googie)
 *
 * @package    Framework
 * @subpackage Utils
 */
class rcube_spellchecker
{
    private $matches = array();
    private $engine;
    private $lang;
    private $rc;
    private $error;
    private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/';
    private $options = array();
    private $dict;
    private $have_dict;

    // Declared explicitly: both were created dynamically before, which is
    // deprecated since PHP 8.2 and made $plink readable before assignment.
    private $content = '';
    private $plink;

    // default settings
    const GOOGLE_HOST = 'ssl://www.google.com';
    const GOOGLE_PORT = 443;
    const MAX_SUGGESTIONS = 10;


    /**
     * Constructor
     *
     * Reads the engine name and the spellcheck_* options from configuration.
     *
     * @param string $lang Language code
     */
    public function __construct($lang = 'en')
    {
        $this->rc     = rcube::get_instance();
        $this->engine = $this->rc->config->get('spellcheck_engine', 'googie');
        $this->lang   = $lang ? $lang : 'en';

        $this->options = array(
            'ignore_syms' => $this->rc->config->get('spellcheck_ignore_syms'),
            'ignore_nums' => $this->rc->config->get('spellcheck_ignore_nums'),
            'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'),
            'dictionary'  => $this->rc->config->get('spellcheck_dictionary'),
        );
    }


    /**
     * Set content and check spelling
     *
     * @param string $text    Text content for spellchecking
     * @param bool   $is_html Enables HTML-to-Text conversion
     *
     * @return bool True when no misspelling found, otherwise false
     */
    public function check($text, $is_html = false)
    {
        // convert to plain text
        $this->content = $is_html ? $this->html2text($text) : $text;

        if ($this->engine === 'pspell') {
            $this->matches = $this->_pspell_check($this->content);
        }
        else {
            $this->matches = $this->_googie_check($this->content);
        }

        return $this->found() === 0;
    }


    /**
     * Number of misspellings found (after check)
     *
     * @return int Number of misspellings
     */
    public function found()
    {
        return count($this->matches);
    }


    /**
     * Returns suggestions for the specified word
     *
     * @param string $word The word
     *
     * @return array Suggestions list
     */
    public function get_suggestions($word)
    {
        if ($this->engine === 'pspell') {
            return $this->_pspell_suggestions($word);
        }

        return $this->_googie_suggestions($word);
    }


    /**
     * Returns misspelled words
     *
     * @param string $text    The content for spellchecking. If empty content
     *                        used for check() method will be used.
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    public function get_words($text = null, $is_html = false)
    {
        if ($this->engine === 'pspell') {
            return $this->_pspell_words($text, $is_html);
        }

        return $this->_googie_words($text, $is_html);
    }


    /**
     * Returns checking result in XML (Googiespell) format
     *
     * @return string XML content
     */
    public function get_xml()
    {
        // send output
        $out = '<?xml version="1.0" encoding="' . RCUBE_CHARSET . '"?>'
            . '<spellresult charschecked="' . mb_strlen((string) $this->content) . '">';

        foreach ($this->matches as $item) {
            $out .= '<c o="' . $item[1] . '" l="' . $item[2] . '">';
            $out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
            $out .= '</c>';
        }

        $out .= '</spellresult>';

        return $out;
    }


    /**
     * Returns checking result (misspelled words with suggestions)
     *
     * @return array Spellchecking result. An array indexed by word,
     *               values are tab-separated suggestions.
     */
    public function get()
    {
        $result = array();

        foreach ($this->matches as $item) {
            if ($this->engine === 'pspell') {
                $word = $item[0];
            }
            else {
                // googie matches carry only offset/length, not the word itself
                $word = mb_substr($this->content, (int) $item[1], (int) $item[2], RCUBE_CHARSET);
            }

            $result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
        }

        return $result;
    }


    /**
     * Returns error message
     *
     * @return string Error message (null if no error was recorded)
     */
    public function error()
    {
        return $this->error;
    }


    /**
     * Checks the text using pspell
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of matches (word, offset, length, null, suggestions)
     */
    private function _pspell_check($text)
    {
        // init spellchecker
        $this->_pspell_init();

        if (!$this->plink) {
            return array();
        }

        // tokenize (-1 == no limit; passing NULL is deprecated since PHP 8.1)
        $tokens = preg_split($this->separator, (string) $text, -1,
            PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);

        if (!is_array($tokens)) {
            return array();
        }

        $diff    = 0;
        $matches = array();

        foreach ($tokens as $w) {
            $word = trim($w[0]);
            // preg_split() reports byte offsets, $diff converts them to characters
            $pos  = $w[1] - $diff;
            $len  = mb_strlen($word);

            // skip exceptions
            if (!$this->is_exception($word) && !pspell_check($this->plink, $word)) {
                $matches[] = array($word, $pos, $len, null, $this->_pspell_suggestions($word));
            }

            $diff += (strlen($word) - $len);
        }

        return $matches;
    }


    /**
     * Returns the misspelled words
     *
     * @param string $text    Text to check. If empty, words found by the
     *                        last check() call are returned.
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    private function _pspell_words($text = null, $is_html = false)
    {
        $result = array();

        if ($text) {
            // init spellchecker
            $this->_pspell_init();

            if (!$this->plink) {
                return array();
            }

            // With PSpell we don't need to get suggestions to return misspelled words
            if ($is_html) {
                $text = $this->html2text($text);
            }

            $tokens = preg_split($this->separator, (string) $text, -1,
                PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);

            if (!is_array($tokens)) {
                return array();
            }

            foreach ($tokens as $w) {
                $word = trim($w[0]);

                // skip exceptions
                if ($this->is_exception($word)) {
                    continue;
                }

                if (!pspell_check($this->plink, $word)) {
                    $result[] = $word;
                }
            }

            return $result;
        }

        foreach ($this->matches as $m) {
            $result[] = $m[0];
        }

        return $result;
    }


    /**
     * Returns suggestions for misspelled word
     *
     * @param string $word The word
     *
     * @return array At most MAX_SUGGESTIONS suggestions, empty on failure
     */
    private function _pspell_suggestions($word)
    {
        // init spellchecker
        $this->_pspell_init();

        if (!$this->plink) {
            return array();
        }

        $suggestions = pspell_suggest($this->plink, $word);

        // pspell_suggest() may return false, check the type before counting
        if (!is_array($suggestions)) {
            return array();
        }

        return array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
    }


    /**
     * Initializes PSpell dictionary
     *
     * Sets the error message when the extension is missing or the
     * dictionary for the selected language cannot be loaded.
     */
    private function _pspell_init()
    {
        if (!$this->plink) {
            if (!extension_loaded('pspell')) {
                $this->error = "Pspell extension not available";
                rcube::raise_error(array(
                    'code' => 500, 'type' => 'php',
                    'file' => __FILE__, 'line' => __LINE__,
                    'message' => $this->error), true, false);

                return;
            }

            // empty strings instead of NULL: the parameters are not nullable
            $this->plink = pspell_new($this->lang, '', '', RCUBE_CHARSET, PSPELL_FAST);
        }

        if (!$this->plink) {
            $this->error = "Unable to load Pspell engine for selected language";
        }
    }


    /**
     * Checks the text using a Googiespell-compatible HTTP service
     *
     * Uses the 'spellcheck_uri' config option if set, otherwise the
     * default Google host.
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of matches as returned by preg_match_all()
     *               (element, offset, length, s-attribute, suggestions)
     */
    private function _googie_check($text)
    {
        // spell check uri is configured
        $url = $this->rc->config->get('spellcheck_uri');

        if ($url) {
            $a_uri = parse_url($url);
            if (!is_array($a_uri)) {
                $a_uri = array();
            }

            // parse_url() omits components which are not present in the URL
            $scheme = isset($a_uri['scheme']) ? $a_uri['scheme'] : '';
            $ssl    = ($scheme == 'https' || $scheme == 'ssl');
            $port   = !empty($a_uri['port']) ? $a_uri['port'] : ($ssl ? 443 : 80);
            $host   = ($ssl ? 'ssl://' : '') . (isset($a_uri['host']) ? $a_uri['host'] : '');
            $path   = (isset($a_uri['path']) ? $a_uri['path'] : '')
                . (!empty($a_uri['query']) ? '?' . $a_uri['query'] : '')
                . $this->lang;
        }
        else {
            $host = self::GOOGLE_HOST;
            $port = self::GOOGLE_PORT;
            $path = '/tbproxy/spell?lang=' . $this->lang;
        }

        // Google has some problem with spaces, use \n instead
        $gtext = str_replace(' ', "\n", (string) $text);

        // the text is escaped, otherwise "&" or "<" would make the request malformed
        $gtext = '<?xml version="1.0" encoding="utf-8" ?>'
            . '<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
            . '<text>' . htmlspecialchars($gtext, ENT_QUOTES, RCUBE_CHARSET) . '</text>'
            . '</spellrequest>';

        $store = '';

        if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
            $out  = "POST $path HTTP/1.0\r\n";
            $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
            $out .= "Content-Length: " . strlen($gtext) . "\r\n";
            $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
            $out .= "Connection: Close\r\n\r\n";
            $out .= $gtext;

            fwrite($fp, $out);

            while (!feof($fp)) {
                $line = fgets($fp, 128);
                // stop on read error/timeout, feof() may never become true then
                if ($line === false) {
                    break;
                }
                $store .= $line;
            }

            fclose($fp);
        }

        if (!$store) {
            $this->error = "Empty result from spelling engine";
        }

        preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/',
            $store, $matches, PREG_SET_ORDER);

        // skip exceptions (if appropriate options are enabled)
        if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums'])
            || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary'])
        ) {
            foreach ($matches as $idx => $m) {
                $word = mb_substr($text, (int) $m[1], (int) $m[2], RCUBE_CHARSET);

                // skip exceptions
                if ($this->is_exception($word)) {
                    unset($matches[$idx]);
                }
            }
        }

        return $matches;
    }


    /**
     * Returns the misspelled words (Googiespell backend)
     *
     * @param string $text    Text to check. If empty, result of the
     *                        last check() call is used.
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    private function _googie_words($text = null, $is_html = false)
    {
        if ($text) {
            if ($is_html) {
                $text = $this->html2text($text);
            }

            $matches = $this->_googie_check($text);
        }
        else {
            $matches = $this->matches;
            $text    = $this->content;
        }

        $result = array();

        foreach ($matches as $m) {
            $result[] = mb_substr($text, (int) $m[1], (int) $m[2], RCUBE_CHARSET);
        }

        return $result;
    }


    /**
     * Returns suggestions for misspelled word (Googiespell backend)
     *
     * @param string $word The word. If empty, the first match of the
     *                     last check() call is used.
     *
     * @return array At most MAX_SUGGESTIONS suggestions
     */
    private function _googie_suggestions($word)
    {
        if ($word) {
            $matches = $this->_googie_check($word);
        }
        else {
            $matches = $this->matches;
        }

        if (!empty($matches[0][4])) {
            $suggestions = explode("\t", $matches[0][4]);

            // self:: is required here, there is no global MAX_SUGGESTIONS constant
            return array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
        }

        return array();
    }


    /**
     * Converts HTML content to plain text using the html2text class
     *
     * @param string $text HTML content
     *
     * @return string Plain text
     */
    private function html2text($text)
    {
        $h2t = new html2text($text, false, true, 0);
        return $h2t->get_text();
    }


    /**
     * Check if the specified word is an exception according to
     * spellcheck options.
     *
     * @param string $word The word
     *
     * @return bool True if the word is an exception, False otherwise
     */
    public function is_exception($word)
    {
        // Contain only symbols (e.g. "+9,0", "2:2")
        if (!$word || preg_match('/^[0-9@#$%^&_+~*=:;?!,.-]+$/', $word)) {
            return true;
        }

        // Contain symbols (e.g. "g@@gle"), all symbols excluding separators
        if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word)) {
            return true;
        }

        // Contain numbers (e.g. "g00g13")
        if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word)) {
            return true;
        }

        // Blocked caps (e.g. "GOOGLE")
        // strict comparison: "==" compares numeric-looking strings as numbers
        if (!empty($this->options['ignore_caps']) && $word === mb_strtoupper($word)) {
            return true;
        }

        // Use exceptions from dictionary
        if (!empty($this->options['dictionary'])) {
            $this->load_dict();

            // @TODO: should dictionary be case-insensitive?
            if (!empty($this->dict) && in_array($word, $this->dict, true)) {
                return true;
            }
        }

        return false;
    }


    /**
     * Add a word to dictionary
     *
     * @param string $word The word to add (many words can be
     *                     separated with a space)
     */
    public function add_word($word)
    {
        $this->load_dict();

        $valid = false;

        foreach (explode(' ', (string) $word) as $item) {
            // sanity check (also skips empty tokens produced by repeated spaces)
            if ($item !== '' && strlen($item) < 512) {
                $this->dict[] = $item;
                $valid = true;
            }
        }

        if ($valid) {
            $this->dict = array_unique($this->dict);
            $this->update_dict();
        }
    }


    /**
     * Remove a word from dictionary
     *
     * @param string $word The word to remove
     */
    public function remove_word($word)
    {
        $this->load_dict();

        if (($key = array_search($word, $this->dict, true)) !== false) {
            unset($this->dict[$key]);
            $this->update_dict();
        }
    }


    /**
     * Update dictionary row in DB
     *
     * Runs the 'spell_dictionary_save' plugin hook first; a plugin can
     * abort the database operation.
     */
    private function update_dict()
    {
        // shared dictionary is stored with NULL user ID
        $userid = null;

        if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
            $userid = $this->rc->get_user_id();
        }

        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
            'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));

        if (!empty($plugin['abort'])) {
            return;
        }

        $db    = $this->rc->db;
        $table = $db->table_name('dictionary');

        if ($this->have_dict) {
            $where = " WHERE user_id "
                . ($plugin['userid'] ? "= " . $db->quote($plugin['userid']) : "IS NULL")
                . " AND " . $db->quoteIdentifier('language') . " = ?";

            if (!empty($this->dict)) {
                $db->query("UPDATE " . $table . " SET data = ?" . $where,
                    implode(' ', $plugin['dictionary']), $plugin['language']);
            }
            // don't store empty dict
            else {
                $db->query("DELETE FROM " . $table . $where, $plugin['language']);

                // the row is gone, next update has to INSERT it again
                $this->have_dict = false;
            }
        }
        else if (!empty($this->dict)) {
            $db->query("INSERT INTO " . $table
                . " (user_id, " . $db->quoteIdentifier('language') . ", data) VALUES (?, ?, ?)",
                $plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary']));

            // the row exists now, next update has to UPDATE it
            $this->have_dict = true;
        }
    }


    /**
     * Get dictionary from DB
     *
     * Runs the 'spell_dictionary_get' plugin hook first; words returned by
     * the plugin are merged with words stored in the database (unless the
     * plugin aborts the database operation). The result is cached.
     *
     * @return array List of words
     */
    private function load_dict()
    {
        if (is_array($this->dict)) {
            return $this->dict;
        }

        // shared dictionary is stored with NULL user ID
        $userid = null;

        if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
            $userid = $this->rc->get_user_id();
        }

        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
            'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));

        if (empty($plugin['abort'])) {
            $db   = $this->rc->db;
            $dict = array();

            // keep the result handle, it was passed to fetch_assoc() undefined before
            $sql_result = $db->query(
                "SELECT data FROM " . $db->table_name('dictionary')
                . " WHERE user_id "
                . ($plugin['userid'] ? "= " . $db->quote($plugin['userid']) : "IS NULL")
                . " AND " . $db->quoteIdentifier('language') . " = ?",
                $plugin['language']);

            if ($sql_arr = $db->fetch_assoc($sql_result)) {
                $this->have_dict = true;

                if (!empty($sql_arr['data'])) {
                    $dict = explode(' ', $sql_arr['data']);
                }
            }

            $plugin['dictionary'] = array_merge((array) $plugin['dictionary'], $dict);
        }

        if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
            $this->dict = $plugin['dictionary'];
        }
        else {
            $this->dict = array();
        }

        return $this->dict;
    }
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sun, Apr 5, 9:57 PM (2 w, 6 d ago)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
aa/75/ae3b8daf8c5a839c28b19d75b6c2
Default Alt Text
rcube_spellchecker.php (17 KB)
Attached To
Mode
rS syncroton
Attached
Detach File
Event Timeline