|
|
|
@ -1,6 +1,8 @@
|
|
|
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
|
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
|
|
|
|
* |
|
|
|
|
* Copyright (C) 2002-2017 Németh László |
|
|
|
|
* |
|
|
|
|
* The contents of this file are subject to the Mozilla Public License Version |
|
|
|
|
* 1.1 (the "License"); you may not use this file except in compliance with |
|
|
|
|
* the License. You may obtain a copy of the License at |
|
|
|
@ -11,12 +13,7 @@
|
|
|
|
|
* for the specific language governing rights and limitations under the |
|
|
|
|
* License. |
|
|
|
|
* |
|
|
|
|
* The Original Code is Hunspell, based on MySpell. |
|
|
|
|
* |
|
|
|
|
* The Initial Developers of the Original Code are |
|
|
|
|
* Kevin Hendricks (MySpell) and Németh László (Hunspell). |
|
|
|
|
* Portions created by the Initial Developers are Copyright (C) 2002-2005 |
|
|
|
|
* the Initial Developers. All Rights Reserved. |
|
|
|
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. |
|
|
|
|
* |
|
|
|
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, |
|
|
|
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, |
|
|
|
@ -518,18 +515,20 @@ unsigned char ccase(const struct cs_info* csconv, int nIndex) {
|
|
|
|
|
|
|
|
|
|
// Returns the upper-case form of a single two-byte character.
//
// The character is passed by value, so the caller's copy is never modified.
// Its high byte (u.h) and low byte (u.l) are combined into one 16-bit code,
// which is handed to unicodetoupper() together with langnum. langnum is only
// forwarded here; its meaning is defined by unicodetoupper().
//
// If the mapping yields a different code, both bytes of the local copy are
// overwritten with it; otherwise the character is returned unchanged.
w_char upper_utf(w_char u, int langnum) {
  const unsigned short idx = (u.h << 8) + u.l;
  // Look the mapping up once and reuse it for the comparison and both bytes.
  const unsigned short upridx = unicodetoupper(idx, langnum);
  if (idx != upridx) {
    u.h = (unsigned char)(upridx >> 8);      // high byte of the mapped code
    u.l = (unsigned char)(upridx & 0x00FF);  // low byte of the mapped code
  }
  return u;
}
|
|
|
|
|
|
|
|
|
// Returns the lower-case form of a single two-byte character.
//
// The character is passed by value, so the caller's copy is never modified.
// Its high byte (u.h) and low byte (u.l) are combined into one 16-bit code,
// which is handed to unicodetolower() together with langnum. langnum is only
// forwarded here; its meaning is defined by unicodetolower().
//
// If the mapping yields a different code, both bytes of the local copy are
// overwritten with it; otherwise the character is returned unchanged.
w_char lower_utf(w_char u, int langnum) {
  const unsigned short idx = (u.h << 8) + u.l;
  // Look the mapping up once and reuse it for the comparison and both bytes.
  const unsigned short lwridx = unicodetolower(idx, langnum);
  if (idx != lwridx) {
    u.h = (unsigned char)(lwridx >> 8);      // high byte of the mapped code
    u.l = (unsigned char)(lwridx & 0x00FF);  // low byte of the mapped code
  }
  return u;
}
|
|
|
@ -551,12 +550,13 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u, |
|
|
|
|
int langnum) { |
|
|
|
|
int langnum) { |
|
|
|
|
for (size_t i = 0; i < u.size(); ++i) { |
|
|
|
|
unsigned short idx = (u[i].h << 8) + u[i].l; |
|
|
|
|
if (idx != unicodetolower(idx, langnum)) { |
|
|
|
|
u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); |
|
|
|
|
u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); |
|
|
|
|
unsigned short lwridx = unicodetolower(idx, langnum); |
|
|
|
|
if (idx != lwridx) { |
|
|
|
|
u[i].h = (unsigned char)(lwridx >> 8); |
|
|
|
|
u[i].l = (unsigned char)(lwridx & 0x00FF); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return u; |
|
|
|
@ -565,9 +565,10 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
|
|
|
|
|
std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) { |
|
|
|
|
for (size_t i = 0; i < u.size(); i++) { |
|
|
|
|
unsigned short idx = (u[i].h << 8) + u[i].l; |
|
|
|
|
if (idx != unicodetoupper(idx, langnum)) { |
|
|
|
|
u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); |
|
|
|
|
u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); |
|
|
|
|
unsigned short upridx = unicodetoupper(idx, langnum); |
|
|
|
|
if (idx != upridx) { |
|
|
|
|
u[i].h = (unsigned char)(upridx >> 8); |
|
|
|
|
u[i].l = (unsigned char)(upridx & 0x00FF); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return u; |
|
|
|
@ -583,9 +584,10 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
|
|
|
|
|
std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) { |
|
|
|
|
if (!u.empty()) { |
|
|
|
|
unsigned short idx = (u[0].h << 8) + u[0].l; |
|
|
|
|
if (idx != unicodetoupper(idx, langnum)) { |
|
|
|
|
u[0].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); |
|
|
|
|
u[0].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); |
|
|
|
|
unsigned short upridx = unicodetoupper(idx, langnum); |
|
|
|
|
if (idx != upridx) { |
|
|
|
|
u[0].h = (unsigned char)(upridx >> 8); |
|
|
|
|
u[0].l = (unsigned char)(upridx & 0x00FF); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return u; |
|
|
|
@ -601,9 +603,10 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
|
|
|
|
|
std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) { |
|
|
|
|
if (!u.empty()) { |
|
|
|
|
unsigned short idx = (u[0].h << 8) + u[0].l; |
|
|
|
|
if (idx != unicodetolower(idx, langnum)) { |
|
|
|
|
u[0].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); |
|
|
|
|
u[0].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); |
|
|
|
|
unsigned short lwridx = unicodetolower(idx, langnum); |
|
|
|
|
if (idx != lwridx) { |
|
|
|
|
u[0].h = (unsigned char)(lwridx >> 8); |
|
|
|
|
u[0].l = (unsigned char)(lwridx & 0x00FF); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return u; |
|
|
|
@ -2531,12 +2534,17 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
|
|
|
|
|
size_t ncap = 0; |
|
|
|
|
size_t nneutral = 0; |
|
|
|
|
size_t firstcap = 0; |
|
|
|
|
for (size_t i = 0; i < word.size(); ++i) { |
|
|
|
|
unsigned short idx = (word[i].h << 8) + word[i].l; |
|
|
|
|
if (idx != unicodetolower(idx, langnum)) |
|
|
|
|
|
|
|
|
|
std::vector<w_char>::const_iterator it = word.begin(); |
|
|
|
|
std::vector<w_char>::const_iterator it_end = word.end(); |
|
|
|
|
while (it != it_end) { |
|
|
|
|
unsigned short idx = (it->h << 8) + it->l; |
|
|
|
|
unsigned short lwridx = unicodetolower(idx, langnum); |
|
|
|
|
if (idx != lwridx) |
|
|
|
|
ncap++; |
|
|
|
|
if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) |
|
|
|
|
if (unicodetoupper(idx, langnum) == lwridx) |
|
|
|
|
nneutral++; |
|
|
|
|
++it; |
|
|
|
|
} |
|
|
|
|
if (ncap) { |
|
|
|
|
unsigned short idx = (word[0].h << 8) + word[0].l; |
|
|
|
|