lookup.cpp
00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ 00002 /* kspell2 - adopted from enchant 00003 * Copyright (C) 2003 Dom Lachowicz 00004 * Copyright (C) 2004 Zack Rusin <zack@kde.org> 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2.1 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with this library; if not, write to the 00018 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00019 * Boston, MA 02110-1301, USA. 00020 * 00021 * In addition, as a special exception, Dom Lachowicz 00022 * gives permission to link the code of this program with 00023 * non-LGPL Spelling Provider libraries (eg: a MSFT Office 00024 * spell checker backend) and distribute linked combinations including 00025 * the two. You must obey the GNU General Public License in all 00026 * respects for all of the code used other than said providers. If you modify 00027 * this file, you may extend this exception to your version of the 00028 * file, but you are not obligated to do so. If you do not wish to 00029 * do so, delete this exception statement from your version. 00030 */ 00031 00032 /* 00033 * lookup.c - see if a word appears in the dictionary 00034 * 00035 * Pace Willisson, 1983 00036 * 00037 * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA 00038 * All rights reserved. 00039 * 00040 * Redistribution and use in source and binary forms, with or without 00041 * modification, are permitted provided that the following conditions 00042 * are met: 00043 * 00044 * 1. Redistributions of source code must retain the above copyright 00045 * notice, this list of conditions and the following disclaimer. 00046 * 2. Redistributions in binary form must reproduce the above copyright 00047 * notice, this list of conditions and the following disclaimer in the 00048 * documentation and/or other materials provided with the distribution. 00049 * 3. All modifications to the source code must be clearly marked as 00050 * such. Binary redistributions based on modified source code 00051 * must be clearly marked as modified versions in the documentation 00052 * and/or other materials provided with the distribution. 00053 * 4. All advertising materials mentioning features or use of this software 00054 * must display the following acknowledgment: 00055 * This product includes software developed by Geoff Kuenning and 00056 * other unpaid contributors. 00057 * 5. The name of Geoff Kuenning may not be used to endorse or promote 00058 * products derived from this software without specific prior 00059 * written permission. 00060 * 00061 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND 00062 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00063 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00064 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE 00065 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00066 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00067 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00068 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00069 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00070 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00071 * SUCH DAMAGE. 00072 */ 00073 00074 /* 00075 * $Log$ 00076 * Revision 1.1 2004/01/31 16:44:12 zrusin 00077 * ISpell plugin. 00078 * 00079 * Revision 1.7 2003/09/25 02:44:48 dom 00080 * bug 5813 00081 * 00082 * Revision 1.6 2003/08/26 13:20:40 dom 00083 * ispell crasher fix, implement enchant_dictionary_release 00084 * 00085 * Revision 1.5 2003/08/26 13:08:03 uwog 00086 * Fix segfault when the requested dictionary couldn't be found. 00087 * 00088 * Revision 1.4 2003/08/14 16:27:36 dom 00089 * update some documentation 00090 * 00091 * Revision 1.3 2003/07/28 20:40:27 dom 00092 * fix up the license clause, further win32-registry proof some directory getting functions 00093 * 00094 * Revision 1.2 2003/07/16 22:52:47 dom 00095 * LGPL + exception license 00096 * 00097 * Revision 1.1 2003/07/15 01:15:07 dom 00098 * ispell enchant backend 00099 * 00100 * Revision 1.3 2003/01/29 05:50:12 hippietrail 00101 * 00102 * Fixed my mess in EncodingManager. 00103 * Changed many C casts to C++ casts. 00104 * 00105 * Revision 1.2 2003/01/25 03:16:05 hippietrail 00106 * 00107 * An UT_ICONV_INVALID fix which escaped the last commit. 00108 * 00109 * Revision 1.1 2003/01/24 05:52:34 hippietrail 00110 * 00111 * Refactored ispell code. Old ispell global variables had been put into 00112 * an allocated structure, a pointer to which was passed to many functions. 00113 * I have now made all such functions and variables private members of the 00114 * ISpellChecker class. It was C OO, now it's C++ OO. 00115 * 00116 * I've fixed the makefiles and tested compilation but am unable to test 00117 * operation. Please back out my changes if they cause problems which 00118 * are not obvious or easy to fix. 00119 * 00120 * Revision 1.12 2003/01/06 18:48:39 dom 00121 * ispell cleanup, start of using new 'add' save features 00122 * 00123 * Revision 1.11 2002/09/19 05:31:17 hippietrail 00124 * 00125 * More Ispell cleanup. Conditional globals and DEREF macros are removed. 00126 * K&R function declarations removed, converted to Doxygen style comments 00127 * where possible. No code has been changed (I hope). Compiles for me but 00128 * unable to test. 00129 * 00130 * Revision 1.10 2002/09/17 03:03:30 hippietrail 00131 * 00132 * After seeking permission on the developer list I've reformatted all the 00133 * spelling source which seemed to have parts which used 2, 3, 4, and 8 00134 * spaces for tabs. It should all look good with our standard 4-space 00135 * tabs now. 00136 * I've concentrated just on indentation in the actual code. More prettying 00137 * could be done. 00138 * * NO code changes were made * 00139 * 00140 * Revision 1.9 2002/09/13 17:20:13 mpritchett 00141 * Fix more warnings for Linux build 00142 * 00143 * Revision 1.8 2002/05/03 09:49:43 fjfranklin 00144 * o hash downloader update (Gabriel Gerhardsson) 00145 * - Comment out the "Can't open <dictionary>" printf. 00146 * - Make the progressbar more clean at the begining of the download. 00147 * - Add support for tarballs that doesn't have the full path included 00148 * - Fix copyright headers on the newly added files (*HashDownloader.*) 00149 * 00150 * Revision 1.7 2001/08/27 19:06:30 dom 00151 * Lots of compilation fixes 00152 * 00153 * Revision 1.6 2001/08/10 18:32:40 dom 00154 * Spelling and iconv updates. god, i hate iconv 00155 * 00156 * Revision 1.5 2001/08/10 09:57:49 hub 00157 * Patch by sobomax@FreeBSD.org 00158 * #include "iconv.h" directive is missed from src/other/spell/xp/lookup.c and 00159 * src/wp/impexp/xp/ie_imp_RTF.cpp. 00160 * See bug 1823 00161 * 00162 * Revision 1.4 2001/07/18 17:46:01 dom 00163 * Module changes, and fix compiler warnings 00164 * 00165 * Revision 1.3 2001/06/12 21:32:49 dom 00166 * More ispell work... 00167 * 00168 * Revision 1.2 2001/05/12 16:05:42 thomasf 00169 * Big pseudo changes to ispell to make it pass around a structure rather 00170 * than rely on all sorts of gloabals willy nilly here and there. Also 00171 * fixed our spelling class to work with accepting suggestions once more. 00172 * This code is dirty, gross and ugly (not to mention still not supporting 00173 * multiple hash sized just yet) but it works on my machine and will no 00174 * doubt break other machines. 00175 * 00176 * Revision 1.1 2001/04/15 16:01:24 tomas_f 00177 * moving to spell/xp 00178 * 00179 * Revision 1.7 1999/09/29 23:33:32 justin 00180 * Updates to the underlying ispell-based code to support suggested corrections. 00181 * 00182 * Revision 1.6 1999/04/13 17:12:51 jeff 00183 * Applied "Darren O. Benham" <gecko@benham.net> spell check changes. 00184 * Fixed crash on Win32 with the new code. 00185 * 00186 * Revision 1.5 1999/01/07 01:07:48 paul 00187 * Fixed spell leaks. 00188 * 00189 * Revision 1.5 1999/01/07 01:07:48 paul 00190 * Fixed spell leaks. 00191 * 00192 * Revision 1.4 1998/12/29 14:55:33 eric 00193 * 00194 * I've doctored the ispell code pretty extensively here. It is now 00195 * warning-free on Win32. It also *works* on Win32 now, since I 00196 * replaced all the I/O calls with ANSI standard ones. 00197 * 00198 * Revision 1.3 1998/12/28 23:11:30 eric 00199 * 00200 * modified spell code and integration to build on Windows. 00201 * This is still a hack. 00202 * 00203 * Actually, it doesn't yet WORK on Windows. It just builds. 00204 * SpellCheckInit is failing for some reason. 00205 * 00206 * Revision 1.2 1998/12/28 22:16:22 eric 00207 * 00208 * These changes begin to incorporate the spell checker into AbiWord. Most 00209 * of this is a hack. 00210 * 00211 * 1. added other/spell to the -I list in config/abi_defs 00212 * 2. replaced other/spell/Makefile with one which is more like 00213 * our build system. 00214 * 3. added other/spell to other/Makefile so that the build will now 00215 * dive down and build the spell check library. 00216 * 4. added the AbiSpell library to the Makefiles in wp/main 00217 * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp. 00218 * This call is a HACK and should be replaced with something 00219 * proper later. 00220 * 6. added code to fv_View.cpp as follows: 00221 * whenever you double-click on a word, the spell checker 00222 * verifies that word and prints its status to stdout. 00223 * 00224 * Caveats: 00225 * 1. This will break the Windows build. I'm going to work on fixing it 00226 * now. 00227 * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash. 00228 * The dictionary location is currently hard-coded. This will be 00229 * fixed as well. 00230 * 00231 * Anyway, such as it is, it works. 00232 * 00233 * Revision 1.1 1998/12/28 18:04:43 davet 00234 * Spell checker code stripped from ispell. At this point, there are 00235 * two external routines... the Init routine, and a check-a-word routine 00236 * which returns a boolean value, and takes a 16 bit char string. 00237 * The code resembles the ispell code as much as possible still. 00238 * 00239 * Revision 1.42 1995/01/08 23:23:42 geoff 00240 * Support MSDOS_BINARY_OPEN when opening the hash file to read it in. 00241 * 00242 * Revision 1.41 1994/01/25 07:11:51 geoff 00243 * Get rid of all old RCS log lines in preparation for the 3.1 release. 00244 * 00245 */ 00246 00247 #include <stdlib.h> 00248 #include <string.h> 00249 #include <ctype.h> 00250 00251 #include "ispell_checker.h" 00252 #include "msgs.h" 00253 00254 #ifdef INDEXDUMP 00255 static void dumpindex P ((struct flagptr * indexp, int depth)); 00256 #endif /* INDEXDUMP */ 00257 00258 int gnMaskBits = 64; 00259 00265 int ISpellChecker::linit (char *hashname) 00266 { 00267 FILE* fpHash; 00268 00269 register int i; 00270 register struct dent * dp; 00271 struct flagent * entry; 00272 struct flagptr * ind; 00273 int nextchar, x; 00274 int viazero; 00275 register ichar_t * cp; 00276 00277 if ((fpHash = fopen (hashname, "rb")) == NULL) 00278 { 00279 return (-1); 00280 } 00281 00282 m_hashsize = fread (reinterpret_cast<char *>(&m_hashheader), 1, sizeof m_hashheader, fpHash); 00283 if (m_hashsize < static_cast<int>(sizeof(m_hashheader))) 00284 { 00285 if (m_hashsize < 0) 00286 fprintf (stderr, LOOKUP_C_CANT_READ, hashname); 00287 else if (m_hashsize == 0) 00288 fprintf (stderr, LOOKUP_C_NULL_HASH, hashname); 00289 else 00290 fprintf (stderr, 00291 LOOKUP_C_SHORT_HASH (m_hashname, m_hashsize, 00292 static_cast<int>(sizeof m_hashheader))); 00293 return (-1); 00294 } 00295 else if (m_hashheader.magic != MAGIC) 00296 { 00297 fprintf (stderr, 00298 LOOKUP_C_BAD_MAGIC (hashname, static_cast<unsigned int>(MAGIC), 00299 static_cast<unsigned int>(m_hashheader.magic))); 00300 return (-1); 00301 } 00302 else if (m_hashheader.magic2 != MAGIC) 00303 { 00304 fprintf (stderr, 00305 LOOKUP_C_BAD_MAGIC2 (hashname, static_cast<unsigned int>(MAGIC), 00306 static_cast<unsigned int>(m_hashheader.magic2))); 00307 return (-1); 00308 } 00309 /* else if (hashheader.compileoptions != COMPILEOPTIONS*/ 00310 else if ( 1 != 1 00311 || m_hashheader.maxstringchars != MAXSTRINGCHARS 00312 || m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN) 00313 { 00314 fprintf (stderr, 00315 LOOKUP_C_BAD_OPTIONS (static_cast<unsigned int>(m_hashheader.compileoptions), 00316 m_hashheader.maxstringchars, m_hashheader.maxstringcharlen, 00317 static_cast<unsigned int>(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN)); 00318 return (-1); 00319 } 00320 00321 { 00322 m_hashtbl = 00323 (struct dent *) 00324 calloc (static_cast<unsigned>(m_hashheader.tblsize), sizeof (struct dent)); 00325 m_hashsize = m_hashheader.tblsize; 00326 m_hashstrings = static_cast<char *>(malloc(static_cast<unsigned>(m_hashheader.stringsize))); 00327 } 00328 m_numsflags = m_hashheader.stblsize; 00329 m_numpflags = m_hashheader.ptblsize; 00330 m_sflaglist = (struct flagent *) 00331 malloc ((m_numsflags + m_numpflags) * sizeof (struct flagent)); 00332 if (m_hashtbl == NULL || m_hashstrings == NULL || m_sflaglist == NULL) 00333 { 00334 fprintf (stderr, LOOKUP_C_NO_HASH_SPACE); 00335 return (-1); 00336 } 00337 m_pflaglist = m_sflaglist + m_numsflags; 00338 00339 { 00340 if( fread ( m_hashstrings, 1, static_cast<unsigned>(m_hashheader.stringsize), fpHash) 00341 != static_cast<size_t>(m_hashheader.stringsize) ) 00342 { 00343 fprintf (stderr, LOOKUP_C_BAD_FORMAT); 00344 fprintf (stderr, "stringsize err\n" ); 00345 return (-1); 00346 } 00347 if ( m_hashheader.compileoptions & 0x04 ) 00348 { 00349 if( fread (reinterpret_cast<char *>(m_hashtbl), 1, static_cast<unsigned>(m_hashheader.tblsize) * sizeof(struct dent), fpHash) 00350 != (static_cast<size_t>(m_hashheader.tblsize * sizeof (struct dent)))) 00351 { 00352 fprintf (stderr, LOOKUP_C_BAD_FORMAT); 00353 return (-1); 00354 } 00355 } 00356 else 00357 { 00358 for( x=0; x<m_hashheader.tblsize; x++ ) 00359 { 00360 if( fread ( reinterpret_cast<char*>(m_hashtbl+x), sizeof( struct dent)-sizeof( MASKTYPE ), 1, fpHash) 00361 != 1) 00362 { 00363 fprintf (stderr, LOOKUP_C_BAD_FORMAT); 00364 return (-1); 00365 } 00366 } /*for*/ 00367 } /*else*/ 00368 } 00369 if (fread (reinterpret_cast<char *>(m_sflaglist), 1, 00370 static_cast<unsigned>(m_numsflags+ m_numpflags) * sizeof (struct flagent), fpHash) 00371 != (m_numsflags + m_numpflags) * sizeof (struct flagent)) 00372 { 00373 fprintf (stderr, LOOKUP_C_BAD_FORMAT); 00374 return (-1); 00375 } 00376 fclose (fpHash); 00377 00378 { 00379 for (i = m_hashsize, dp = m_hashtbl; --i >= 0; dp++) 00380 { 00381 if (dp->word == (char *) -1) 00382 dp->word = NULL; 00383 else 00384 dp->word = &m_hashstrings [ reinterpret_cast<size_t>(dp->word) ]; 00385 if (dp->next == (struct dent *) -1) 00386 dp->next = NULL; 00387 else 00388 dp->next = &m_hashtbl [ reinterpret_cast<size_t>(dp->next) ]; 00389 } 00390 } 00391 00392 for (i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++) 00393 { 00394 if (entry->stripl) 00395 entry->strip = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->strip)]); 00396 else 00397 entry->strip = NULL; 00398 if (entry->affl) 00399 entry->affix = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->affix)]); 00400 else 00401 entry->affix = NULL; 00402 } 00403 /* 00404 ** Warning - 'entry' and 'i' are reset in the body of the loop 00405 ** below. Don't try to optimize it by (e.g.) moving the decrement 00406 ** of i into the loop condition. 00407 */ 00408 for (i = m_numsflags, entry = m_sflaglist; i > 0; i--, entry++) 00409 { 00410 if (entry->affl == 0) 00411 { 00412 cp = NULL; 00413 ind = &m_sflagindex[0]; 00414 viazero = 1; 00415 } 00416 else 00417 { 00418 cp = entry->affix + entry->affl - 1; 00419 ind = &m_sflagindex[*cp]; 00420 viazero = 0; 00421 while (ind->numents == 0 && ind->pu.fp != NULL) 00422 { 00423 if (cp == entry->affix) 00424 { 00425 ind = &ind->pu.fp[0]; 00426 viazero = 1; 00427 } 00428 else 00429 { 00430 ind = &ind->pu.fp[*--cp]; 00431 viazero = 0; 00432 } 00433 } 00434 } 00435 if (ind->numents == 0) 00436 ind->pu.ent = entry; 00437 ind->numents++; 00438 /* 00439 ** If this index entry has more than MAXSEARCH flags in 00440 ** it, we will split it into subentries to reduce the 00441 ** searching. However, the split doesn't make sense in 00442 ** two cases: (a) if we are already at the end of the 00443 ** current affix, or (b) if all the entries in the list 00444 ** have identical affixes. Since the list is sorted, (b) 00445 ** is true if the first and last affixes in the list 00446 ** are identical. 00447 */ 00448 if (!viazero && ind->numents >= MAXSEARCH 00449 && icharcmp (entry->affix, ind->pu.ent->affix) != 0) 00450 { 00451 /* Sneaky trick: back up and reprocess */ 00452 entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ 00453 i = m_numsflags - (entry - m_sflaglist); 00454 ind->pu.fp = 00455 (struct flagptr *) 00456 calloc (static_cast<unsigned>(SET_SIZE + m_hashheader.nstrchars), 00457 sizeof (struct flagptr)); 00458 if (ind->pu.fp == NULL) 00459 { 00460 fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); 00461 return (-1); 00462 } 00463 ind->numents = 0; 00464 } 00465 } 00466 /* 00467 ** Warning - 'entry' and 'i' are reset in the body of the loop 00468 ** below. Don't try to optimize it by (e.g.) moving the decrement 00469 ** of i into the loop condition. 00470 */ 00471 for (i = m_numpflags, entry = m_pflaglist; i > 0; i--, entry++) 00472 { 00473 if (entry->affl == 0) 00474 { 00475 cp = NULL; 00476 ind = &m_pflagindex[0]; 00477 viazero = 1; 00478 } 00479 else 00480 { 00481 cp = entry->affix; 00482 ind = &m_pflagindex[*cp++]; 00483 viazero = 0; 00484 while (ind->numents == 0 && ind->pu.fp != NULL) 00485 { 00486 if (*cp == 0) 00487 { 00488 ind = &ind->pu.fp[0]; 00489 viazero = 1; 00490 } 00491 else 00492 { 00493 ind = &ind->pu.fp[*cp++]; 00494 viazero = 0; 00495 } 00496 } 00497 } 00498 if (ind->numents == 0) 00499 ind->pu.ent = entry; 00500 ind->numents++; 00501 /* 00502 ** If this index entry has more than MAXSEARCH flags in 00503 ** it, we will split it into subentries to reduce the 00504 ** searching. However, the split doesn't make sense in 00505 ** two cases: (a) if we are already at the end of the 00506 ** current affix, or (b) if all the entries in the list 00507 ** have identical affixes. Since the list is sorted, (b) 00508 ** is true if the first and last affixes in the list 00509 ** are identical. 00510 */ 00511 if (!viazero && ind->numents >= MAXSEARCH 00512 && icharcmp (entry->affix, ind->pu.ent->affix) != 0) 00513 { 00514 /* Sneaky trick: back up and reprocess */ 00515 entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ 00516 i = m_numpflags - (entry - m_pflaglist); 00517 ind->pu.fp = 00518 static_cast<struct flagptr *>(calloc(SET_SIZE + m_hashheader.nstrchars, 00519 sizeof (struct flagptr))); 00520 if (ind->pu.fp == NULL) 00521 { 00522 fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); 00523 return (-1); 00524 } 00525 ind->numents = 0; 00526 } 00527 } 00528 #ifdef INDEXDUMP 00529 fprintf (stderr, "Prefix index table:\n"); 00530 dumpindex (m_pflagindex, 0); 00531 fprintf (stderr, "Suffix index table:\n"); 00532 dumpindex (m_sflagindex, 0); 00533 #endif 00534 if (m_hashheader.nstrchartype == 0) 00535 m_chartypes = NULL; 00536 else 00537 { 00538 m_chartypes = (struct strchartype *) 00539 malloc (m_hashheader.nstrchartype * sizeof (struct strchartype)); 00540 if (m_chartypes == NULL) 00541 { 00542 fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); 00543 return (-1); 00544 } 00545 for (i = 0, nextchar = m_hashheader.strtypestart; 00546 i < m_hashheader.nstrchartype; 00547 i++) 00548 { 00549 m_chartypes[i].name = &m_hashstrings[nextchar]; 00550 nextchar += strlen (m_chartypes[i].name) + 1; 00551 m_chartypes[i].deformatter = &m_hashstrings[nextchar]; 00552 nextchar += strlen (m_chartypes[i].deformatter) + 1; 00553 m_chartypes[i].suffixes = &m_hashstrings[nextchar]; 00554 while (m_hashstrings[nextchar] != '\0') 00555 nextchar += strlen (&m_hashstrings[nextchar]) + 1; 00556 nextchar++; 00557 } 00558 } 00559 00560 initckch(NULL); 00561 00562 return (0); 00563 } 00564 00565 #ifndef FREEP 00566 #define FREEP(p) do { if (p) free(p); } while (0) 00567 #endif 00568 00572 void ISpellChecker::initckch (char *wchars) 00573 { 00574 register ichar_t c; 00575 char num[4]; 00576 00577 for (c = 0; c < static_cast<ichar_t>(SET_SIZE+ m_hashheader.nstrchars); ++c) 00578 { 00579 if (iswordch (c)) 00580 { 00581 if (!mylower (c)) 00582 { 00583 m_Try[m_Trynum] = c; 00584 ++m_Trynum; 00585 } 00586 } 00587 else if (isboundarych (c)) 00588 { 00589 m_Try[m_Trynum] = c; 00590 ++m_Trynum; 00591 } 00592 } 00593 if (wchars != NULL) 00594 { 00595 while (m_Trynum < SET_SIZE && *wchars != '\0') 00596 { 00597 if (*wchars != 'n' && *wchars != '\\') 00598 { 00599 c = *wchars; 00600 ++wchars; 00601 } 00602 else 00603 { 00604 ++wchars; 00605 num[0] = '\0'; 00606 num[1] = '\0'; 00607 num[2] = '\0'; 00608 num[3] = '\0'; 00609 if (isdigit (wchars[0])) 00610 { 00611 num[0] = wchars[0]; 00612 if (isdigit (wchars[1])) 00613 { 00614 num[1] = wchars[1]; 00615 if (isdigit (wchars[2])) 00616 num[2] = wchars[2]; 00617 } 00618 } 00619 if (wchars[-1] == 'n') 00620 { 00621 wchars += strlen (num); 00622 c = atoi (num); 00623 } 00624 else 00625 { 00626 wchars += strlen (num); 00627 c = 0; 00628 if (num[0]) 00629 c = num[0] - '0'; 00630 if (num[1]) 00631 { 00632 c <<= 3; 00633 c += num[1] - '0'; 00634 } 00635 if (num[2]) 00636 { 00637 c <<= 3; 00638 c += num[2] - '0'; 00639 } 00640 } 00641 } 00642 /* c &= NOPARITY;*/ 00643 if (!m_hashheader.wordchars[c]) 00644 { 00645 m_hashheader.wordchars[c] = 1; 00646 m_hashheader.sortorder[c] = m_hashheader.sortval++; 00647 m_Try[m_Trynum] = c; 00648 ++m_Trynum; 00649 } 00650 } 00651 } 00652 } 00653 00654 /* 00655 * \param indexp 00656 */ 00657 void ISpellChecker::clearindex (struct flagptr *indexp) 00658 { 00659 register int i; 00660 for (i = 0; i < SET_SIZE + m_hashheader.nstrchars; i++, indexp++) 00661 { 00662 if (indexp->numents == 0 && indexp->pu.fp != NULL) 00663 { 00664 clearindex(indexp->pu.fp); 00665 free(indexp->pu.fp); 00666 } 00667 } 00668 } 00669 00670 #ifdef INDEXDUMP 00671 static void dumpindex (indexp, depth) 00672 register struct flagptr * indexp; 00673 register int depth; 00674 { 00675 register int i; 00676 int j; 00677 int k; 00678 char stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; 00679 00680 for (i = 0; i < SET_SIZE + hashheader.nstrchars; i++, indexp++) 00681 { 00682 if (indexp->numents == 0 && indexp->pu.fp != NULL) 00683 { 00684 for (j = depth; --j >= 0; ) 00685 putc (' ', stderr); 00686 if (i >= ' ' && i <= '~') 00687 putc (i, stderr); 00688 else 00689 fprintf (stderr, "0x%x", i); 00690 putc ('\n', stderr); 00691 dumpindex (indexp->pu.fp, depth + 1); 00692 } 00693 else if (indexp->numents) 00694 { 00695 for (j = depth; --j >= 0; ) 00696 putc (' ', stderr); 00697 if (i >= ' ' && i <= '~') 00698 putc (i, stderr); 00699 else 00700 fprintf (stderr, "0x%x", i); 00701 fprintf (stderr, " -> %d entries\n", indexp->numents); 00702 for (k = 0; k < indexp->numents; k++) 00703 { 00704 for (j = depth; --j >= 0; ) 00705 putc (' ', stderr); 00706 if (indexp->pu.ent[k].stripl) 00707 { 00708 ichartostr (stripbuf, indexp->pu.ent[k].strip, 00709 sizeof stripbuf, 1); 00710 fprintf (stderr, " entry %d (-%s,%s)\n", 00711 &indexp->pu.ent[k] - sflaglist, 00712 stripbuf, 00713 indexp->pu.ent[k].affl 00714 ? ichartosstr (indexp->pu.ent[k].affix, 1) : "-"); 00715 } 00716 else 00717 fprintf (stderr, " entry %d (%s)\n", 00718 &indexp->pu.ent[k] - sflaglist, 00719 ichartosstr (indexp->pu.ent[k].affix, 1)); 00720 } 00721 } 00722 } 00723 } 00724 #endif 00725 00726 /* n is length of s */ 00727 00728 /* 00729 * \param s 00730 * \param dotree 00731 * 00732 * \return 00733 */ 00734 struct dent * ISpellChecker::ispell_lookup (ichar_t *s, int dotree) 00735 { 00736 register struct dent * dp; 00737 register char * s1; 00738 char schar[INPUTWORDLEN + MAXAFFIXLEN]; 00739 00740 dp = &m_hashtbl[hash (s, m_hashsize)]; 00741 if (ichartostr (schar, s, sizeof schar, 1)) 00742 fprintf (stderr, WORD_TOO_LONG (schar)); 00743 for ( ; dp != NULL; dp = dp->next) 00744 { 00745 /* quick strcmp, but only for equality */ 00746 s1 = dp->word; 00747 if (s1 && s1[0] == schar[0] && strcmp (s1 + 1, schar + 1) == 0) 00748 return dp; 00749 #ifndef NO_CAPITALIZATION_SUPPORT 00750 while (dp->flagfield & MOREVARIANTS) /* Skip variations */ 00751 dp = dp->next; 00752 #endif 00753 } 00754 return NULL; 00755 } 00756 00757 void ISpellChecker::alloc_ispell_struct() 00758 { 00759 m_translate_in = 0; 00760 } 00761 00762 void ISpellChecker::free_ispell_struct() 00763 { 00764 }