001/* BreakIterator.java -- Breaks text into elements 002 Copyright (C) 1998, 1999, 2001, 2004, 2005, 2007 003 Free Software Foundation, Inc. 004 005This file is part of GNU Classpath. 006 007GNU Classpath is free software; you can redistribute it and/or modify 008it under the terms of the GNU General Public License as published by 009the Free Software Foundation; either version 2, or (at your option) 010any later version. 011 012GNU Classpath is distributed in the hope that it will be useful, but 013WITHOUT ANY WARRANTY; without even the implied warranty of 014MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015General Public License for more details. 016 017You should have received a copy of the GNU General Public License 018along with GNU Classpath; see the file COPYING. If not, write to the 019Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02002110-1301 USA. 021 022Linking this library statically or dynamically with other modules is 023making a combined work based on this library. Thus, the terms and 024conditions of the GNU General Public License cover the whole 025combination. 026 027As a special exception, the copyright holders of this library give you 028permission to link this library with independent modules to produce an 029executable, regardless of the license terms of these independent 030modules, and to copy and distribute the resulting executable under 031terms of your choice, provided that you also meet, for each linked 032independent module, the terms and conditions of the license of that 033module. An independent module is a module which is not derived from 034or based on this library. If you modify this library, you may extend 035this exception to your version of the library, but you are not 036obligated to do so. If you do not wish to do so, delete this 037exception statement from your version. */ 038 039 040package java.text; 041 042import gnu.java.locale.LocaleHelper; 043 044import gnu.java.text.CharacterBreakIterator; 045import gnu.java.text.LineBreakIterator; 046import gnu.java.text.SentenceBreakIterator; 047import gnu.java.text.WordBreakIterator; 048 049import java.text.spi.BreakIteratorProvider; 050 051import java.util.Locale; 052import java.util.MissingResourceException; 053import java.util.ResourceBundle; 054import java.util.ServiceLoader; 055 056/** 057 * This class iterates over text elements such as words, lines, sentences, 058 * and characters. It can only iterate over one of these text elements at 059 * a time. An instance of this class configured for the desired iteration 060 * type is created by calling one of the static factory methods, not 061 * by directly calling a constructor. 062 * 063 * The standard iterators created by the factory methods in this 064 * class will be valid upon creation. That is, their methods will 065 * not cause exceptions if called before you call setText(). 066 * 067 * @author Tom Tromey (tromey@cygnus.com) 068 * @author Aaron M. Renn (arenn@urbanophile.com) 069 * @date March 19, 1999 070 */ 071/* Written using "Java Class Libraries", 2nd edition, plus online 072 * API docs for JDK 1.2 beta from http://www.javasoft.com. 073 * Status: Believed complete and correct to 1.1. 074 */ 075public abstract class BreakIterator implements Cloneable 076{ 077 /** 078 * This value is returned by the <code>next()</code> and 079 * <code>previous</code> in order to indicate that the end of the 080 * text has been reached. 081 */ 082 // The value was discovered by writing a test program. 083 public static final int DONE = -1; 084 085 /** 086 * This method initializes a new instance of <code>BreakIterator</code>. 087 * This protected constructor is available to subclasses as a default 088 * no-arg superclass constructor. 089 */ 090 protected BreakIterator () 091 { 092 } 093 094 /** 095 * Create a clone of this object. 096 */ 097 public Object clone () 098 { 099 try 100 { 101 return super.clone(); 102 } 103 catch (CloneNotSupportedException e) 104 { 105 return null; 106 } 107 } 108 109 /** 110 * This method returns the index of the current text element boundary. 111 * 112 * @return The current text boundary. 113 */ 114 public abstract int current (); 115 116 /** 117 * This method returns the first text element boundary in the text being 118 * iterated over. 119 * 120 * @return The first text boundary. 121 */ 122 public abstract int first (); 123 124 /** 125 * This methdod returns the offset of the text element boundary following 126 * the specified offset. 127 * 128 * @param pos The text index from which to find the next text boundary. 129 * 130 * @return The next text boundary following the specified index. 131 */ 132 public abstract int following (int pos); 133 134 /** 135 * This method returns a list of locales for which instances of 136 * <code>BreakIterator</code> are available. 137 * 138 * @return A list of available locales 139 */ 140 public static synchronized Locale[] getAvailableLocales () 141 { 142 Locale[] l = new Locale[1]; 143 l[0] = Locale.US; 144 return l; 145 } 146 147 private static BreakIterator getInstance (String type, Locale loc) 148 { 149 String className; 150 try 151 { 152 ResourceBundle res 153 = ResourceBundle.getBundle("gnu.java.locale.LocaleInformation", 154 loc, ClassLoader.getSystemClassLoader()); 155 className = res.getString(type); 156 } 157 catch (MissingResourceException x) 158 { 159 return null; 160 } 161 try 162 { 163 Class k = Class.forName(className); 164 return (BreakIterator) k.newInstance(); 165 } 166 catch (ClassNotFoundException x1) 167 { 168 return null; 169 } 170 catch (InstantiationException x2) 171 { 172 return null; 173 } 174 catch (IllegalAccessException x3) 175 { 176 return null; 177 } 178 } 179 180 /** 181 * This method returns an instance of <code>BreakIterator</code> that will 182 * iterate over characters as defined in the default locale. 183 * 184 * @return A <code>BreakIterator</code> instance for the default locale. 185 */ 186 public static BreakIterator getCharacterInstance () 187 { 188 return getCharacterInstance (Locale.getDefault()); 189 } 190 191 /** 192 * This method returns an instance of <code>BreakIterator</code> that will 193 * iterate over characters as defined in the specified locale. 194 * 195 * @param locale The desired locale. 196 * 197 * @return A <code>BreakIterator</code> instance for the specified locale. 198 */ 199 public static BreakIterator getCharacterInstance (Locale locale) 200 { 201 BreakIterator r = getInstance("CharacterIterator", locale); 202 if (r != null) 203 return r; 204 for (BreakIteratorProvider p : 205 ServiceLoader.load(BreakIteratorProvider.class)) 206 { 207 for (Locale loc : p.getAvailableLocales()) 208 { 209 if (loc.equals(locale)) 210 { 211 BreakIterator bi = p.getCharacterInstance(locale); 212 if (bi != null) 213 return bi; 214 break; 215 } 216 } 217 } 218 if (locale.equals(Locale.ROOT)) 219 return new CharacterBreakIterator(); 220 return getCharacterInstance(LocaleHelper.getFallbackLocale(locale)); 221 } 222 223 /** 224 * This method returns an instance of <code>BreakIterator</code> that will 225 * iterate over line breaks as defined in the default locale. 226 * 227 * @return A <code>BreakIterator</code> instance for the default locale. 228 */ 229 public static BreakIterator getLineInstance () 230 { 231 return getLineInstance (Locale.getDefault()); 232 } 233 234 /** 235 * This method returns an instance of <code>BreakIterator</code> that will 236 * iterate over line breaks as defined in the specified locale. 237 * 238 * @param locale The desired locale. 239 * 240 * @return A <code>BreakIterator</code> instance for the default locale. 241 */ 242 public static BreakIterator getLineInstance (Locale locale) 243 { 244 BreakIterator r = getInstance ("LineIterator", locale); 245 if (r != null) 246 return r; 247 for (BreakIteratorProvider p : 248 ServiceLoader.load(BreakIteratorProvider.class)) 249 { 250 for (Locale loc : p.getAvailableLocales()) 251 { 252 if (loc.equals(locale)) 253 { 254 BreakIterator bi = p.getLineInstance(locale); 255 if (bi != null) 256 return bi; 257 break; 258 } 259 } 260 } 261 if (locale.equals(Locale.ROOT)) 262 return new LineBreakIterator(); 263 return getLineInstance(LocaleHelper.getFallbackLocale(locale)); 264 } 265 266 /** 267 * This method returns an instance of <code>BreakIterator</code> that will 268 * iterate over sentences as defined in the default locale. 269 * 270 * @return A <code>BreakIterator</code> instance for the default locale. 271 */ 272 public static BreakIterator getSentenceInstance () 273 { 274 return getSentenceInstance (Locale.getDefault()); 275 } 276 277 /** 278 * This method returns an instance of <code>BreakIterator</code> that will 279 * iterate over sentences as defined in the specified locale. 280 * 281 * @param locale The desired locale. 282 * 283 * @return A <code>BreakIterator</code> instance for the default locale. 284 */ 285 public static BreakIterator getSentenceInstance (Locale locale) 286 { 287 BreakIterator r = getInstance ("SentenceIterator", locale); 288 if (r != null) 289 return r; 290 for (BreakIteratorProvider p : 291 ServiceLoader.load(BreakIteratorProvider.class)) 292 { 293 for (Locale loc : p.getAvailableLocales()) 294 { 295 if (loc.equals(locale)) 296 { 297 BreakIterator bi = p.getSentenceInstance(locale); 298 if (bi != null) 299 return bi; 300 break; 301 } 302 } 303 } 304 if (locale.equals(Locale.ROOT)) 305 return new SentenceBreakIterator(); 306 return getSentenceInstance(LocaleHelper.getFallbackLocale(locale)); 307 } 308 309 /** 310 * This method returns the text this object is iterating over as a 311 * <code>CharacterIterator</code>. 312 * 313 * @return The text being iterated over. 314 */ 315 public abstract CharacterIterator getText (); 316 317 /** 318 * This method returns an instance of <code>BreakIterator</code> that will 319 * iterate over words as defined in the default locale. 320 * 321 * @return A <code>BreakIterator</code> instance for the default locale. 322 */ 323 public static BreakIterator getWordInstance () 324 { 325 return getWordInstance (Locale.getDefault()); 326 } 327 328 /** 329 * This method returns an instance of <code>BreakIterator</code> that will 330 * iterate over words as defined in the specified locale. 331 * 332 * @param locale The desired locale. 333 * 334 * @return A <code>BreakIterator</code> instance for the default locale. 335 */ 336 public static BreakIterator getWordInstance (Locale locale) 337 { 338 BreakIterator r = getInstance ("WordIterator", locale); 339 if (r != null) 340 return r; 341 for (BreakIteratorProvider p : 342 ServiceLoader.load(BreakIteratorProvider.class)) 343 { 344 for (Locale loc : p.getAvailableLocales()) 345 { 346 if (loc.equals(locale)) 347 { 348 BreakIterator bi = p.getWordInstance(locale); 349 if (bi != null) 350 return bi; 351 break; 352 } 353 } 354 } 355 if (locale.equals(Locale.ROOT)) 356 return new WordBreakIterator(); 357 return getWordInstance(LocaleHelper.getFallbackLocale(locale)); 358 } 359 360 /** 361 * This method tests whether or not the specified position is a text 362 * element boundary. 363 * 364 * @param pos The text position to test. 365 * 366 * @return <code>true</code> if the position is a boundary, 367 * <code>false</code> otherwise. 368 */ 369 public boolean isBoundary (int pos) 370 { 371 if (pos == 0) 372 return true; 373 return following (pos - 1) == pos; 374 } 375 376 /** 377 * This method returns the last text element boundary in the text being 378 * iterated over. 379 * 380 * @return The last text boundary. 381 */ 382 public abstract int last (); 383 384 /** 385 * This method returns the text element boundary following the current 386 * text position. 387 * 388 * @return The next text boundary. 389 */ 390 public abstract int next (); 391 392 /** 393 * This method returns the n'th text element boundary following the current 394 * text position. 395 * 396 * @param n The number of text element boundaries to skip. 397 * 398 * @return The next text boundary. 399 */ 400 public abstract int next (int n); 401 402 /** 403 * This methdod returns the offset of the text element boundary preceding 404 * the specified offset. 405 * 406 * @param pos The text index from which to find the preceding text boundary. 407 * 408 * @returns The next text boundary preceding the specified index. 409 */ 410 public int preceding (int pos) 411 { 412 if (following (pos) == DONE) 413 last (); 414 while (previous () >= pos) 415 ; 416 return current (); 417 } 418 419 /** 420 * This method returns the text element boundary preceding the current 421 * text position. 422 * 423 * @return The previous text boundary. 424 */ 425 public abstract int previous (); 426 427 /** 428 * This method sets the text string to iterate over. 429 * 430 * @param newText The <code>String</code> to iterate over. 431 */ 432 public void setText (String newText) 433 { 434 setText (new StringCharacterIterator (newText)); 435 } 436 437 /** 438 * This method sets the text to iterate over from the specified 439 * <code>CharacterIterator</code>. 440 * 441 * @param newText The desired <code>CharacterIterator</code>. 442 */ 443 public abstract void setText (CharacterIterator newText); 444}