001/* 002 * Resolver.java February 2001 003 * 004 * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net> 005 * 006 * Licensed under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 015 * implied. See the License for the specific language governing 016 * permissions and limitations under the License. 017 */ 018 019package org.simpleframework.xml.util; 020 021import java.util.AbstractSet; 022import java.util.ArrayList; 023import java.util.Iterator; 024import java.util.LinkedList; 025import java.util.List; 026 027/** 028 * This is used to store <code>Match</code> objects, which can then be 029 * retrieved using a string by comparing that string to the pattern of 030 * the <code>Match</code> objects. Patterns consist of characters 031 * with either the '*' or '?' characters as wild characters. The '*' 032 * character is completely wild meaning that is will match nothing or 033 * a long sequence of characters. The '?' character matches a single 034 * character. 035 * <p> 036 * If the '?' character immediately follows the '*' character then the 037 * match is made as any sequence of characters up to the first match 038 * of the next character. For example "/*?/index.jsp" will match all 039 * files preceeded by only a single path. So "/pub/index.jsp" will 040 * match, however "/pub/bin/index.jsp" will not, as it has two paths. 041 * So, in effect the '*?' sequence will match anything or nothing up 042 * to the first occurence of the next character in the pattern. 043 * <p> 044 * A design goal of the <code>Resolver</code> was to make it capable 045 * of high performance. In order to achieve a high performance the 046 * <code>Resolver</code> can cache the resolutions it makes so that if 047 * the same text is given to the <code>Resolver.resolve</code> method 048 * a cached result can be retrived quickly which will decrease the 049 * length of time and work required to perform the match. 050 * <p> 051 * The semantics of the resolver are such that the last pattern added 052 * with a wild string is the first one checked for a match. This means 053 * that if a sequence of insertions like <code>add(x)</code> followed 054 * by <code>add(y)</code> is made, then a <code>resolve(z)</code> will 055 * result in a comparison to y first and then x, if z matches y then 056 * it is given as the result and if z does not match y and matches x 057 * then x is returned, remember if z matches both x and y then y will 058 * be the result due to the fact that is was the last pattern added. 059 * 060 * @author Niall Gallagher 061 */ 062public class Resolver<M extends Match> extends AbstractSet<M> { 063 064 /** 065 * Caches the text resolutions made to reduce the work required. 066 */ 067 protected final Cache cache; 068 069 /** 070 * Stores the matches added to the resolver in resolution order. 071 */ 072 protected final Stack stack; 073 074 /** 075 * The default constructor will create a <code>Resolver</code> 076 * without a large cache size. This is intended for use when 077 * the requests for <code>resolve</code> tend to use strings 078 * that are reasonably similar. If the strings issued to this 079 * instance are dramatically different then the cache tends 080 * to be an overhead rather than a bonus. 081 */ 082 public Resolver(){ 083 this.stack = new Stack(); 084 this.cache = new Cache(); 085 } 086 087 /** 088 * This will search the patterns in this <code>Resolver</code> to 089 * see if there is a pattern in it that matches the string given. 090 * This will search the patterns from the last entered pattern to 091 * the first entered. So that the last entered patterns are the 092 * most searched patterns and will resolve it first if it matches. 093 * 094 * @param text this is the string that is to be matched by this 095 * 096 * @return this will return the first match within the resolver 097 */ 098 public M resolve(String text){ 099 List<M> list = cache.get(text); 100 101 if(list == null) { 102 list = resolveAll(text); 103 } 104 if(list.isEmpty()) { 105 return null; 106 } 107 return list.get(0); 108 } 109 110 /** 111 * This will search the patterns in this <code>Resolver</code> to 112 * see if there is a pattern in it that matches the string given. 113 * This will search the patterns from the last entered pattern to 114 * the first entered. So that the last entered patterns are the 115 * most searched patterns and will resolve it first if it matches. 116 * 117 * @param text this is the string that is to be matched by this 118 * 119 * @return this will return all of the matches within the resolver 120 */ 121 public List<M> resolveAll(String text){ 122 List<M> list = cache.get(text); 123 124 if(list != null) { 125 return list; 126 } 127 char[] array = text.toCharArray(); 128 129 if(array == null) { 130 return null; 131 } 132 return resolveAll(text, array); 133 } 134 135 /** 136 * This will search the patterns in this <code>Resolver</code> to 137 * see if there is a pattern in it that matches the string given. 138 * This will search the patterns from the last entered pattern to 139 * the first entered. So that the last entered patterns are the 140 * most searched patterns and will resolve it first if it matches. 141 * 142 * @param text this is the string that is to be matched by this 143 * @param array this is the character array of the text string 144 * 145 * @return this will return all of the matches within the resolver 146 */ 147 private List<M> resolveAll(String text, char[] array){ 148 List<M> list = new ArrayList<M>(); 149 150 for(M match : stack) { 151 String wild = match.getPattern(); 152 153 if(match(array, wild.toCharArray())){ 154 cache.put(text, list); 155 list.add(match); 156 } 157 } 158 return list; 159 } 160 161 /** 162 * This inserts the <code>Match</code> implementation into the set 163 * so that it can be used for resolutions. The last added match is 164 * the first resolved. Because this changes the state of the 165 * resolver this clears the cache as it may affect resolutions. 166 * 167 * @param match this is the match that is to be inserted to this 168 * 169 * @return returns true if the addition succeeded, always true 170 */ 171 public boolean add(M match) { 172 stack.push(match); 173 return true; 174 } 175 176 /** 177 * This returns an <code>Iterator</code> that iterates over the 178 * matches in insertion order. So the first match added is the 179 * first retrieved from the <code>Iterator</code>. This order is 180 * used to ensure that resolver can be serialized properly. 181 * 182 * @return returns an iterator for the sequence of insertion 183 */ 184 public Iterator<M> iterator() { 185 return stack.sequence(); 186 } 187 188 /** 189 * This is used to remove the <code>Match</code> implementation 190 * from the resolver. This clears the cache as the removal of 191 * a match may affect the resoultions existing in the cache. The 192 * <code>equals</code> method of the match must be implemented. 193 * 194 * @param match this is the match that is to be removed 195 * 196 * @return true of the removal of the match was successful 197 */ 198 public boolean remove(M match) { 199 cache.clear(); 200 return stack.remove(match); 201 } 202 203 /** 204 * Returns the number of matches that have been inserted into 205 * the <code>Resolver</code>. Although this is a set, it does 206 * not mean that matches cannot used the same pattern string. 207 * 208 * @return this returns the number of matches within the set 209 */ 210 public int size() { 211 return stack.size(); 212 } 213 214 /** 215 * This is used to clear all matches from the set. This ensures 216 * that the resolver contains no matches and that the resolution 217 * cache is cleared. This is used to that the set can be reused 218 * and have new pattern matches inserted into it for resolution. 219 */ 220 public void clear() { 221 cache.clear(); 222 stack.clear(); 223 } 224 225 /** 226 * This acts as a driver to the <code>match</code> method so that 227 * the offsets can be used as zeros for the start of matching for 228 * the <code>match(char[],int,char[],int)</code>. method. This is 229 * also used as the initializing driver for the recursive method. 230 * 231 * @param text this is the buffer that is to be resolved 232 * @param wild this is the pattern that will be used 233 */ 234 private boolean match(char[] text, char[] wild){ 235 return match(text, 0, wild, 0); 236 } 237 238 /** 239 * This will be used to check to see if a certain buffer matches 240 * the pattern if it does then it returns <code>true</code>. This 241 * is a recursive method that will attempt to match the buffers 242 * based on the wild characters '?' and '*'. If there is a match 243 * then this returns <code>true</code>. 244 * 245 * @param text this is the buffer that is to be resolved 246 * @param off this is the read offset for the text buffer 247 * @param wild this is the pattern that will be used 248 * @param pos this is the read offset for the wild buffer 249 */ 250 private boolean match(char[] text, int off, char[] wild, int pos){ 251 while(pos < wild.length && off < text.length){ /* examine chars */ 252 if(wild[pos] == '*'){ 253 while(wild[pos] == '*'){ /* totally wild */ 254 if(++pos >= wild.length) /* if finished */ 255 return true; 256 } 257 if(wild[pos] == '?') { /* *? is special */ 258 if(++pos >= wild.length) 259 return true; 260 } 261 for(; off < text.length; off++){ /* find next matching char */ 262 if(text[off] == wild[pos] || wild[pos] == '?'){ /* match */ 263 if(wild[pos - 1] != '?'){ 264 if(match(text, off, wild, pos)) 265 return true; 266 } else { 267 break; 268 } 269 } 270 } 271 if(text.length == off) 272 return false; 273 } 274 if(text[off++] != wild[pos++]){ 275 if(wild[pos-1] != '?') 276 return false; /* if not equal */ 277 } 278 } 279 if(wild.length == pos){ /* if wild is finished */ 280 return text.length == off; /* is text finished */ 281 } 282 while(wild[pos] == '*'){ /* ends in all stars */ 283 if(++pos >= wild.length) /* if finished */ 284 return true; 285 } 286 return false; 287 } 288 289 290 /** 291 * This is used to cache resolutions made so that the matches can 292 * be acquired the next time without performing the resolution. 293 * This is an LRU cache so regardless of the number of resolutions 294 * made this will not result in a memory leak for the resolver. 295 * 296 * @author Niall Gallagher 297 */ 298 private class Cache extends LimitedCache<List<M>> { 299 300 /** 301 * Constructor for the <code>Cache</code> object. This is a 302 * constructor that creates the linked hash map such that 303 * it will purge the entries that are oldest within the map. 304 */ 305 public Cache() { 306 super(1024); 307 } 308 } 309 310 /** 311 * This is used to store the <code>Match</code> implementations in 312 * resolution order. Resolving the match objects is performed so 313 * that the last inserted match object is the first used in the 314 * resolution process. This gives priority to the last inserted. 315 * 316 * @author Niall Gallagher 317 */ 318 private class Stack extends LinkedList<M> { 319 320 /** 321 * The <code>push</code> method is used to push the match to 322 * the top of the stack. This also ensures that the cache is 323 * cleared so the semantics of the resolver are not affected. 324 * 325 * @param match this is the match to be inserted to the stack 326 */ 327 public void push(M match) { 328 cache.clear(); 329 addFirst(match); 330 } 331 332 /** 333 * The <code>purge</code> method is used to purge a match from 334 * the provided position. This also ensures that the cache is 335 * cleared so that the semantics of the resolver do not change. 336 * 337 * @param index the index of the match that is to be removed 338 */ 339 public void purge(int index) { 340 cache.clear(); 341 remove(index); 342 } 343 344 /** 345 * This is returned from the <code>Resolver.iterator</code> so 346 * that matches can be iterated in insertion order. When a 347 * match is removed from this iterator then it clears the cache 348 * and removed the match from the <code>Stack</code> object. 349 * 350 * @return returns an iterator to iterate in insertion order 351 */ 352 public Iterator<M> sequence() { 353 return new Sequence(); 354 } 355 356 /** 357 * The is used to order the <code>Match</code> objects in the 358 * insertion order. Iterating in insertion order allows the 359 * resolver object to be serialized and deserialized to and 360 * from an XML document without disruption resolution order. 361 * 362 * @author Niall Gallagher 363 */ 364 private class Sequence implements Iterator<M> { 365 366 /** 367 * The cursor used to acquire objects from the stack. 368 */ 369 private int cursor; 370 371 /** 372 * Constructor for the <code>Sequence</code> object. This is 373 * used to position the cursor at the end of the list so the 374 * first inserted match is the first returned from this. 375 */ 376 public Sequence() { 377 this.cursor = size(); 378 } 379 380 /** 381 * This returns the <code>Match</code> object at the cursor 382 * position. If the cursor has reached the start of the 383 * list then this returns null instead of the first match. 384 * 385 * @return this returns the match from the cursor position 386 */ 387 public M next() { 388 if(hasNext()) { 389 return get(--cursor); 390 } 391 return null; 392 } 393 394 /** 395 * This is used to determine if the cursor has reached the 396 * start of the list. When the cursor reaches the start of 397 * the list then this method returns false. 398 * 399 * @return this returns true if there are more matches left 400 */ 401 public boolean hasNext() { 402 return cursor > 0; 403 } 404 405 /** 406 * Removes the match from the cursor position. This also 407 * ensures that the cache is cleared so that resolutions 408 * made before the removal do not affect the semantics. 409 */ 410 public void remove() { 411 purge(cursor); 412 } 413 } 414 } 415}