001/*
002 * Resolver.java February 2001
003 *
004 * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
015 * implied. See the License for the specific language governing 
016 * permissions and limitations under the License.
017 */
018 
019package org.simpleframework.xml.util;
020
021import java.util.AbstractSet;
022import java.util.ArrayList;
023import java.util.Iterator;
024import java.util.LinkedList;
025import java.util.List;
026
027/**
028 * This is used to store <code>Match</code> objects, which can then be
029 * retrieved using a string by comparing that string to the pattern of
030 * the <code>Match</code> objects. Patterns consist of characters
031 * with either the '*' or '?' characters as wild characters. The '*'
032 * character is completely wild meaning that is will match nothing or
033 * a long sequence of characters. The '?' character matches a single
034 * character.
035 * <p>
036 * If the '?' character immediately follows the '*' character then the
037 * match is made as any sequence of characters up to the first match 
038 * of the next character. For example "/*?/index.jsp" will match all 
039 * files preceeded by only a single path. So "/pub/index.jsp" will
040 * match, however "/pub/bin/index.jsp" will not, as it has two paths.
041 * So, in effect the '*?' sequence will match anything or nothing up
042 * to the first occurence of the next character in the pattern.
043 * <p>
044 * A design goal of the <code>Resolver</code> was to make it capable
045 * of  high performance. In order to achieve a high performance the 
046 * <code>Resolver</code> can cache the resolutions it makes so that if
047 * the same text is given to the <code>Resolver.resolve</code> method 
048 * a cached result can be retrived quickly which will decrease the 
049 * length of time and work required to perform the match.
050 * <p>
051 * The semantics of the resolver are such that the last pattern added
052 * with a wild string is the first one checked for a match. This means
053 * that if a sequence of insertions like <code>add(x)</code> followed
054 * by <code>add(y)</code> is made, then a <code>resolve(z)</code> will
055 * result in a comparison to y first and then x, if z matches y then 
056 * it is given as the result and if z does not match y and matches x 
057 * then x is returned, remember if z matches both x and y then y will 
058 * be the result due to the fact that is was the last pattern added.
059 *
060 * @author Niall Gallagher
061 */
062public class Resolver<M extends Match> extends AbstractSet<M> {
063
064   /**
065    * Caches the text resolutions made to reduce the work required.
066    */        
067   protected final Cache cache;
068
069   /**
070    * Stores the matches added to the resolver in resolution order.
071    */ 
072   protected final Stack stack;
073
074   /**
075    * The default constructor will create a <code>Resolver</code>
076    * without a large cache size. This is intended for use when 
077    * the requests for <code>resolve</code> tend to use strings
078    * that are reasonably similar. If the strings issued to this
079    * instance are dramatically different then the cache tends 
080    * to be an overhead rather than a bonus.
081    */
082   public Resolver(){
083      this.stack = new Stack();
084      this.cache = new Cache(); 
085   }
086
087   /**
088    * This will search the patterns in this <code>Resolver</code> to
089    * see if there is a pattern in it that matches the string given.
090    * This will search the patterns from the last entered pattern to
091    * the first entered. So that the last entered patterns are the
092    * most searched patterns and will resolve it first if it matches.
093    *
094    * @param text this is the string that is to be matched by this
095    *
096    * @return this will return the first match within the resolver
097    */
098   public M resolve(String text){
099      List<M> list = cache.get(text);
100      
101      if(list == null) {
102         list = resolveAll(text);
103      }
104      if(list.isEmpty()) {
105         return null;
106      }
107      return list.get(0);
108   }
109   
110   /**
111    * This will search the patterns in this <code>Resolver</code> to
112    * see if there is a pattern in it that matches the string given.
113    * This will search the patterns from the last entered pattern to
114    * the first entered. So that the last entered patterns are the
115    * most searched patterns and will resolve it first if it matches.
116    *
117    * @param text this is the string that is to be matched by this
118    *
119    * @return this will return all of the matches within the resolver
120    */
121   public List<M> resolveAll(String text){
122      List<M> list = cache.get(text);
123      
124      if(list != null) {
125         return list;
126      }
127      char[] array = text.toCharArray();
128  
129      if(array == null) {
130         return null;
131      }
132      return resolveAll(text, array);
133   }
134   
135   /**
136    * This will search the patterns in this <code>Resolver</code> to
137    * see if there is a pattern in it that matches the string given.
138    * This will search the patterns from the last entered pattern to
139    * the first entered. So that the last entered patterns are the
140    * most searched patterns and will resolve it first if it matches.
141    *
142    * @param text this is the string that is to be matched by this
143    * @param array this is the character array of the text string
144    *
145    * @return this will return all of the matches within the resolver
146    */
147   private List<M> resolveAll(String text, char[] array){
148      List<M> list = new ArrayList<M>();
149      
150      for(M match : stack) {
151         String wild = match.getPattern();
152
153         if(match(array, wild.toCharArray())){
154            cache.put(text, list);
155            list.add(match);
156         }
157      }
158      return list;
159   }
160
161   /**
162    * This inserts the <code>Match</code> implementation into the set
163    * so that it can be used for resolutions. The last added match is
164    * the first resolved. Because this changes the state of the 
165    * resolver this clears the cache as it may affect resolutions.
166    *
167    * @param match this is the match that is to be inserted to this
168    *
169    * @return returns true if the addition succeeded, always true
170    */ 
171   public boolean add(M match) {
172      stack.push(match);
173      return true;
174   }
175   
176   /**
177    * This returns an <code>Iterator</code> that iterates over the
178    * matches in insertion order. So the first match added is the
179    * first retrieved from the <code>Iterator</code>. This order is
180    * used to ensure that resolver can be serialized properly.
181    *
182    * @return returns an iterator for the sequence of insertion
183    */ 
184   public Iterator<M> iterator() {
185      return stack.sequence();
186   }
187
188   /**
189    * This is used to remove the <code>Match</code> implementation
190    * from the resolver. This clears the cache as the removal of
191    * a match may affect the resoultions existing in the cache. The
192    * <code>equals</code> method of the match must be implemented.
193    *
194    * @param match this is the match that is to be removed
195    *
196    * @return true of the removal of the match was successful
197    */ 
198   public boolean remove(M match) {
199      cache.clear();
200      return stack.remove(match);
201   }
202
203   /**
204    * Returns the number of matches that have been inserted into 
205    * the <code>Resolver</code>. Although this is a set, it does 
206    * not mean that matches cannot used the same pattern string.
207    *
208    * @return this returns the number of matches within the set
209    */ 
210   public int size() {
211      return stack.size();           
212   }
213   
214   /**
215    * This is used to clear all matches from the set. This ensures
216    * that the resolver contains no matches and that the resolution 
217    * cache is cleared. This is used to that the set can be reused
218    * and have new pattern matches inserted into it for resolution.
219    */ 
220   public void clear() {
221      cache.clear();      
222      stack.clear();
223   }
224
225   /**
226    * This acts as a driver to the <code>match</code> method so that
227    * the offsets can be used as zeros for the start of matching for 
228    * the <code>match(char[],int,char[],int)</code>. method. This is
229    * also used as the initializing driver for the recursive method.
230    *
231    * @param text this is the buffer that is to be resolved
232    * @param wild this is the pattern that will be used
233    */
234   private boolean match(char[] text, char[] wild){
235      return match(text, 0, wild, 0);
236   }
237
238   /**
239    * This will be used to check to see if a certain buffer matches
240    * the pattern if it does then it returns <code>true</code>. This
241    * is a recursive method that will attempt to match the buffers 
242    * based on the wild characters '?' and '*'. If there is a match
243    * then this returns <code>true</code>.
244    *
245    * @param text this is the buffer that is to be resolved
246    * @param off this is the read offset for the text buffer
247    * @param wild this is the pattern that will be used
248    * @param pos this is the read offset for the wild buffer
249    */
250   private boolean match(char[] text, int off, char[] wild, int pos){
251      while(pos < wild.length && off < text.length){ /* examine chars */
252         if(wild[pos] == '*'){
253            while(wild[pos] == '*'){ /* totally wild */
254               if(++pos >= wild.length) /* if finished */
255                  return true;
256            }
257            if(wild[pos] == '?') { /* *? is special */
258               if(++pos >= wild.length)                    
259                  return true;
260            }
261            for(; off < text.length; off++){ /* find next matching char */
262               if(text[off] == wild[pos] || wild[pos] == '?'){ /* match */
263                  if(wild[pos - 1] != '?'){
264                     if(match(text, off, wild, pos))
265                        return true;
266                  } else {
267                     break;                          
268                  }
269               }
270            }
271            if(text.length == off)
272               return false;
273         }
274         if(text[off++] != wild[pos++]){
275            if(wild[pos-1] != '?')
276               return false; /* if not equal */
277         }
278      }
279      if(wild.length == pos){ /* if wild is finished */
280          return text.length == off; /* is text finished */
281      }
282      while(wild[pos] == '*'){ /* ends in all stars */
283         if(++pos >= wild.length) /* if finished */
284            return true;
285      }
286      return false;
287   }
288
289
290   /**
291    * This is used to cache resolutions made so that the matches can
292    * be acquired the next time without performing the resolution.
293    * This is an LRU cache so regardless of the number of resolutions
294    * made this will not result in a memory leak for the resolver.
295    * 
296    * @author Niall Gallagher
297    */ 
298   private class Cache extends LimitedCache<List<M>> {
299
300      /**
301       * Constructor for the <code>Cache</code> object. This is a
302       * constructor that creates the linked hash map such that 
303       * it will purge the entries that are oldest within the map.
304       */ 
305      public Cache() {      
306         super(1024);              
307      }
308   }
309
310   /**
311    * This is used to store the <code>Match</code> implementations in
312    * resolution order. Resolving the match objects is performed so
313    * that the last inserted match object is the first used in the
314    * resolution process. This gives priority to the last inserted.
315    * 
316    * @author Niall Gallagher
317    */ 
318   private class Stack extends LinkedList<M> {     
319
320      /**
321       * The <code>push</code> method is used to push the match to 
322       * the top of the stack. This also ensures that the cache is
323       * cleared so the semantics of the resolver are not affected.
324       *
325       * @param match this is the match to be inserted to the stack
326       */            
327      public void push(M match) {
328         cache.clear();
329         addFirst(match);                       
330      }
331
332      /**
333       * The <code>purge</code> method is used to purge a match from
334       * the provided position. This also ensures that the cache is
335       * cleared so that the semantics of the resolver do not change.
336       *
337       * @param index the index of the match that is to be removed
338       */ 
339      public void purge(int index) {
340         cache.clear(); 
341         remove(index);         
342      }
343
344      /**
345       * This is returned from the <code>Resolver.iterator</code> so
346       * that matches can be iterated in insertion order. When a
347       * match is removed from this iterator then it clears the cache
348       * and removed the match from the <code>Stack</code> object.
349       * 
350       * @return returns an iterator to iterate in insertion order
351       */ 
352      public Iterator<M> sequence() {
353         return new Sequence();              
354      }
355
356      /**
357       * The is used to order the <code>Match</code> objects in the
358       * insertion order. Iterating in insertion order allows the
359       * resolver object to be serialized and deserialized to and
360       * from an XML document without disruption resolution order.
361       *
362       * @author Niall Gallagher
363       */
364      private class Sequence implements Iterator<M> {
365
366         /**
367          * The cursor used to acquire objects from the stack.
368          */               
369         private int cursor;
370
371         /**
372          * Constructor for the <code>Sequence</code> object. This is
373          * used to position the cursor at the end of the list so the
374          * first inserted match is the first returned from this.
375          */ 
376         public Sequence() {
377            this.cursor = size();                 
378         }
379
380         /**
381          * This returns the <code>Match</code> object at the cursor
382          * position. If the cursor has reached the start of the 
383          * list then this returns null instead of the first match.
384          * 
385          * @return this returns the match from the cursor position
386          */ 
387         public M next() {
388            if(hasNext()) {
389                return get(--cursor);
390            }           
391            return null;     
392         }    
393
394         /**
395          * This is used to determine if the cursor has reached the
396          * start of the list. When the cursor reaches the start of
397          * the list then this method returns false.
398          * 
399          * @return this returns true if there are more matches left
400          */ 
401         public boolean hasNext() {
402            return cursor > 0;
403         }
404
405         /**
406          * Removes the match from the cursor position. This also
407          * ensures that the cache is cleared so that resolutions
408          * made before the removal do not affect the semantics.
409          */ 
410         public void remove() {                    
411            purge(cursor);                
412         }        
413      }
414   } 
415}