/*--

 Copyright 2003 Jan Pavlovic and Tomas Pitner.
 Masaryk University in Brno, Czech Republic
 All rights reserved.
 
 This code is based on the work copyrighted by

 Copyright 2001-2003 Elliotte Rusty Harold.
 All rights reserved.

    This file is part of XIncluder, a Java class library for integrating XInclude
    processing with SAX, DOM, and JDOM.

    XIncluder is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License version 2.1 
    as published by the Free Software Foundation.

    XIncluder is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with XIncluder; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT SHALL ELLIOTTE RUSTY HAROLD OR ANY
 OTHER CONTRIBUTORS TO THIS PACKAGE
 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.

 */

package com.elharo.xml.xinclude;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.EntityResolver;
import org.xml.sax.Locator;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.XMLFilterImpl;
import org.xml.sax.helpers.NamespaceSupport;
import org.xml.sax.helpers.AttributesImpl;

import java.net.URL;
import java.net.URLConnection;
import java.net.MalformedURLException;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.io.InputStream;
import java.io.BufferedInputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.util.Stack;

/**
 * <p>
 *  This is a SAX filter which resolves all XInclude include elements
 *  before passing them on to the client application. Currently this
 *  class has the following known deviation from the XInclude specification:
 * </p>
 *  <ol>
 *   <li>XPointer is not supported.</li>
 *  </ol>
 *
 *  <p>
 *    Extensions made by JP and TP:<br/>
 *    the URL of the included TEXT document (i.e. if <code>parse='text'</code>)
 *    can be in one of the following forms (lines are numbered starting at 1):
 *  </p>
 *  <ul>
 *    <li><code>textfileurl#startLineNumber</code> will include only the
 *        line with the number startLineNumber</li>
 *    <li><code>textfileurl#startLineNumber$count</code> will include count
 *        lines beginning from startLineNumber</li>
 *    <li><code>textfileurl#startLineNumber-endLineNumber</code> will include
 *        all lines beginning from startLineNumber and ending at (and
 *        including) line endLineNumber</li>
 *    <li><code>textfileurl#startLineNumber-</code> will include all lines
 *        beginning from startLineNumber to the end of the file</li>
 *    <li><code>textfileurl#$countOfLines</code> or
 *        <code>textfileurl#-countOfLines</code> will include the first
 *        countOfLines lines from the file</li>
 *  </ul>
 *
 *  <p>
 *    Furthermore, I would definitely use a new instance of this class
 *    for each document you want to process. I doubt it can be used
 *    successfully on multiple documents. Furthermore, I can virtually
 *    guarantee that this class is not thread safe. You have been
 *    warned.
 *  </p>
 *
 *  <p>
 *    Since this class is not designed to be subclassed, and since
 *    I have not yet considered how that might affect the methods 
 *    herein or what other protected methods might be needed to support 
 *    subclasses, I have declared this class final. I may remove this 
 *    restriction later, though the use-case for subclassing is weak.
 *    This class is designed to have its functionality extended via a
 *    horizontal chain of filters, not a 
 *    vertical hierarchy of sub and superclasses.
 *  </p>
 *
 *  <p>
 *    To use this class: 
 *  </p>
 *  <ol>
 *   <li>Construct an <code>XIncludeFilter</code> object with a known base URL</li>
 *   <li>Pass the <code>XMLReader</code> object from which the raw document will 
 *       be read to the <code>setParent()</code> method of this object. </li>
 *   <li>Pass your own <code>ContentHandler</code> object to the 
 *       <code>setContentHandler()</code> method of this object. This is the 
 *       object which will receive events from the parsed and included
 *       document.
 *   </li>
 *   <li>Optional: if you wish to receive comments, set your own 
 *       <code>LexicalHandler</code> object as the value of this object's
 *       http://xml.org/sax/properties/lexical-handler property.
 *       Also make sure your <code>LexicalHandler</code> asks this object 
 *       for the status of each comment using <code>insideIncludeElement</code>
 *       before doing anything with the comment. 
 *   </li>
 *   <li>Pass the URL of the document to read to this object's 
 *       <code>parse()</code> method</li>
 *  </ol>
 * 
 *  <p> e.g.</p>
 *  <pre><code>XIncludeFilter includer = new XIncludeFilter(base); 
 *  includer.setParent(parser);
 *  includer.setContentHandler(new SAXXIncluder(System.out));
 *  includer.parse(args[i]);</code>
 *  </pre>
 *
 * @author Elliotte Rusty Harold
 * @version 1.0d11, March 9, 2003
 */
public final class XIncludeFilter extends XMLFilterImpl {

    /** 
     * Marks the start of the line-selection fragment in the URL of an
     * included text document; the number of the first line to be
     * included follows this string.
     */
    protected static final String term1 = "#";

    /** 
     * Within the line-selection fragment, the number of lines to be
     * included follows this string.
     */
    protected static final String term2 = "$";

    /** 
     * Within the line-selection fragment, the number of the last line
     * to be included follows this string.
     */
    protected static final String term3 = "-";

    /** The namespace URI of the include elements this filter resolves. */
    public final static String XINCLUDE_NAMESPACE
     = "http://www.w3.org/2001/XInclude";

    // Base URLs currently in effect. One entry is pushed per document
    // (in setDocumentLocator or startDocument), one per included document
    // (in includeXMLDocument) and one per element seen at level 0.
    private Stack bases = new Stack();

    // Locators of the documents currently being parsed, innermost on top.
    // An entry is null when the parser did not supply a locator.
    private Stack locators = new Stack();

    // true between a setDocumentLocator() call and the startDocument()
    // call that follows it
    private boolean locatorSupplied = false;

    // nesting depth of xi:include elements whose content is being
    // thrown away; 0 means events are passed on
    private int level = 0;

    // number of documents (original plus included) currently open
    private int depth = 0;

    // true until the root element of a freshly included document is seen
    private boolean atRoot = false;


    /**
     * <p>
     * Records the locator of the document that is about to be parsed
     * and pushes the document's system ID as the current base URL.
     * </p>
     *
     * @param locator the locator supplied by the parser
     * @throws UnsupportedOperationException if the system ID reported
     *         by the locator cannot be parsed as a URL
     */
    public void setDocumentLocator(Locator locator) {

        String base = locator.getSystemId();
        URL baseURL;
        try {
            baseURL = new URL(base);
        }
        catch (MalformedURLException e) {
            throw new UnsupportedOperationException("Unrecognized SYSTEM ID: " + base);
        }
        locators.push(locator);
        bases.push(baseURL);
        locatorSupplied = true;
        super.setDocumentLocator(locator);

    }


    /**
     * <p>
     * This utility method returns true if and only if this reader is 
     * currently inside a non-empty include element. (This is <strong>
     * not</strong> the same as being inside the node set which replaces
     * the include element.) This is primarily needed for comments
     * inside include elements. It must be checked by the actual
     * LexicalHandler to see whether a comment is passed or not.
     * </p>
     *
     * @return true if the content currently being read is discarded
     */
    public boolean insideIncludeElement() {
        return level != 0;
    }


    // Tells whether the given expanded name is that of xi:include
    private static boolean isIncludeElement(String uri, String localName) {
        return XINCLUDE_NAMESPACE.equals(uri) && "include".equals(localName);
    }


    /**
     * <p>
     * Replaces an include element by the document it refers to and
     * passes every other element on, unless it lies inside an include
     * element, in which case it is discarded. The base URL in effect
     * for the element (its <code>xml:base</code> attribute resolved
     * against the parent's base, or the parent's base) is pushed on
     * the base stack. The root element of an included document is
     * given an <code>xml:base</code> attribute holding its base URL.
     * </p>
     *
     * @throws SAXException if <code>xml:base</code> is malformed, if an
     *         include element has no <code>href</code> attribute or an
     *         illegal <code>parse</code> attribute, or if the referenced
     *         document cannot be included
     */
    public void startElement(String uri, String localName,
      String qName, Attributes atts) throws SAXException {

        boolean include = isIncludeElement(uri, localName);

        if (level != 0) {
            // Content of an include element is thrown away, but nested
            // include elements must still be counted because endElement()
            // decrements the level for every include end tag.
            if (include) level++;
            return;
        }

        // Adjust bases stack by pushing either the new
        // value of xml:base or the base of the parent
        String base = atts.getValue(NamespaceSupport.XMLNS, "base");
        URL parentBase = (URL) bases.peek();
        URL currentBase = parentBase;
        if (base != null) {
            try {
                currentBase = new URL(parentBase, base);
            }
            catch (MalformedURLException e) {
                throw new SAXException("Malformed base URL: " + base, e);
            }
        }
        bases.push(currentBase);

        if (include) {
            String href = atts.getValue("href");
            if (href == null) {
                throw new SAXException("Missing href attribute");
            }

            String parse = atts.getValue("parse");
            if (parse == null) parse = "xml";

            if (parse.equals("text")) {
                includeTextDocument(href, atts.getValue("encoding"));
            }
            else if (parse.equals("xml")) {
                includeXMLDocument(href);
            }
            else {
                throw new SAXException(
                  "Illegal value for parse attribute: " + parse);
            }
            // everything up to the matching end tag is discarded
            level++;
        }
        else {
            if (atRoot) {
                // Record the base URL of the included document on its
                // root element; overwrite an existing xml:base rather
                // than adding a second attribute with the same name.
                AttributesImpl attsImpl = new AttributesImpl(atts);
                String baseValue = currentBase.toExternalForm();
                int baseIndex = attsImpl.getIndex(NamespaceSupport.XMLNS, "base");
                if (baseIndex >= 0) {
                    attsImpl.setValue(baseIndex, baseValue);
                }
                else {
                    attsImpl.addAttribute(NamespaceSupport.XMLNS, "base",
                      "xml:base", "CDATA", baseValue);
                }
                atts = attsImpl;
                atRoot = false;
            }
            super.startElement(uri, localName, qName, atts);
        }

    }


    /**
     * <p>
     * Closes an element. End tags of include elements are never passed
     * on; other end tags are passed on only outside include elements.
     * The base URL pushed by the matching <code>startElement()</code>
     * call is popped.
     * </p>
     */
    public void endElement(String uri, String localName, String qName)
      throws SAXException {

        if (isIncludeElement(uri, localName)) {
            level--;
            // Leaving the outermost include element: drop the base
            // that startElement() pushed for it.
            if (level == 0) bases.pop();
        }
        else if (level == 0) {
            bases.pop();
            super.endElement(uri, localName, qName);
        }

    }


    /**
     * <p>
     * Starts a document. The event is passed on only for the outermost
     * document. If the parser did not call
     * <code>setDocumentLocator()</code> for this document, placeholders
     * are pushed so that <code>endDocument()</code> stays balanced: a
     * null locator, and the base currently on top of the base stack
     * (null if there is none).
     * </p>
     */
    public void startDocument() throws SAXException {

        if (!locatorSupplied) {
            locators.push(null);
            bases.push(bases.isEmpty() ? null : bases.peek());
        }
        locatorSupplied = false;

        level = 0;
        if (depth == 0) super.startDocument();
        depth++;

    }


    /**
     * <p>
     * Ends a document, discarding its locator and base URL. The event
     * is passed on only for the outermost document.
     * </p>
     */
    public void endDocument() throws SAXException {

        locators.pop();
        bases.pop(); // pop the URL for the document itself
        depth--;
        if (depth == 0) super.endDocument();

    }


    // how do prefix mappings move across documents????
    public void startPrefixMapping(String prefix, String uri)
      throws SAXException {
        if (level == 0) super.startPrefixMapping(prefix, uri);
    }

    public void endPrefixMapping(String prefix)
      throws SAXException {
        if (level == 0) super.endPrefixMapping(prefix);
    }

    public void characters(char[] ch, int start, int length)
      throws SAXException {
        if (level == 0) super.characters(ch, start, length);
    }

    public void ignorableWhitespace(char[] ch, int start, int length)
      throws SAXException {
        if (level == 0) super.ignorableWhitespace(ch, start, length);
    }

    public void processingInstruction(String target, String data)
      throws SAXException {
        if (level == 0) super.processingInstruction(target, data);
    }

    public void skippedEntity(String name) throws SAXException {
        if (level == 0) super.skippedEntity(name);
    }


    // Convenience method for error messages: describes the position in
    // the including document. Works without a locator as well.
    private String getLocation() {

        Locator locator = null;
        if (!locators.isEmpty()) locator = (Locator) locators.peek();

        String publicID = "";
        String systemID = "";
        int column = -1;
        int line = -1;
        if (locator != null) {
            publicID = locator.getPublicId();
            systemID = locator.getSystemId();
            line = locator.getLineNumber();
            column = locator.getColumnNumber();
        }

        return " in document included from " + publicID
          + " at " + systemID
          + " at line " + line + ", column " + column;

    }


    /**
     * <p>
     * Extracts the number of the first line to include from the
     * line-selection fragment of a text include URL. Only the part of
     * the URL after the first <code>#</code> is examined, so
     * <code>$</code> and <code>-</code> characters in the path do not
     * interfere. Extension by Jan Pavlovic and Tomas Pitner, May 2003.
     * </p>
     *
     * @param  url  URL of the document that will be read
     * @return 0 if the URL has no fragment or the line number is not
     *         an integer (the whole document is included then);
     *         1 if the fragment starts with <code>$</code> or
     *         <code>-</code>; the parsed line number otherwise
     */
    protected int getLineBegin(String url) {

        int fragmentStart = url.indexOf(term1) + 1; // position after #
        if (fragmentStart == 0) {
            return 0;
        }

        // the line number runs up to $, else up to -, else to the end
        int separatorIndex = url.indexOf(term2, fragmentStart);
        if (separatorIndex < 0) {
            separatorIndex = url.indexOf(term3, fragmentStart);
        }
        if (separatorIndex < 0) {
            separatorIndex = url.length();
        }

        if (fragmentStart == separatorIndex) { // #$... or #-...
            return 1;
        }

        String begin = url.substring(fragmentStart, separatorIndex).trim();
        try {
            return Integer.parseInt(begin);
        }
        catch (NumberFormatException nfe) {
            return 0;
        }

    }


    /**
     * <p>
     * Extracts the number of lines to include from the line-selection
     * fragment of a text include URL. Only the part of the URL after
     * the first <code>#</code> is examined. Extension by Jan Pavlovic
     * and Tomas Pitner, May 2003.
     * </p>
     *
     * @param  url  URL of the document that will be read
     * @return <code>Integer.MAX_VALUE</code> if the URL has no fragment
     *         or ends with <code>-</code>; 1 if the fragment contains
     *         neither <code>$</code> nor <code>-</code>; the number
     *         after <code>$</code> if present; otherwise the number of
     *         lines from the first line to the line number given after
     *         <code>-</code>; 0 if the number cannot be parsed
     */
    protected int getLineCount(String url) {

        int fragmentStart = url.indexOf(term1) + 1;
        if (fragmentStart == 0) {
            return Integer.MAX_VALUE;
        }

        // positions just after $ and -, or 0 if absent from the fragment
        int countStart = url.indexOf(term2, fragmentStart) + 1;
        int lastStart = url.indexOf(term3, fragmentStart) + 1;

        // if neither $ nor - then include one line:
        if (countStart == 0 && lastStart == 0) {
            return 1;
        }

        // if - at the end of the url then include all lines to the end:
        if (lastStart == url.length()) {
            return Integer.MAX_VALUE;
        }

        try {
            if (countStart > 0) {
                // '$' (count of lines) specified
                return Integer.parseInt(url.substring(countStart).trim());
            }
            // '-' (last line number) specified
            int last = Integer.parseInt(url.substring(lastStart).trim());
            return last + 1 - getLineBegin(url);
        }
        catch (NumberFormatException nfe) {
            return 0;
        }

    }


    // Resolves an href against the base URL currently in effect.
    private URL resolveAgainstCurrentBase(String url) throws SAXException {

        try {
            URL base = (URL) bases.peek();
            return new URL(base, url);
        }
        catch (MalformedURLException e) {
            UnavailableResourceException ex =
              new UnavailableResourceException("Unresolvable URL " + url
              + getLocation());
            ex.setRootCause(e);
            throw new SAXException("Unresolvable URL " + url + getLocation(), ex);
        }

    }


    // Returns the value of the charset parameter of a Content-Type
    // header value, or null if there is none.
    private static String getCharsetParameter(String contentType) {

        if (contentType == null) return null;

        int separator = contentType.indexOf(';');
        while (separator >= 0) {
            int next = contentType.indexOf(';', separator + 1);
            String parameter = (next < 0)
              ? contentType.substring(separator + 1)
              : contentType.substring(separator + 1, next);
            int equalsSign = parameter.indexOf('=');
            if (equalsSign > 0) {
                String name = parameter.substring(0, equalsSign).trim();
                if (name.equalsIgnoreCase("charset")) {
                    String value = parameter.substring(equalsSign + 1).trim();
                    if (value.length() >= 2
                      && value.startsWith("\"") && value.endsWith("\"")) {
                        value = value.substring(1, value.length() - 1).trim();
                    }
                    if (value.length() > 0) return value;
                }
            }
            separator = next;
        }
        return null;

    }


    // Tells whether a Content-Type header value names an XML media type;
    // parameters are ignored and MIME types are case-insensitive.
    private static boolean isXMLMediaType(String contentType) {

        int separator = contentType.indexOf(';');
        String mediaType = (separator < 0)
          ? contentType : contentType.substring(0, separator);
        mediaType = mediaType.trim().toLowerCase();
        return mediaType.equals("text/xml")
          || mediaType.equals("application/xml")
          || (mediaType.startsWith("text/") && mediaType.endsWith("+xml"))
          || (mediaType.startsWith("application/") && mediaType.endsWith("+xml"));

    }


    // Passes count lines, starting with line number begin (1-based), to
    // characters(). Lines are joined with the platform line separator;
    // no separator follows the last line passed on.
    private void includeLines(BufferedReader reader, int begin, int count)
      throws IOException, SAXException {

        String ls = System.getProperty("line.separator");
        int lineNumber = 1;
        String line = reader.readLine();
        // stop reading as soon as the requested lines have been delivered
        while (line != null && count > 0) {
            String next = reader.readLine();
            if (lineNumber >= begin) {
                count--;
                if (count > 0 && next != null) {
                    line = line + ls;
                }
                this.characters(line.toCharArray(), 0, line.length());
            }
            lineNumber++;
            line = next;
        }

    }


    /**
     * <p>
     * This utility method reads a document at a specified URL
     * and fires off calls to <code>characters()</code>.
     * It's used to include files with <code>parse="text"</code>.
     * The URL may carry a line-selection fragment understood by
     * <code>getLineBegin()</code> and <code>getLineCount()</code>.
     * </p>
     * <p>
     * The character encoding is taken from the charset parameter of
     * the Content-Type reported for the connection if there is one;
     * otherwise, for XML media types, it is read from the stream
     * by <code>EncodingHeuristics</code>; otherwise the
     * <code>encoding</code> argument is used, defaulting to UTF-8.
     * The Content-Encoding header is not consulted because it names
     * a transfer coding such as gzip, not a character set.
     * </p>
     *
     * @param  url          URL of the document that will be read
     * @param  encoding     Encoding of the document; e.g. UTF-8, 
     *                      ISO-8859-1, etc.
     * @throws SAXException if the requested document cannot
     *                      be downloaded from the specified URL
     *                      or if the encoding is not recognized
     */
    private void includeTextDocument(String url, String encoding)
      throws SAXException {

        int begin = getLineBegin(url);
        int count = getLineCount(url);

        // if count=0, i.e. include no lines, then return immediately
        if (count == 0) {
            return;
        }

        if (encoding == null || encoding.trim().equals("")) encoding = "UTF-8";
        URL source = resolveAgainstCurrentBase(url);

        InputStream in = null;
        try {
            URLConnection uc = source.openConnection();
            in = new BufferedInputStream(uc.getInputStream());

            String contentType = uc.getContentType();
            String charset = getCharsetParameter(contentType);
            if (charset != null) {
                encoding = charset;
            }
            else if (contentType != null && isXMLMediaType(contentType)) {
                // What if file does not have a MIME type but name ends in .xml????
                encoding = EncodingHeuristics.readEncodingFromStream(in);
            }

            if (begin != 0) {
                includeLines(
                  new BufferedReader(new InputStreamReader(in, encoding)),
                  begin, count);
            }
            else {
                // no usable line selection: pass the whole document on
                InputStreamReader reader = new InputStreamReader(in, encoding);
                char[] c = new char[1024];
                while (true) {
                    int charsRead = reader.read(c, 0, 1024);
                    if (charsRead == -1) break;
                    if (charsRead > 0) this.characters(c, 0, charsRead);
                }
            }
        }
        catch (UnsupportedEncodingException e) {
            throw new SAXException("Unsupported encoding: "
             + encoding + getLocation(), e);
        }
        catch (IOException e) {
            throw new SAXException("Document not found: "
             + source.toExternalForm() + getLocation(), e);
        }
        finally {
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException ignored) {
                    // the content has been delivered or an exception is
                    // already on its way; a failure to close adds nothing
                }
            }
        }

    }


    // Creates the parser used to read an included XML document: the
    // default SAX2 driver if there is one, Xerces otherwise.
    private XMLReader createParser() throws SAXException {

        try {
            return XMLReaderFactory.createXMLReader();
        }
        catch (SAXException e) {
            try {
                return XMLReaderFactory.createXMLReader(
                  "org.apache.xerces.parsers.SAXParser"
                );
            }
            catch (SAXException e2) {
                throw new SAXException("Could not find an XML parser", e2);
            }
        }

    }


    /**
     * <p>
     * This utility method reads a document at a specified URL
     * and fires off calls to various <code>ContentHandler</code> methods.
     * It's used to include files with <code>parse="xml"</code>.
     * The included document is parsed with a new parser that reports
     * its events to this filter and shares this filter's entity resolver.
     * </p>
     *
     * @param  url          URL of the document that will be read
     * @throws SAXException if the URL cannot be resolved, if the
     *                      document is already being included (circular
     *                      reference), if no XML parser can be found,
     *                      or if the requested document cannot be
     *                      downloaded from the specified URL
     */
    private void includeXMLDocument(String url)
      throws SAXException {

        URL source = resolveAgainstCurrentBase(url);

        if (bases.contains(source)) {
            Exception e = new CircularIncludeException(
              "Circular XInclude Reference to " + source + getLocation()
            );
            throw new SAXException("Circular XInclude Reference", e);
        }

        XMLReader parser = createParser();
        parser.setContentHandler(this);
        EntityResolver resolver = this.getEntityResolver();
        if (resolver != null) parser.setEntityResolver(resolver);

        // save old level and base
        int previousLevel = level;
        this.level = 0;
        bases.push(source);
        atRoot = true;
        try {
            parser.parse(source.toExternalForm());
        }
        catch (IOException e) {
            throw new SAXException("Document not found: "
             + source.toExternalForm() + getLocation(), e);
        }
        // restore old level and base
        this.level = previousLevel;
        bases.pop();

    }

}