/*
DTD parser library for Java.
Copyright (C) 2000 Christopher R. Jones

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

package org.menagery.dtd;

import java.io.*;
import java.util.*;

/**
 *  Non-threaded DTD reader and parser.
 *  Creates a DTD tree.
 *
 *  This is not a validating parser: the whole file is joined into one
 *  string, split on '&lt;', and every piece that starts with a known
 *  declaration keyword is handed to the matching get* method. Anything
 *  else (comments, processing instructions, stray text) is ignored, and
 *  declarations that cannot be split into a name and contents are skipped.
 */
public class Parser
{
  // Declaration keywords as they appear right after the opening '<'.
  // The trailing space is part of the match.
  private static final String DTD_ELEMENT = "!ELEMENT ";
  private static final String DTD_ATTLIST = "!ATTLIST ";
  private static final String DTD_ENTITY = "!ENTITY ";
  
  /**
   *  Given a file reference, parses the file and creates a DTD tree.
   *
   *  The file is read with the platform default character encoding
   *  (that is what FileReader uses).
   *
   *  @param fileDTD The file reference containing the DTD to be parsed
   *  @return The DTD tree; never null.
   *  @exception IOException If the file cannot be read.
   *  @exception FileNotFoundException If the file cannot be opened.
   */
  public static DTD parse(File fileDTD) throws IOException, 
    FileNotFoundException {
    BufferedReader br = new BufferedReader(new FileReader(fileDTD));
    
    try {
      DTD dtd = new DTD();
      
      // pass the dtd and reader into the scanner
      scan(dtd, br);
      
      return dtd;
    } finally {
      // always release the file handle, even if scanning failed
      br.close();
    }
  }
  
  /**
   *  Scans the whole input and places the results in the DTD.
   *
   *  Note that the getElement, getAttlist, and getEntity methods
   *  are responsible for completing their DTD tags. If they can't
   *  complete the tag for whatever reason, they return null to this
   *  method and the bad definition is skipped. (No, this is
   *  not a validating parser.)
   *
   *  @param dtd The DTD object that gets the results.
   *  @param br The BufferedReader object that has a handle on the DTD file.
   *  @exception IOException If reading from the reader fails.
   */
  static void scan(DTD dtd, BufferedReader br) throws IOException {
    // prepare the DTD file (join lines, etc.) to make it easier to parse:
    // every line is trimmed and the lines are joined with single spaces,
    // so a declaration that spans several lines becomes one piece of text
    StringBuffer sbDTD = new StringBuffer();
    String sInLine;
    
    while ((sInLine = br.readLine()) != null) {
      sbDTD.append(' ').append(sInLine.trim());
    }
    
    // tokenize the string based on opening tags
    StringTokenizer st = new StringTokenizer(sbDTD.toString().trim(), "<");
    
    while (st.hasMoreTokens()) {
      String sToken = st.nextToken();
      
      // locate the closing bracket; text without one (stray text before
      // the first tag, whitespace between two '<') cannot be a declaration
      int iEndTag = findTagEnd(sToken);
      
      if (iEndTag < 0) {
	continue;
      }
      
      // take off the closing bracket and anything that follows it
      // (e.g. a parameter entity reference between two declarations)
      String sLine = sToken.substring(0, iEndTag).trim();
      
      // determine the type of tag
      if (sLine.startsWith(DTD_ELEMENT)) {
	// likely contains an element definition
	Element elem = getElement(sLine);
	
	// if the element was successfully retrieved, add it to the tree
	if (elem != null) {
	  dtd.addElement(elem);
	}
      } else if (sLine.startsWith(DTD_ATTLIST)) {
	// likely contains an attlist definition
	Attlist attl = getAttlist(sLine);
	
	// if the attlist was successfully retrieved, add it to the tree
	if (attl != null) {
	  dtd.addAttlist(attl);
	}
      } else if (sLine.startsWith(DTD_ENTITY)) {
	// likely contains an entity definition
	Entity enty = getEntity(sLine, dtd);
	
	// if the entity was successfully retrieved, add it to the tree
	if (enty != null) {
	  dtd.addEntity(enty);
	}
      }
      // anything else is probably a comment or something we should ignore
    }
  }
  
  /**
   *  Finds the '&gt;' that closes a tag.
   *
   *  A '&gt;' inside a single- or double-quoted literal does not count, so
   *  an entity value such as "&gt;" does not end the tag early. If the
   *  quotes are unbalanced and no unquoted '&gt;' exists, the last '&gt;'
   *  in the text is used instead.
   *
   *  @param sToken The text that followed a '&lt;'.
   *  @return The index of the closing bracket, or -1 if there is none.
   */
  private static int findTagEnd(String sToken) {
    char cQuote = 0; // the quote character we are inside of, 0 if none
    
    for (int i = 0; i < sToken.length(); i++) {
      char c = sToken.charAt(i);
      
      if (cQuote != 0) {
	if (c == cQuote) {
	  cQuote = 0;
	}
      } else if (c == '"' || c == '\'') {
	cQuote = c;
      } else if (c == '>') {
	return i;
      }
    }
    
    return sToken.lastIndexOf('>');
  }
  
  /**
   *  Splits off the first whitespace-delimited word of a text.
   *
   *  Leading whitespace and runs of whitespace after the word are skipped.
   *
   *  @param sText The text to split; may be null.
   *  @return A two element array holding the word and the trimmed 
   *          remainder (possibly empty), or null if there is no word.
   */
  private static String[] splitWord(String sText) {
    if (sText == null) {
      return null;
    }
    
    int iLength = sText.length();
    int iStart = 0;
    
    while (iStart < iLength && Character.isWhitespace(sText.charAt(iStart))) {
      iStart++;
    }
    
    if (iStart == iLength) {
      return null;
    }
    
    int iEnd = iStart;
    
    while (iEnd < iLength && !Character.isWhitespace(sText.charAt(iEnd))) {
      iEnd++;
    }
    
    return new String[] {
      sText.substring(iStart, iEnd), sText.substring(iEnd).trim()
    };
  }
  
  /**
   *  Splits a declaration into the word following the keyword and
   *  everything after that word.
   *
   *  @param sLine The declaration, starting with its keyword.
   *  @return A two element array holding the name and the trimmed contents
   *          (possibly empty), or null if the keyword or the name is missing.
   */
  private static String[] splitDeclaration(String sLine) {
    String[] asKeyword = splitWord(sLine);
    
    if (asKeyword == null) {
      return null;
    }
    
    return splitWord(asKeyword[1]);
  }
  
  /**
   *  Builds an Element from a complete element declaration.
   *
   *  If the declaration has no name or no contents, it is considered
   *  malformed and null is returned instead.
   *
   *  @param sLine The declaration without its angle brackets, e.g. 
   *               "!ELEMENT name (child)".
   *  @return An Element object containing the DTD element, or null if 
   *          the declaration was malformed.
   *  @exception IOException Declared for compatibility with callers.
   */
  public static Element getElement(String sLine) throws IOException {
    // the element name is the word following !ELEMENT,
    // the contents are everything after the name
    String[] asParts = splitDeclaration(sLine);
    
    if (asParts == null || asParts[1].length() == 0) {
      return null;
    }
    
    // okay, we've got everything we need to build the element
    // the element is responsible for parsing the contents
    // the dtd object will figure out where to stick the element in the tree
    if (asParts[1].equalsIgnoreCase("EMPTY")) {
      // use the EMPTY element constructor
      return new Element(asParts[0]);
    }
    
    return new Element(asParts[0], asParts[1]);
  }
  
  /**
   *  Builds an Attlist object from a complete ATTLIST declaration.
   *
   *  Attributes are 1:1 associated with elements. The association of the 
   *  attribute to the element takes place inside the DTD object.
   *
   *  @param sLine The declaration without its angle brackets, e.g. 
   *               "!ATTLIST name attr CDATA #IMPLIED".
   *  @return The new Attlist object, or null if the element name is missing.
   *  @exception IOException Declared for compatibility with callers.
   */ 
  private static Attlist getAttlist(String sLine) throws IOException {
    // the element name is the word following !ATTLIST
    String[] asParts = splitDeclaration(sLine);
    
    if (asParts == null) {
      return null;
    }
    
    // the Attlist object is responsible for parsing the contents
    // (which may be empty); the dtd object will figure out where to 
    // associate the attributes in the tree
    return new Attlist(asParts[0], asParts[1]);
  }
  
  /**
   *  Builds an Entity object from a complete ENTITY declaration.
   *
   *  @param sLine The declaration without its angle brackets, e.g. 
   *               "!ENTITY name SYSTEM uri" or "!ENTITY % name value".
   *  @param dtd The DTD handed on to the Entity constructor.
   *  @return The new Entity object, or null if the name or the contents
   *          are missing.
   *  @exception IOException Declared for compatibility with callers.
   */
  private static Entity getEntity(String sLine, DTD dtd) throws IOException {
    boolean bResolve = false;
    
    // the entity name is the word following !ENTITY
    String[] asParts = splitDeclaration(sLine);
    
    if (asParts != null && asParts[0].equals("%")) {
      // a '%' in front of the name: the real name is the next word and
      // the entity is flagged as one that must be resolved
      bResolve = true;
      asParts = splitWord(asParts[1]);
    }
    
    if (asParts == null || asParts[1].length() == 0) {
      return null;
    }
    
    // everything else is the content of the entity...it usually looks like:
    // [Entity Reference] SYSTEM [uri]
    // [Entity Reference] [uri]
    // This is something that the entity object will figure out
    return new Entity(asParts[0], asParts[1], dtd, bResolve);
  }
  
  /**
   *  Unit test. Parses a DTD and prints its elements and entities to stdout.
   *
   *  @param args Optionally the path of the DTD to parse as first argument;
   *              defaults to "./sample.dtd".
   */
  public static void main(String [] args)
  {
    try {
      String sPath = (args != null && args.length > 0) 
	? args[0] : "./sample.dtd";
      File f = new File(sPath);
      
      DTD dtd = Parser.parse(f);
      
      // rewrite the DTD to stdout
      for (Enumeration en = dtd.getElements();
	   en.hasMoreElements(); ) {
	System.out.println((Element)en.nextElement());
      }

      for (Enumeration en = dtd.getEntities();
	   en.hasMoreElements(); ) {
	System.out.println((Entity)en.nextElement());
      }
      /*
	System.out.println(dtd.getElementByName("MemberNetSurvey:Questions"));
	
	System.out.println(dtd.getElementByName("MemberNetSurvey:Question"));
	
	Element elem = dtd.getElementByName("MemberNetSurvey:Question");
	
	OptionList ol = elem.getOptionList();
	
	Option o = (Option)ol.getOptions().elementAt(0);
	
	System.out.println(o.getElement());
	*/
    } catch (Exception e) {
      System.err.println(e);
      e.printStackTrace();
    }
  }
}

