package org.wikiwebserver.core;

import java.io.EOFException;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A StructuredDataReader is used to read structured data from a character stream.
 * 
 * Supports reading a collection of key-value pairs into a Map data structure and 
 * reading a CSV into a 2 dimensional list of strings.
 * 
 * Fields may be plain or encapsulated. Plain fields have surrounding whitespace
 * trimmed and are returned as <code>null</code> when empty. Encapsulated fields
 * are surrounded by {@link #FIELD_ENCAPSULATE} and are returned verbatim (an
 * empty encapsulated field is an empty string, not <code>null</code>).
 * 
 * @author Michael Gardiner 
 * @version 2008-10-20
 */
public class StructuredDataReader {

    /**
     * This character is used to surround a field that requires encapsulation.
     * Encapsulation ensures that the field data is held together and not
     * misinterpreted as separate data. When a field needs to include this 
     * character as part of its data, it must be paired with itself.
     */
    public static final char FIELD_ENCAPSULATE = StructuredDataWriter.FIELD_ENCAPSULATE;

    // A PushbackReader is required to unread data when parsing structure.
    private final PushbackReader reader;

    // When true, a blank line ends the current table or map so another can follow.
    private final boolean multiStructure;

    /**
     * Constructs a new StructuredDataReader for reading a single structure
     * from the specified character stream.
     * 
     * @param reader The reader where the structured data is to be read from.
     */
    public StructuredDataReader(Reader reader) {
        this(reader, false);
    }

    /**
     * Constructs a new StructuredDataReader for reading structured data from
     * the specified character stream.
     * 
     * If multiStructure is true, multiple maps and tables can be read from
     * the same reader.  These are separated by blank lines.
     * 
     * If the supplied reader is already a PushbackReader it is used directly;
     * it must then be able to hold at least two pushed back characters, 
     * because the parser may unread two characters in a row. Any other reader
     * is wrapped in a PushbackReader of that size.
     * 
     * @param reader The reader where the structured data is to be read from.
     * @param multiStructure Enable multiple table and map reading.
     */
    public StructuredDataReader(Reader reader, boolean multiStructure) {
        if (reader instanceof PushbackReader) {
            this.reader = (PushbackReader) reader;
        }
        else {
            // A PushbackReader is required
            this.reader = new PushbackReader(reader, 2);
        }
        this.multiStructure = multiStructure;
    }

    /**
     * Reads a table using ',' as the column separator and '\n' as the row
     * separator.
     * 
     * @return A 2 dimensional list of strings representing a table
     * @throws IOException see {@link #readRows(char, char)}
     */
    public List<List<String>> readRows() throws IOException {
        return readRows(',', '\n');
    }

    /**
     * Reads a 2 dimensional list of strings (a table) from the reader.
     * 
     * Rows are read until the end of the stream. In multi structure mode
     * reading also stops at the first blank row (a row consisting of a single
     * empty plain field); that row is consumed and not included in the result.
     * 
     * @param colSeparator The character that marks the end of a column
     * @param rowSeparator The character that marks the end of a row
     * @return A 2 dimensional list of strings representing a table
     * 
     * @throws IOException a problem reading the data from the reader or if the
     *         source contains formatting errors such as incomplete encapsulated fields,
     *         corrupt encapsulated fields, missing rows separators or no
     *         more rows or fields to read.
     */
    public List<List<String>> readRows(char colSeparator, char rowSeparator) throws IOException {

        List<List<String>> rows = new ArrayList<List<String>>();

        List<String> columns = readColumns(colSeparator, rowSeparator);
        while (columns != null) {
            // A blank row indicates end of rows when several structures share a stream
            boolean blankRow = columns.size() == 1 && columns.get(0) == null;
            if (multiStructure && blankRow) {
                return rows;
            }
            rows.add(columns);
            columns = readColumns(colSeparator, rowSeparator);
        }
        return rows;
    }

    /**
     * Reads a row using ',' as the column separator and '\n' as the row
     * separator.
     * 
     * @return A 1 dimensional list of strings representing a row, or
     *         <code>null</code> if the end of the stream has been reached
     * @throws IOException see {@link #readColumns(char, char)}
     */
    public List<String> readColumns() throws IOException {
        return readColumns(',', '\n');
    }

    /**
     * Reads a 1 dimensional list of strings (a row) from the reader.
     * 
     * The row separator that ends the row is consumed. Empty plain fields are
     * represented by <code>null</code> entries.
     * 
     * @param colSeparator The character that marks the end of a column
     * @param rowSeparator The character that marks the end of a row 
     * @return A 1 dimensional list of strings representing a row, or
     *         <code>null</code> if the end of the stream has been reached
     * 
     * @throws IOException a problem reading the data from the reader or if the
     *         source contains formatting errors such as incomplete encapsulated fields,
     *         corrupt encapsulated fields, missing row separators or no
     *         more rows or fields to read.
     */
    public List<String> readColumns(char colSeparator, char rowSeparator) throws IOException {

        // Check not at end of stream
        int next = read();
        if (next == -1) {
            return null;
        }
        unread(next);

        List<String> record = new ArrayList<String>();

        // Every row has at least one field; each column separator announces another
        do {
            record.add(readField(colSeparator, rowSeparator));
            next = read();
        } while (next == colSeparator);

        if (next != -1 && next != rowSeparator) {
            throw new IOException("Row seperator expected, (" + (char)next + " found)");
        }

        return record;
    }

    /**
     * Reads a Map of key value pairs from the reader.
     * 
     * Entries that start with # are skipped up to and including the next
     * line feed. Blank entries are skipped. A key that is not followed by a 
     * keyValueSeparator is stored with a <code>null</code> value, except in 
     * multi structure mode where it is taken as the end of the map (the entry
     * separator is pushed back and the key is discarded).
     * 
     * Reading also ends at the end of the stream, or after a value that is 
     * not followed by an entrySeparator.
     * 
     * @param keyValueSeparator The character that joins keys to values
     * @param entrySeparator The character that marks the end of a key-value pair
     * @return A Map of key value pairs
     * 
     * @throws IOException a problem reading the data from the reader or if the
     *         data contains formatting errors such as incomplete encapsulated fields,
     *         corrupt encapsulated fields, missing keyValueSeparator separators or no
     *         more fields to read.
     */
    public Map<String, String> readPairs(char keyValueSeparator, char entrySeparator) throws IOException {

        Map<String, String> map = new HashMap<String, String>();

        while (true) {

            int next = read();
            if (next == -1) {
                return map;
            }
            if (next == '#') {
                // Comment, ignore the rest of the line
                skipAndStopAfter('\n');
                continue;
            }
            unread(next);

            String key = readField(keyValueSeparator, entrySeparator);
            String value = null;
            next = read();

            if (next == keyValueSeparator) {
                value = readField(entrySeparator);
                next = read();
                // Last entry has been read
                if (next != entrySeparator) {
                    map.put(key, value);
                    return map;
                }
            }
            else if (multiStructure && next == entrySeparator) {
                // This is not a name value pair, assume end of map
                unread(next);
                return map;
            }
            else if (key == null) {
                // Blank entry (no key and, on this branch, never a value)
                continue;
            }

            map.put(key, value);
        }
    }

    /**
     * Closes the underlying reader.
     * 
     * @throws IOException if the underlying reader fails to close
     */
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Reads a single character from the underlying reader.
     * 
     * @return The character read, or -1 at the end of the stream
     * @throws IOException a problem reading from the reader
     */
    protected int read() throws IOException {
        return reader.read();
    }

    /**
     * Pushes a character back onto the underlying reader so it is returned by
     * the next read.  The end of stream marker (-1) is silently ignored.
     * 
     * @param c The character to push back, or -1
     * @throws IOException if the character can not be pushed back
     */
    protected void unread(int c) throws IOException {
        if (c != -1) {
            reader.unread(c);
        }
    }

    /**
     * Reads the next field, plain or encapsulated, skipping any leading
     * whitespace.  The terminating character is left unread.
     * 
     * @param termination The characters that mark the end of the field
     * @return The field value; <code>null</code> for an empty plain field
     * @throws EOFException if the end of the stream is reached before any
     *         field data or terminator is found
     * @throws IOException a problem reading from the reader or a malformed
     *         encapsulated field
     */
    protected String readField(char... termination) throws IOException, EOFException {

        skipWhiteSpace(termination);

        int first = read();
        if (first == -1) {
            throw new EOFException("No more fields or records");
        }

        if (first == FIELD_ENCAPSULATE) {
            // Opening encapsulation character has been consumed
            return readEncapsulatedField(termination);
        }

        unread(first);
        return readPlainField(termination);
    }

    /**
     * Reads a plain (non encapsulated) field up to, but not including, the 
     * next terminating character or the end of the stream.
     * 
     * @param termination The characters that mark the end of the field
     * @return The field with surrounding whitespace removed, or 
     *         <code>null</code> if no characters precede the terminator
     * @throws IOException a problem reading from the reader
     */
    protected String readPlainField(char... termination) throws IOException {

        StringBuilder field = new StringBuilder();

        int next = read();
        while (!isEndOfField(next, termination)) {
            field.append((char) next);
            next = read();
        }

        // Leave the terminator for the caller
        unread(next);

        if (field.length() == 0) {
            return null;
        }
        return trimWhiteSpace(field.toString());
    }

    /**
     * Reads the remainder of an encapsulated field.  The opening 
     * encapsulation character must already have been consumed.
     * 
     * A doubled encapsulation character inside the field is read as a single
     * literal character.  Whitespace between the closing encapsulation 
     * character and the terminator is skipped.  The terminator is left unread.
     * 
     * @param termination The characters that mark the end of the field
     * @return The field contents, exactly as encapsulated
     * @throws IOException a problem reading from the reader, if the stream 
     *         ends before the closing encapsulation character, or if the 
     *         closing character is followed by anything other than whitespace
     *         and a terminator
     */
    protected String readEncapsulatedField(char... termination) throws IOException {

        StringBuilder field = new StringBuilder();

        // One character of look ahead is needed to tell a doubled (escaped)
        // encapsulation character from the closing one.
        int prev = read();
        int next = read();
        boolean escape = false;

        while (true) {
            // Incomplete encapsulated field
            if (prev == -1) {
                throw new IOException("Incomplete encapsulated field");
            }

            if (prev == FIELD_ENCAPSULATE) {
                escape = !escape;

                if (!escape) {
                    // Second character of an escaped pair
                    field.append(FIELD_ENCAPSULATE);
                }
                else if (next != FIELD_ENCAPSULATE) {
                    // End of encapsulated field
                    if (isWhiteSpace(next, termination)) {
                        // Tolerate whitespace (for example the \r of a \r\n)
                        // between the closing character and the terminator.
                        skipWhiteSpace(termination);
                        next = read();
                    }

                    // Also consume whitespace that follows the terminator,
                    // as earlier versions of this reader did.
                    skipWhiteSpace(termination);

                    if (!isEndOfField(next, termination)) {
                        throw new IOException("Corrupt encapsulated field");
                    }
                    break;
                }
            }
            else {
                field.append((char) prev);
            }

            prev = next;
            next = read();
        }

        // Leave the terminator for the caller
        unread(next);

        return field.toString();
    }

    /**
     * Tests whether a character ends a field.
     * 
     * @param test The character to test, or -1 for end of stream
     * @param termination The characters that mark the end of a field
     * @return true if test is -1 or one of the terminating characters
     */
    protected boolean isEndOfField(int test, char... termination) {
        if (test == -1) {
            return true;
        }
        for (int i = 0; i < termination.length; i++) {
            if (test == termination[i]) {
                return true;
            }
        }
        return false;
    }

    /**
     * Consumes whitespace from the reader, stopping before the first 
     * character that is not whitespace or that is one of the exclusions.
     * 
     * @param exclusions Characters that are never treated as whitespace
     * @throws IOException a problem reading from the reader
     */
    protected void skipWhiteSpace(char... exclusions) throws IOException {
        int next = read();
        while (isWhiteSpace(next, exclusions)) {
            next = read();
        }
        unread(next);
    }

    /**
     * Consumes characters up to and including the next occurrence of the 
     * terminating character, or up to the end of the stream if the 
     * terminating character does not occur.
     * 
     * @param termination The last character to consume
     * @throws IOException a problem reading from the reader
     */
    protected void skipAndStopAfter(char termination) throws IOException {
        int next = read();
        // The end of stream check prevents looping forever on an unterminated line
        while (next != -1 && next != termination) {
            next = read();
        }
    }

    /**
     * Tests whether a character is whitespace: a space, tab, line feed or
     * carriage return that is not listed as an exclusion.
     * 
     * @param c The character to test
     * @param exclusions Characters that are never treated as whitespace
     * @return true if c is whitespace
     */
    protected boolean isWhiteSpace(int c, char... exclusions) {
        for (int i = 0; i < exclusions.length; i++) {
            if (c == exclusions[i]) {
                return false;
            }
        }
        switch (c) {
            case ' ' :
            case '\t' :
            case '\n' :
            case '\r' :
                return true;
            default :
                return false;
        }
    }

    /**
     * Removes leading and trailing whitespace (space, tab, line feed and 
     * carriage return) from a string.
     * 
     * @param s The string to trim
     * @return The trimmed string; an empty string if s contains only whitespace
     */
    protected String trimWhiteSpace(String s) {

        int start = 0;
        int end = s.length();

        // Both scans are bounded so that an all whitespace string yields ""
        while (start < end && isWhiteSpace(s.charAt(start))) {
            start++;
        }
        while (end > start && isWhiteSpace(s.charAt(end - 1))) {
            end--;
        }

        return s.substring(start, end);
    }
}

