package page.tools.xml;

import java.io.IOException;
import java.io.InputStream;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.CharacterData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;


/**
 * Reads RSS documents through the DOM API and converts every
 * <code>channel</code> element into an {@link RSSChannel} holding one
 * {@link RSSItem} per <code>item</code> element found below it.
 *
 * <p>The parser keeps no mutable instance state: the date formatter and the
 * XML parser are created afresh for each {@link #getChannels(InputStream)}
 * call. The input stream is never closed here; that stays the caller's job.
 */
public class DOMRSSParser {

    /** Layout of the <code>pubDate</code> text, e.g. <code>Tue, 3 Jun 2008 11:05:30 GMT</code>. */
    private static final String PUB_DATE_PATTERN = "EEE, d MMM yyyy HH:mm:ss z";

    /** Placeholder returned when an element is missing or carries no text. */
    private static final String NO_TEXT = "?";

    /** Matches a tag-like run on a single line, from an opening angle bracket to the next closing one. */
    private static final Pattern MARKUP = Pattern.compile("\\<.*?>");

    /** Creates a parser; there is nothing to configure. */
    public DOMRSSParser() {
    }

    /**
     * Parses the stream and returns the first channel it contains.
     *
     * @param in stream positioned at the start of the XML document
     * @return the first channel in document order, or <code>null</code> when
     *         the document contains no <code>channel</code> element
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws SAXException if the document is not well-formed XML
     * @throws IOException if reading from the stream fails
     * @throws ParseException if a <code>pubDate</code> element is present but
     *         its text does not match the expected date layout
     */
    public RSSChannel getChannel(InputStream in)
        throws ParserConfigurationException, SAXException, IOException, ParseException {

        List<RSSChannel> channels = getChannels(in);
        return channels.isEmpty() ? null : channels.get(0);
    }

    /**
     * Parses the stream and returns every channel it contains.
     *
     * <p>Title, description, link and publication date are read from the
     * direct children of the channel or item only, so a nested element with
     * the same tag name is never mistaken for the channel's or item's own
     * field. A missing or empty field yields the text <code>"?"</code>; a
     * missing <code>pubDate</code> leaves the item's date untouched.
     *
     * @param in stream positioned at the start of the XML document
     * @return the channels in document order; empty (never <code>null</code>)
     *         when the document has no <code>channel</code> element
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws SAXException if the document is not well-formed XML
     * @throws IOException if reading from the stream fails
     * @throws ParseException if a <code>pubDate</code> element is present but
     *         its text does not match the expected date layout
     */
    public List<RSSChannel> getChannels(InputStream in)
        throws ParserConfigurationException, SAXException, IOException, ParseException {

        // SimpleDateFormat is not safe for concurrent use, so each call gets its own.
        // RSS dates use English day and month names regardless of the JVM's default locale.
        DateFormat pubDateFormat = new SimpleDateFormat(PUB_DATE_PATTERN, Locale.ENGLISH);

        Document doc = newDocumentBuilder().parse(in);

        List<RSSChannel> rssChannels = new ArrayList<RSSChannel>();
        NodeList channels = doc.getElementsByTagName("channel");
        for (int i = 0; i < channels.getLength(); i++) {
            Element channel = (Element) channels.item(i);

            RSSChannel rssChannel = new RSSChannel();
            rssChannel.setTitle(childText(channel, "title"));
            rssChannel.setDescription(childText(channel, "description"));

            NodeList items = channel.getElementsByTagName("item");
            for (int j = 0; j < items.getLength(); j++) {
                rssChannel.addItem(readItem((Element) items.item(j), pubDateFormat));
            }

            rssChannels.add(rssChannel);
        }

        return rssChannels;
    }

    /**
     * Returns the text held directly by an element with tag-like runs removed.
     *
     * <p>All direct text and CDATA children are concatenated in document
     * order; comments and child elements are skipped. Surrounding whitespace
     * is kept as it appears in the document.
     *
     * @param e element to read; may be <code>null</code>
     * @return the cleaned text, or <code>"?"</code> when <code>e</code> is
     *         <code>null</code> or has no text or CDATA child
     */
    public static String getCharacterDataFromElement(Element e) {
        if (e == null) {
            return NO_TEXT;
        }

        StringBuilder text = new StringBuilder();
        boolean sawText = false;
        for (Node child = e.getFirstChild(); child != null; child = child.getNextSibling()) {
            short type = child.getNodeType();
            // Comment nodes are CharacterData too, so filter on node type rather than instanceof.
            if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                text.append(((CharacterData) child).getData());
                sawText = true;
            }
        }

        if (!sawText) {
            return NO_TEXT;
        }
        return MARKUP.matcher(text).replaceAll("");
    }

    /** Builds one RSSItem from an <code>item</code> element. */
    private static RSSItem readItem(Element item, DateFormat pubDateFormat) throws ParseException {
        RSSItem rssItem = new RSSItem(childText(item, "title"), childText(item, "description"));
        rssItem.setLink(childText(item, "link"));

        Element pubDate = firstChildElement(item, "pubDate");
        if (pubDate != null) {
            // Indentation or line breaks around the date would otherwise make parsing fail.
            String dateText = getCharacterDataFromElement(pubDate).trim();
            rssItem.setPubDate(pubDateFormat.parse(dateText).getTime());
        }
        return rssItem;
    }

    /** Returns the cleaned text of the first direct child with the given tag name, or "?" if there is none. */
    private static String childText(Element parent, String tagName) {
        return getCharacterDataFromElement(firstChildElement(parent, tagName));
    }

    /** Returns the first direct child element with the given tag name, or null if there is none. */
    private static Element firstChildElement(Element parent, String tagName) {
        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE && tagName.equals(child.getNodeName())) {
                return (Element) child;
            }
        }
        return null;
    }

    /** Creates a DOM parser that is asked not to resolve external entities or external DTDs. */
    private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Feeds usually come from remote hosts; stop the parser from following
        // external references named inside the document.
        setFeatureIfSupported(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        setFeatureIfSupported(factory, "http://xml.org/sax/features/external-general-entities", false);
        setFeatureIfSupported(factory, "http://xml.org/sax/features/external-parameter-entities", false);
        setFeatureIfSupported(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return factory.newDocumentBuilder();
    }

    /** Applies a parser feature, leaving the factory unchanged when the parser rejects it. */
    private static void setFeatureIfSupported(DocumentBuilderFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
        } catch (ParserConfigurationException ignored) {
            // Hardening is best effort: a parser that does not know this
            // feature keeps its defaults instead of making every parse fail.
        }
    }
}

