package nl.uvt.commons.io;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Properties;

import nl.uvt.commons.io.Http;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.ParsingException;
import nu.xom.ValidityException;

import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;


/**
 * Static helpers that build XOM {@link Document}s using the SAX parser
 * class <code>org.ccil.cowan.tagsoup.Parser</code> (TagSoup) as the
 * underlying {@link XMLReader}.
 */
public class TagsoupUtils {

	/** Fully qualified class name of the SAX parser handed to XOM's Builder. */
	private static final String TAGSOUP_PARSER_CLASS = "org.ccil.cowan.tagsoup.Parser";

	/**
	 * Creates a fresh XOM Builder backed by a newly created TagSoup XMLReader.
	 * A new reader is created per call, exactly as the original inline code did.
	 *
	 * @return a Builder that parses through the TagSoup reader
	 * @throws SAXException if the parser class cannot be instantiated
	 */
	private static Builder newTagSoupBuilder() throws SAXException {
		XMLReader tagsoup = XMLReaderFactory.createXMLReader( TAGSOUP_PARSER_CLASS );
		return new Builder( tagsoup );
	}

	/**
	 * Parses the given file, decoded as UTF-8, into a XOM document.
	 *
	 * @param f the file to parse; may be <code>null</code>
	 * @return the parsed document, or <code>null</code> when <code>f</code> is <code>null</code>
	 * @throws SAXException if the TagSoup parser cannot be created
	 * @throws ValidityException propagated from {@link Builder#build(java.io.Reader)}
	 * @throws ParsingException propagated from {@link Builder#build(java.io.Reader)}
	 * @throws IOException if the file cannot be opened, read or closed
	 */
	public static Document fileGetTagSoupDocument( File f ) throws 
	SAXException, ValidityException, ParsingException, IOException{
		// Check for null BEFORE opening the stream: previously the guard came
		// after new FileInputStream(f), which already throws on a null file,
		// so the guard could never take effect.
		if ( f == null ){
			return null;
		}
		FileInputStream is = new FileInputStream(f);
		try {
			InputStreamReader isr = new InputStreamReader( is, "UTF-8");
			Builder builder = newTagSoupBuilder();
			return builder.build( isr );
		} finally {
			// Always release the file handle, also when parsing fails.
			is.close();
		}
	}

	/**
	 * Fetches a resource through {@link Http#HttpGetInputStream} and parses
	 * the returned stream into a XOM document.
	 *
	 * @param url the URL passed to <code>Http.HttpGetInputStream</code>
	 * @param args passed through unchanged to <code>Http.HttpGetInputStream</code>
	 *             (presumably request parameters - confirm against Http)
	 * @param httpProps passed through unchanged to <code>Http.HttpGetInputStream</code>
	 *             (presumably connection/header settings - confirm against Http)
	 * @return the parsed response document
	 * @throws UnsupportedEncodingException declared for callers; see Http
	 * @throws MalformedURLException declared for callers; see Http
	 * @throws IOException if the response cannot be obtained, read or closed
	 * @throws ValidityException propagated from {@link Builder#build(InputStream)}
	 * @throws ParsingException propagated from {@link Builder#build(InputStream)}
	 * @throws SAXException if the TagSoup parser cannot be created
	 */
	public static Document httpGetTagSoupDocument( URL url, Properties args, Properties httpProps )
	throws UnsupportedEncodingException, MalformedURLException, IOException, ValidityException, ParsingException, SAXException {
		InputStream is = Http.HttpGetInputStream(url, args, httpProps);
		try {
			Builder builder = newTagSoupBuilder();
			return builder.build(is);
		} finally {
			// Close the response stream on every path; previously it was
			// only closed when parsing succeeded. The null guard keeps a
			// missing stream from masking the original failure.
			if ( is != null ){
				is.close();
			}
		}
	}

}
