/*
 * Created on Nov 30, 2009
 * 
 * Description: This file is meant to show how to run the image scrapers, as
 * well as how to downsize the images.
 */
package org.xenbase.scraper.runner;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;

import org.xenbase.scraper.BasicScraper;
import org.xenbase.scraper.Scraper_CurrBio_DevCell_Cell;
import org.xenbase.scraper.Scraper_DevDyn;
import org.xenbase.scraper.Scraper_Development;
import org.xenbase.scraper.Scraper_JCellBio;
import org.xenbase.scraper.Scraper_MechDev_DevBio;
import org.xenbase.scraper.Scraper_PNAS;
import org.xenbase.scraper.data.ScrapedData;
import org.xenbase.scraper.data.ScrapedImage;
import org.xenbase.utilities.AdvImageUtil;

/**
 * Command-line demo showing how to drive one of the journal image scrapers
 * and how to downsize the images it returns.
 *
 * <p>Invocation: {@code run [url] [journal_type]}. For every scraped image the
 * program writes the original bytes to {@code ./scraperOutput/<i>.jpg}, a copy
 * scaled to fit a 640 x 480 frame to {@code ./scraperOutput/<i>_small.jpg},
 * and finally all captions to {@code ./scraperOutput/captions.txt}.
 *
 * <p>Because this is a demo, error handling is limited to printing a message
 * on standard output and returning.
 */
public class ScraperStubRunner {
    /** Directory (relative to the working directory) receiving all output files. */
    private static final String OUTPUT_DIR = "./scraperOutput/";

    /** Holder for the user-facing text printed by this demo. */
    private static class STRINGS {
        /** Help text printed whenever the command-line arguments are unusable. */
        private static final String usage = "Usage: run [url] [journal_type]\n" + 
                "\t ==Journal Types== \n" + 
                "\t 1: Current Biology, Developmental Cell, Cell \n" + 
                "\t 2: Developmental Dynamics \n" + 
                "\t 3: Development \n" + 
                "\t 4: Mechanisms of Development, Developmental Biology \n" + 
                "\t 5: Proceedings of the National Academy of Sciences \n" + 
                "\t 6: Journal of Cell Biology \n"; 
    }

    /**
     * Entry point of the demo.
     *
     * @param args {@code args[0]} is the URL to scrape and {@code args[1]} is
     *             the journal type, an integer from 1 to 6 as listed in the
     *             usage text. Missing, non-numeric or out-of-range values
     *             print the usage text and return.
     */
    public static void main(String[] args) {

        //Because this is a demo, error checking will be limited. Both
        //arguments are required, so anything shorter than two entries is
        //rejected before args[1] is touched.
        if ((args == null) || (args.length < 2) || (args[0] == null) || (args[1] == null)) {
            System.out.println(STRINGS.usage);
            return;
        }

        //Get our input vars, 1) the URL we want to scrape, 2) the journal type
        String url = args[0];
        int scraperType;
        try {
            scraperType = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            //A journal type that is not a number is a usage error, not a crash.
            System.out.println(STRINGS.usage);
            return;
        }

        /*
         * We create an empty variable that our scraper will soon inhabit.
         * Obviously in a real implementation (as is the case in Xenbase) the
         * scraperType will not be supplied by the user, rather, it will be
         * passed automatically to the scraper as it is assumed that the
         * journal of the URL provided is already known (as would be the case
         * for URLs from PubMed).
         */
        BasicScraper pm = null;
        switch (scraperType) {
            case 1: pm = new Scraper_CurrBio_DevCell_Cell(); break;
            case 2: pm = new Scraper_DevDyn(); break;
            case 3: pm = new Scraper_Development(); break;
            case 4: pm = new Scraper_MechDev_DevBio(); break;
            case 5: pm = new Scraper_PNAS(); break;
            case 6: pm = new Scraper_JCellBio(); break;
            default: System.out.println(STRINGS.usage); return;
        }

        System.out.println("Scraper initialized.");
        try {
            //Make sure the output directory exists before any work is done,
            //otherwise every file write below would fail.
            File outputDir = new File(OUTPUT_DIR);
            if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
                System.out.println("Could not create output directory: " + outputDir.getPath());
                return;
            }

            //Because the actual article pages linked from pubmed are linked
            //through the DOI we have to call the scraper's RedirURL function,
            //this will take the pubMed URL and trigger the resulting redirects,
            //get to the journal publishers website and return the usable URL
            //of the full journal article.
            String redirectedURL = pm.getRedirURL(url);

            //This actually gets all the images and captions, this is described
            //in greater detail in the accompanying documentation.
            ScrapedData results = pm.scrape(redirectedURL);
            System.out.println(results.getNumberScraped() + " images have been scraped from this article.");
            StringBuilder captions = new StringBuilder();

            //Now we will iterate through each image, down size it, save it, etc.
            for (int i = 0; i < results.getNumberScraped(); i++) {
                System.out.println("Processing image: " + (i+1));

                //Get the individual image object
                ScrapedImage si = (ScrapedImage) results.getScrapedData()[i];
                byte[] b = si.getByteImg();

                //Save the image
                writeBytes(new File(OUTPUT_DIR + i + ".jpg"), b);

                //Scale the image to fit into a 640 x 480 frame, save it
                b = AdvImageUtil.scaleToFit(640, 480, b);
                writeBytes(new File(OUTPUT_DIR + i + "_small.jpg"), b);

                //Add the captions to the existing captions
                captions.append("Caption ").append(i+1).append("\n")
                        .append(si.getCaption()).append("\n\n");
            }

            //Save out the captions
            writeCaptions(new File(OUTPUT_DIR + "captions.txt"), captions.toString());

            System.out.println("Scraping completed");
        } catch (Exception e) {
            System.out.println(describe(e));
            return;
        } catch (Error e) {
            System.out.println(describe(e));
            return;
        }
    }

    /**
     * Writes {@code data} to {@code target}, always releasing the file handle.
     *
     * @param target file to create or overwrite
     * @param data   bytes to store
     * @throws java.io.IOException if the file cannot be opened or written
     */
    private static void writeBytes(File target, byte[] data) throws java.io.IOException {
        FileOutputStream stream = new FileOutputStream(target);
        try {
            stream.write(data);
        } finally {
            stream.close();
        }
    }

    /**
     * Writes the collected captions to {@code target} as UTF-8 text, always
     * releasing the file handle.
     *
     * @param target   file to create or overwrite
     * @param captions text to store
     * @throws java.io.IOException if the file cannot be opened or written
     */
    private static void writeCaptions(File target, String captions) throws java.io.IOException {
        FileOutputStream stream = new FileOutputStream(target);
        try {
            BufferedWriter out = new BufferedWriter(new OutputStreamWriter(stream, "UTF8"));
            out.write(captions);
            //Push buffered text to the stream before the finally block closes it.
            out.flush();
        } finally {
            stream.close();
        }
    }

    /**
     * Returns a printable description of a failure: its message when it has
     * one, otherwise its {@code toString()} so the output is never "null".
     *
     * @param failure the throwable that was caught
     * @return non-null text describing the failure
     */
    private static String describe(Throwable failure) {
        String message = failure.getMessage();
        return (message != null) ? message : failure.toString();
    }
}
