/**This class provides frequently used string manipulation functions,
 * typically used for turning journal captions into a more readable format.
 * @author Chris Jarabek (cjjarabe@ucalgary.ca) 
 */

package org.xenbase.utilities;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringEscapeUtils;

public class StringUtil {



    /**Utility function that removes excess whitespace, this was included
     * because of some peculiar formatting in the captions of some journals.
     * @param String inputStr
     * @return String
     */
    public static String removeDuplicateWhitespace(String inputStr) throws Exception, Error {        
        String patternStr = "\\s+";
        String replaceStr = " ";
        Pattern pattern = Pattern.compile(patternStr);
        Matcher matcher = pattern.matcher(inputStr);
        return matcher.replaceAll(replaceStr);
    }
    /**Utility function that removes XML or HTML tags from a string.
     * It is possible that a string could be constructed which would could
     * be identified as falsely containing a tag.  However this case is
     * remote and as such, has not been addressed.
     * @param String message
     * @return String message without XML or HTML
     */
    public static String stripHTMLTags(String message) throws Exception, Error{
        String noHTMLString = message.replaceAll("\\<.*?\\>", "");
        return noHTMLString;
    }
    /**Utility function that sanitizes a string so it can be used as a URL
     * @param String URL
     * @return String
     */
    public static String convertUrl(String url) throws Exception, Error {
        url = url.replaceAll("<", "%3C");
        url = url.replaceAll(">", "%3E");
        return url;
    }
    /**Utility function that un-escapes all of the HTML character entities that
     * may be in the caption body.  This turns them into human readable UTF-8
     * (Uni-code) characters.
     * @param String URL
     * @return String
     */
    public static String unescapeHTML(String text) throws Exception, Error {
        String retVal = StringEscapeUtils.unescapeHtml(text);
        return retVal;
    }
 }
