
package uk.co.wingpath.util;

import java.io.*;
import java.util.*;

/**
* This class provides static methods to be used when reading and writing
* CSV-formatted files.
* <p>The class holds no state and cannot be instantiated.
*/
public class Csv
{
    // Private constructor: this is a static-only utility class.
    private Csv ()
    {
    }

    /**
    * Constructs a CSV-formatted line from the supplied elements.
    * <p>Each element is quoted using <code>'"'</code>s, any occurrence of
    * <code>'"'</code> within the elements is preceded by another
    * <code>'"'</code>, and the elements are separated by <code>','</code>.
    * No line terminator is appended. An empty array produces an empty
    * string.
    * @param elements the elements to be included in the line.
    * @return CSV line constructed from the supplied elements.
    * @throws NullPointerException if <code>elements</code>, or any of the
    * elements, is <code>null</code>.
    */
    public static String buildLine (String [] elements)
    {
        StringBuilder sb = new StringBuilder ();

        for (int i = 0 ; i < elements.length ; i++)
        {
            // Separator goes before every element except the first.
            if (i != 0)
                sb.append (',');
            String element = elements [i];
            sb.append ('"');

            for (int j = 0 ; j < element.length () ; j++)
            {
                char c = element.charAt (j);

                // Escape an embedded quote by doubling it.
                if (c == '"')
                    sb.append (c);
                sb.append (c);
            }

            sb.append ('"');
        }

        return sb.toString ();
    }

    /**
    * States used by {@link #splitLine} method.
    */
    private enum State
    {
        START,          // At start of field.
        UNQUOTED,       // In unquoted field.
        QUOTE_ODD,      // In quoted field, after an odd number of quotes.
        QUOTE_EVEN,     // In quoted field, after an even number of quotes.
        QUOTE_END,      // After closing quote of quoted field.
        ESCAPED         // In unquoted field, after backslash.
    }

    /**
    * Splits a CSV-formatted line into its elements.
    * <p>The elements are assumed to be separated by <code>','</code>.
    * Elements may be quoted using <code>'"'</code> or unquoted. A mixture
    * of quoted and unquoted elements in the same line is allowed.
    * <p>Within a quoted element a <code>'"'</code> is represented by two
    * consecutive <code>'"'</code>s, and every other character (including
    * <code>','</code> and <code>'\'</code>) is taken literally.
    * Within an unquoted element a <code>'\'</code> causes the following
    * character to be taken literally.
    * Whitespace before an element, and whitespace between the closing quote
    * of a quoted element and the next <code>','</code> (or the end of the
    * line), is ignored. An unquoted element that is terminated by a
    * <code>','</code> is trimmed.
    * <p>A line that is empty after trimming yields zero elements. Any other
    * line yields one more element than it has separating commas.
    * @param line the CSV-formatted line.
    * @return the elements extracted from the line.
    * @throws ValueException if the line is badly formatted: a quote inside
    * an unquoted element, a character other than whitespace or
    * <code>','</code> after a closing quote, a missing closing quote, or a
    * backslash at the end of the line.
    * @throws NullPointerException if <code>line</code> is
    * <code>null</code>.
    */
    public static String [] splitLine (String line)
        throws ValueException
    {
        // Handle empty line specially, returning zero elements.
        // The code below would return one element for an empty line - the
        // number of elements that it returns is always one more than the
        // number of comma separators.
        line = line.trim ();
        if (line.equals (""))
            return new String [0];

        // List of elements found.
        List<String> elements = new ArrayList<String> ();

        // Builder for the field currently being read.
        StringBuilder sb = new StringBuilder ();

        // Use finite-state machine to parse the line.
        State state = State.START;

        for (int i = 0 ; i < line.length () ; i++)
        {
            char c = line.charAt (i);
            switch (state)
            {
            case START:
                // At start of field.
                // This may be the beginning of the line, or after a comma.
                if (Character.isWhitespace (c))
                {
                    // Skip any leading spaces.
                }
                else if (c == '"')
                {
                    // Start of a quoted field.
                    state = State.QUOTE_ODD;
                }
                else if (c == ',')
                {
                    // An empty field. Stay in START for the next field.
                    elements.add ("");
                }
                else if (c == '\\')
                {
                    // An unquoted field starting with a backslash.
                    state = State.ESCAPED;
                }
                else
                {
                    // Start of unquoted field.
                    sb.append (c);
                    state = State.UNQUOTED;
                }
                break;

            case UNQUOTED:
                // In unquoted field.
                if (c == '"')
                {
                    // Shouldn't have quotes in an unquoted field.
                    throw new ValueException (
                        "char " + (i + 1) + ": Unescaped quote");
                }
                else if (c == ',')
                {
                    // End of unquoted field.
                    // Remove any trailing spaces and save field.
                    elements.add (sb.toString ().trim ());
                    sb.setLength (0);
                    state = State.START;
                }
                else if (c == '\\')
                {
                    // Unix-style escape.
                    state = State.ESCAPED;
                }
                else
                {
                    // Normal or space character - add it to field.
                    // Any trailing spaces are trimmed when we have the
                    // whole field.
                    sb.append (c);
                }
                break;

            case QUOTE_ODD:
                // In quoted field, after an odd number of quotes.
                if (c == '"')
                {
                    // This may be the end of the field, or it may be
                    // escaping a following quote.
                    state = State.QUOTE_EVEN;
                }
                else
                {
                    // Normal character, or space, or comma, or backslash -
                    // add it to field.
                    sb.append (c);
                }
                break;

            case QUOTE_EVEN:
                // In quoted field, after an even number of quotes.
                // Either the preceding quote was escaping a quote (in which
                // case the current character is a quote), or the preceding
                // quote was the end of the field (in which case we should
                // have a comma or trailing spaces).
                if (Character.isWhitespace (c))
                {
                    // Trailing space after the end of the field.
                    // Save the field now, and expect more spaces or a comma.
                    elements.add (sb.toString ());
                    sb.setLength (0);
                    state = State.QUOTE_END;
                }
                else if (c == '"')
                {
                    // Escaped quote - add it to the field.
                    sb.append ('"');
                    state = State.QUOTE_ODD;
                }
                else if (c == ',')
                {
                    // End of field - save it.
                    elements.add (sb.toString ());
                    sb.setLength (0);
                    state = State.START;
                }
                else
                {
                    // Shouldn't be anything else after a closing quote.
                    throw new ValueException (
                        "char " + (i + 1) + ": Invalid character '" + c + "'");
                }
                break;

            case QUOTE_END:
                // After closing quote of quoted field (which has already
                // been saved).
                // Should only be trailing spaces or a comma.
                if (Character.isWhitespace (c))
                {
                    // Skip any further trailing spaces.
                }
                else if (c == ',')
                {
                    state = State.START;
                }
                else
                {
                    throw new ValueException (
                        "char " + (i + 1) + ": Invalid character '" + c + "'");
                }
                break;

            case ESCAPED:
                // In unquoted field, after a backslash.
                // Add the current character to the field, whatever it is.
                sb.append (c);
                state = State.UNQUOTED;
                break;
            }
        }

        // At the end of the line.
        // Save the last field of the line.
        switch (state)
        {
        case START:
            // Nothing except whitespace since the last comma (or since the
            // beginning of the line) - so empty field.
            elements.add ("");
            break;
        case UNQUOTED:
            // Unquoted field running up to the end of the line.
            // The line was trimmed on entry, so the field is saved as it
            // stands.
            elements.add (sb.toString ());
            break;
        case QUOTE_ODD:
            // In a quoted field without a closing quote.
            throw new ValueException ("Missing closing quote");
        case QUOTE_EVEN:
            // At end of properly quoted field.
            elements.add (sb.toString ());
            break;
        case QUOTE_END:
            // Whitespace followed the closing quote of the last field.
            // String.trim only strips characters up to U+0020, whereas
            // Character.isWhitespace also accepts other Unicode spaces, so
            // this state can be reached despite the initial trim.
            // The field was saved when the state was entered, so there is
            // nothing more to do.
            break;
        case ESCAPED:
            // Don't allow backslash at the end of a line.
            throw new ValueException ("Illegal backslash at end of line");
        }

        return elements.toArray (new String [0]);
    }
}

