package yswg.sqoop.plugin;

import com.cloudera.sqoop.lib.DelimiterSet;

import java.util.regex.Pattern;

/**
 * Static helpers that prepare string field values for delimited text output.
 * <p>
 * Compared with a plain "drop Hive delimiters" formatter, this class makes
 * three local modifications:
 * <ol>
 * <li>tab characters are treated as delimiters and replaced, because they
 * conflict with the field separator;</li>
 * <li>the NUL character (U+0000) is replaced, because it causes errors when
 * the data is exported to pg;</li>
 * <li>every value is trimmed of leading and trailing whitespace before it is
 * escaped and enclosed.</li>
 * </ol>
 * The class is not instantiable.
 */
public final class YswgFieldFormatter {

	/**
	 * Matches every character that is dropped or replaced in field values:
	 * line feed, carriage return, U+0001, tab and U+0000 (NUL).
	 * <p>
	 * Modification 1: tab conflicts with the field separator, so it is replaced too.
	 * Modification 2: the NUL character causes errors when exporting to pg.
	 */
	private static final Pattern REPLACE_PATTERN = Pattern.compile("\\n|\\r|\01|\\t|\\u0000");

	/**
	 * Removes every character matched by {@link #REPLACE_PATTERN} from the
	 * string, then escapes and encloses the result.
	 * <p>
	 * The removed characters are line feed, carriage return, U+0001, tab and
	 * NUL. The original documentation states that this method is invoked when
	 * the command line option {@code --hive-drop-delims} is provided.
	 *
	 * @param str        the field value to format; may be {@code null}
	 * @param delimiters the delimiter configuration used for escaping and
	 *                   enclosing; must not be {@code null} when {@code str}
	 *                   is non-null
	 * @return the formatted field value, or {@code null} if {@code str} is
	 * {@code null}
	 */
	public static String hiveStringDropDelims(String str,
	                                          com.cloudera.sqoop.lib.DelimiterSet delimiters) {
		return hiveStringReplaceDelims(str, "", delimiters);
	}

	/**
	 * Replaces every character matched by {@link #REPLACE_PATTERN} with a
	 * user-defined string (the value passed to the
	 * {@code --hive-delims-replacement} option), then escapes and encloses
	 * the result.
	 * <p>
	 * The replacement is handed to {@link java.util.regex.Matcher#replaceAll(String)}
	 * unchanged, so {@code $} and {@code \} keep their special meaning there.
	 *
	 * @param str         the field value to format; may be {@code null}
	 * @param replacement the text substituted for each matched character;
	 *                    must not be {@code null} when {@code str} is non-null
	 * @param delimiters  the delimiter configuration used for escaping and
	 *                    enclosing; must not be {@code null} when {@code str}
	 *                    is non-null
	 * @return the formatted field value, or {@code null} if {@code str} is
	 * {@code null}
	 */
	public static String hiveStringReplaceDelims(String str, String replacement,
	                                             com.cloudera.sqoop.lib.DelimiterSet delimiters) {
		// A null field value must stay null instead of failing inside
		// Pattern.matcher(null); this mirrors the null handling of
		// escapeAndEnclose.
		if (null == str) {
			return null;
		}
		String droppedDelims = REPLACE_PATTERN.matcher(str).replaceAll(replacement);
		return escapeAndEnclose(droppedDelims, delimiters);
	}

	/**
	 * Takes an input string representing the value of a field, trims it,
	 * encloses it in enclosing chars, and escapes any occurrences of such
	 * characters in the middle.  The escape character itself is also escaped
	 * if it appears in the text of the field.  If there is no enclosing
	 * character, then any delimiters present in the field body are escaped
	 * instead.
	 * <p>
	 * The field is enclosed only if:
	 * enclose != '\000', and:
	 * encloseRequired is true, or
	 * one of the fields-terminated-by or lines-terminated-by characters is
	 * present in the (trimmed) string.
	 * <p>
	 * Escaping is not performed if the escape char is '\000'.
	 * <p>
	 * Trimming uses {@link String#trim()} and happens before any delimiter
	 * check, so a delimiter that is removed by the trim does not trigger
	 * enclosing.
	 *
	 * @param str        - The user's string to escape and enclose
	 * @param delimiters - The DelimiterSet to use identifying the escape and
	 *                   enclose semantics. If the specified escape or enclose characters are
	 *                   '\000', those operations are not performed.
	 * @return the trimmed, escaped, enclosed version of 'str', or
	 * {@code null} if 'str' is {@code null}.
	 */
	public static String escapeAndEnclose(String str,
	                                      com.cloudera.sqoop.lib.DelimiterSet delimiters) {
		if (null == str) {
			return null;
		}
		// Modification 3: automatically trim leading and trailing whitespace.
		str = str.trim();

		char escape = delimiters.getEscapedBy();
		char enclose = delimiters.getEnclosedBy();
		boolean encloseRequired = delimiters.isEncloseRequired();

		// true if we can use an escape character.
		boolean escapingLegal =
				com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR != escape;
		String escapeStr = String.valueOf(escape);

		// When escaping is legal, first double any instance of the escape
		// char itself so later-added escapes stay unambiguous.
		String withEscapes = escapingLegal
				? str.replace(escapeStr, escapeStr + escape)
				: str;

		if (com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR == enclose) {
			// The enclose-with character was left unset, so we can't enclose items.

			if (escapingLegal) {
				// If the user has used the fields-terminated-by or
				// lines-terminated-by characters in the string, escape them if we
				// have an escape character.
				String fields = String.valueOf(delimiters.getFieldsTerminatedBy());
				String lines = String.valueOf(delimiters.getLinesTerminatedBy());
				withEscapes = withEscapes.replace(fields, escapeStr + fields);
				withEscapes = withEscapes.replace(lines, escapeStr + lines);
			}

			// No enclosing possible, so now return this.
			return withEscapes;
		}

		// if we have an enclosing character, and escaping is legal, then the
		// encloser must always be escaped.
		if (escapingLegal) {
			withEscapes = withEscapes.replace(String.valueOf(enclose), escapeStr + enclose);
		}

		// Enclose when it is mandatory, or when the trimmed (unescaped) value
		// contains a field or line terminator.
		boolean actuallyDoEnclose = encloseRequired
				|| str.indexOf(delimiters.getFieldsTerminatedBy()) != -1
				|| str.indexOf(delimiters.getLinesTerminatedBy()) != -1;

		if (actuallyDoEnclose) {
			return enclose + withEscapes + enclose;
		}
		return withEscapes;
	}

	/** Utility class; not meant to be instantiated. */
	private YswgFieldFormatter() {
	}
}
