/**
 * Inputs and callbacks consumed by `sanitizeQuery`.
 */
export interface sanitizeQueryProps {
  /** Query text as currently entered; it is trimmed and tokenized before inspection. */
  currentQueryInput: string;
  /** Row limit to apply when the query carries no usable LIMIT of its own; a falsy value falls back to 100. */
  queryLimit: number;
  /** Invoked with the limit that was settled on (the 100 fallback, or the sanitized user-provided LIMIT). */
  setQueryLimit: (limit: number) => void;
  /** Invoked with the rewritten query text when `enforceLimit` is true and the query is a non-compound SELECT. */
  setCurrentQueryInput: (input: string) => void;
  /** When true, a LIMIT clause is appended to SELECT statements and an existing one is replaced. */
  enforceLimit: boolean;
}

/**
 * Normalizes the query held in `props.currentQueryInput` and, for SELECT
 * statements, optionally enforces a bounded LIMIT clause.
 *
 * Behavior:
 * - Non-SELECT statements are returned re-joined from their tokens, untouched otherwise.
 * - SELECT statements with content after `LIMIT <value>` are treated as probable
 *   compound queries and returned as written so the backend reports its own errors.
 * - Otherwise trailing semicolons are removed, comments are stripped when a `--`
 *   comment (or any comment following a `;`) is present, and, when
 *   `props.enforceLimit` is true, ` LIMIT <n>` is appended.
 *
 * Side effects: may call `props.setQueryLimit` (with the 100 fallback and/or the
 * sanitized user-provided limit) and, when `props.enforceLimit` is true,
 * `props.setCurrentQueryInput`.
 *
 * @param props - Query text, configured limit, and the setters to report changes through.
 * @returns The sanitized query string to execute.
 */
export const sanitizeQuery = (props: sanitizeQueryProps): string => {
  const maxQueryLimit = 1000;
  const defaultQueryLimit = 100;

  // Split and pre-process the input query
  let filteredTokens = filterEmptyTokens(
    getProcessedTokens(props.currentQueryInput.trim()),
  );

  // For non-select statements, no sanitizing needed. Return the original query with trimming and filtering.
  if (!isSelect(props.currentQueryInput)) {
    return buildStatement(filteredTokens);
  }

  // Start from the configured limit; a missing/zero limit falls back to the default
  // and the fallback is reported to the caller.
  let sanitizedLimitValue = props.queryLimit
    ? props.queryLimit
    : defaultQueryLimit;

  if (!props.queryLimit) {
    props.setQueryLimit(defaultQueryLimit);
  }

  // Locate the first LIMIT keyword (case-insensitive), if any
  const lowercaseTokens = filteredTokens.map((token) => token.toLowerCase());
  const existingLimitIndex = lowercaseTokens.indexOf("limit");
  const hasExistingLimit = existingLimitIndex !== -1;

  // Return probable compound statements as written to ensure correct errors are thrown
  if (isProbablyCompoundQuery(filteredTokens, existingLimitIndex)) {
    return buildStatement(filteredTokens);
  }

  // Sanitize the LIMIT value
  if (props.enforceLimit && hasExistingLimit) {
    const existingLimitValue = lowercaseTokens[existingLimitIndex + 1];
    // Always parse as base 10; a missing or non-numeric value yields NaN,
    // which fails both comparisons below and keeps the configured limit.
    const limitValueAsInt = existingLimitValue
      ? parseInt(existingLimitValue, 10)
      : NaN;

    // Valid limit values will be used.
    // Limit values exceeding the maximum will be set to the maximum.
    // All other invalid limit values will default to whatever queryLimit is set to.
    if (limitValueAsInt > maxQueryLimit) {
      sanitizedLimitValue = maxQueryLimit;
    } else if (limitValueAsInt > 0) {
      sanitizedLimitValue = limitValueAsInt;
    }
    props.setQueryLimit(sanitizedLimitValue);
    // Remove the existing limit statement, which will be replaced below
    filteredTokens.splice(existingLimitIndex, 2);
  }

  filteredTokens = removeTrailingSemicolons(filteredTokens);

  const sanitizedSelectStatement = buildStatement(filteredTokens);

  // A semicolon that is followed, anywhere later, by the start of a comment
  const commentAfterSemicolonPattern = /;[\s\S]*(\/\*|--)[\s\S]*$/;

  let completeSanitizedQuery: string;

  if (commentAfterSemicolonPattern.test(sanitizedSelectStatement)) {
    // Remove comments, trim, then drop only the semicolons that are actually
    // trailing. The matched semicolon may live inside a string literal or the
    // comment itself, so the last character must not be removed blindly.
    let uncommented = uncomment(sanitizedSelectStatement).trim();
    while (uncommented.endsWith(";")) {
      uncommented = uncommented.slice(0, -1).trim();
    }
    completeSanitizedQuery = uncommented;
  } else if (sanitizedSelectStatement.includes("--")) {
    // A single-line comment would swallow the appended LIMIT, so strip comments first
    completeSanitizedQuery = uncomment(sanitizedSelectStatement).trim();
  } else {
    completeSanitizedQuery = sanitizedSelectStatement;
  }

  if (props.enforceLimit) {
    completeSanitizedQuery = `${completeSanitizedQuery} LIMIT ${sanitizedLimitValue}`;

    if (hasExistingLimit) {
      props.setCurrentQueryInput(completeSanitizedQuery);
    } else {
      // Only render limit in query input if limit was already user-provided
      props.setCurrentQueryInput(sanitizedSelectStatement);
    }
  }
  return completeSanitizedQuery;
};

/**
 * Splits a query into tokens.
 *
 * Runs of spaces/tabs act as separators, except where the whitespace is followed
 * by a closing square bracket with no opening bracket in between (i.e. it sits
 * inside a `[bracketed name]`). Newlines are not separators: each one is kept as
 * its own "\n" token so that line structure (and therefore single-line comments)
 * survives a later re-join.
 *
 * Example: "Hello\nWorld!" becomes ["Hello", "\n", "World!"].
 *
 * @param query - Query text to tokenize.
 * @returns Tokens in source order; empty-string tokens may be present.
 */
function getProcessedTokens(query: string) {
  const separatorOutsideBrackets = /[ \t]+(?![^[]*])/g;
  // The capture group makes split() keep each newline as its own element.
  const newlineKeepingDelimiter = /(\n)/;

  return query
    .split(separatorOutsideBrackets)
    .flatMap((chunk) => chunk.split(newlineKeepingDelimiter));
}

/**
 * Joins tokens back into a single query string.
 *
 * Tokens are separated by one space, except around "\n" tokens: no space is
 * emitted after a newline token or before one, and nothing follows the last token.
 *
 * @param tokens - Tokens to join, with newlines represented as "\n" entries.
 * @returns The re-assembled statement.
 */
function buildStatement(tokens: string[]) {
  const finalIndex = tokens.length - 1;

  return tokens
    .map((current, position) => {
      const isFinal = position >= finalIndex;
      const touchesNewline =
        current === "\n" || tokens[position + 1] === "\n";
      return isFinal || touchesNewline ? current : current + " ";
    })
    .join("");
}

/**
 * Reports whether anything follows the LIMIT keyword and its value, which
 * suggests the input could be a compound query.
 *
 * @param tokens - Tokens of the statement.
 * @param startIndex - Index of the LIMIT keyword within `tokens`, or -1 when absent.
 * @returns True when at least one token exists beyond `LIMIT <value>`.
 */
function isProbablyCompoundQuery(tokens: string[], startIndex: number) {
  const finalTokenIndex = tokens.length - 1;
  // Everything from the LIMIT keyword up to, but not including, the final token.
  // With startIndex === -1 this span is always empty.
  const limitThroughPenultimate = tokens.slice(startIndex, finalTokenIndex);
  // Two or more entries here means LIMIT, its value, and something further exist.
  return limitThroughPenultimate.length >= 2;
}

/**
 * Trims semicolons from the end of the statement to prevent issues with
 * multi-statement errors.
 *
 * Trailing semicolons are stripped from the last token; a token that becomes
 * empty is dropped and the check moves on to the token before it. The input
 * array is never modified: when there is nothing to strip it is returned as-is,
 * otherwise a new array (without empty-string tokens) is returned.
 *
 * @param tokens - Tokens of the statement.
 * @returns Tokens without trailing semicolons.
 */
function removeTrailingSemicolons(tokens: string[]): string[] {
  const finalToken = tokens[tokens.length - 1];
  if (finalToken === undefined || !finalToken.endsWith(";")) {
    return tokens;
  }

  // Work on a copy (dropping empty tokens) so the caller's array stays untouched.
  const result = tokens.filter((token) => token !== "");

  while (result.length > 0) {
    const tail = result[result.length - 1];
    if (tail === undefined || !tail.endsWith(";")) {
      break;
    }
    const stripped = tail.replace(/;+$/, "");
    if (stripped === "") {
      // The token consisted solely of semicolons; keep checking the one before it.
      result.pop();
    } else {
      result[result.length - 1] = stripped;
    }
  }
  return result;
}

/**
 * Returns a new array holding every token that is not the empty string,
 * in the original order. The input array is left unchanged.
 *
 * @param tokens - Tokens to filter.
 * @returns The non-empty tokens.
 */
function filterEmptyTokens(tokens: string[]) {
  const nonEmpty: string[] = [];
  for (const candidate of tokens) {
    if (candidate !== "") {
      nonEmpty.push(candidate);
    }
  }
  return nonEmpty;
}

/**
 * Reports whether a query is a SELECT statement.
 *
 * Comments are stripped first, then the text is trimmed and tokenized; the query
 * counts as a SELECT when its first non-empty token equals "select",
 * compared case-insensitively.
 *
 * @param query - Query text to inspect.
 * @returns True when the first token is `select`.
 */
export function isSelect(query: string) {
  const withoutComments = uncomment(query).trim();
  const tokens = filterEmptyTokens(getProcessedTokens(withoutComments));
  return tokens.length > 0 && tokens[0].toLowerCase() === "select";
}

/**
 * Strips SQL comments from a string.
 *
 * Single-line comments (`--` through the end of the line, including the line
 * break) are removed first, then block comments (`/* ... *\/`, possibly spanning
 * lines). Matching is purely textual: comment markers inside string literals
 * are not recognized as such and are removed as well.
 *
 * @param string - Text to strip; a falsy value yields "".
 * @returns The text with comments removed.
 */
function uncomment(string: string) {
  if (!string) {
    return "";
  }

  // `[^\n]*` rather than `.*?`: `.` does not match "\r", so a comment on a
  // CRLF-terminated line would otherwise never match and be left in place.
  string = string.replace(/--[^\n]*(\n|$)/g, "");
  string = string.replace(/\/\*[\s\S]*?\*\//g, "");
  return string;
}
