#!/usr/bin/env bash

# Locations used by the RKE2 installation step. These are relative paths, so
# they resolve against the directory the installer is started from.
WORK_DIR=installer/04_rke_installation
TEMP_CONFIG=${WORK_DIR}/temp_config.yaml
REMOTE_VARS=${WORK_DIR}/remote_vars
REMOTE_SCRIPT=${WORK_DIR}/remote.sh
REMOTE_HOSTS=${WORK_DIR}/hosts
ERROR_DIR=${WORK_DIR}/errors
TEMPLATES_DIR=${WORK_DIR}/templates
FILES_DIR=${WORK_DIR}/files

# Shared installer helpers. Presumably this is where run_command,
# check_condition, log_* and handle_exit_status come from - none of them is
# defined in this file. It must be sourced before the run_command call below.
source installer/common/functions.sh
# Recursively set mode 0755 on the helper files shipped with this step.
run_command "chmod -R 0755 ${FILES_DIR}"

# Where the generated per-node RKE2 configs are kept on the installer host.
# NOTE(review): sisense_dir is not set in this file; it is assumed to be
# provided by the sourced helpers or the calling environment - confirm.
RKE2_CONFIG_DIR=${sisense_dir}/config/rke2
# Directory holding config.yaml / registries.yaml / rke2.yaml on the nodes.
RKE2_RUNTIME_DIR=/etc/rancher/rke2

# Comes from the Provisioner's Docker image
# NOTE(review): "artiacts" looks like a misspelling of "artifacts", but the
# same spelling is used for the remote /tmp directory further down, so it
# presumably has to match the directory baked into the image - confirm before
# renaming.
RKE2_DIR_OFFLINE=/app/rke2-artiacts

#######################################
# Compare the requested cluster CIDR, service CIDR and cluster DNS with the
# values found in an existing RKE2 server config and report an error when any
# requested (non-empty) value differs.
# Globals:
#   RKE2_RUNTIME_DIR, RKE2_CONFIG_DIR                             (read)
#   config_cluster_cidr, config_service_cidr, config_cluster_dns  (read)
# Arguments:
#   None
# Outputs:
#   Messages through log_to_sisense_installer and log_stderr.
# Returns:
#   Whatever handle_exit_status returns. It is called immediately after the
#   check so that it observes the status the check left in $?.
#######################################
function check_cidr_change() {
  local server_config_file=""
  local candidate

  # Locate the server config file: the live runtime config wins, otherwise the
  # first generated <node>_server_config.yaml (globs expand in sorted order).
  if [[ -f "${RKE2_RUNTIME_DIR}/config.yaml" ]]; then
    server_config_file="${RKE2_RUNTIME_DIR}/config.yaml"
  else
    # Filename expansion is not performed inside [[ ... ]], so the glob has to
    # be expanded by a loop. When nothing matches, the only word is the literal
    # pattern, which fails the -f test and leaves server_config_file empty.
    for candidate in "${RKE2_CONFIG_DIR}"/*_server_config.yaml; do
      if [[ -f "${candidate}" ]]; then
        server_config_file="${candidate}"
        break
      fi
    done
  fi

  # Without an existing server config there is nothing to compare against.
  # NOTE: this `if` must stay the last command before handle_exit_status,
  # because its exit status is what handle_exit_status sees in $?.
  if [[ -n "${server_config_file}" ]]; then
    log_to_sisense_installer "Validating RKE2 DNS/CIDR ..."

    # Second whitespace-separated field of the matching line, quotes removed.
    local current_cluster_cidr current_service_cidr current_cluster_dns
    current_cluster_cidr=$(grep 'cluster-cidr' "${server_config_file}" | awk '{print $2}' | tr -d '"')
    current_service_cidr=$(grep 'service-cidr' "${server_config_file}" | awk '{print $2}' | tr -d '"')
    current_cluster_dns=$(grep 'cluster-dns' "${server_config_file}" | awk '{print $2}' | tr -d '"')

    # An empty requested value means "not specified" and is never a mismatch.
    if [[ -n "${config_cluster_cidr}" && "${config_cluster_cidr}" != "${current_cluster_cidr}" ]] \
      || [[ -n "${config_service_cidr}" && "${config_service_cidr}" != "${current_service_cidr}" ]] \
      || [[ -n "${config_cluster_dns}" && "${config_cluster_dns}" != "${current_cluster_dns}" ]]; then
      log_stderr "ERROR: cannot change CIDR/DNS on existing working RKE2 cluster!"
      log_stderr "If you wish to switch CIDR/DNS, please follow Sisense documentation guidelines." 1
    fi
  fi
  handle_exit_status "RKE2 DNS/CIDR validation"
}

#######################################
# Render one RKE2 config file per node (server config for the first node,
# agent config for the rest) and install it on each node as
# ${RKE2_RUNTIME_DIR}/config.yaml. When a private Docker registry is
# configured, registries.yaml is rendered and installed the same way.
# Globals:
#   RKE2_CONFIG_DIR, RKE2_RUNTIME_DIR, TEMPLATES_DIR, FILES_DIR, VARS_FILE,
#   config_linux_user, config_password, config_ssh_key,
#   config_private_docker_registry, config_remote_installation     (read)
#   recommend_system_reserved_memory, recommend_system_reserved_cpu,
#   server_node_name, server_node_addr, current_node, current_internal_ip
#     (exported while the templates are rendered, unset before returning)
# Arguments:
#   None
# Outputs:
#   Progress through log_to_sisense_installer; commands go through run_command.
# Returns:
#   Status of the final `unset`.
#######################################
function create_rke_configs() {
  run_command "sudo mkdir -p ${RKE2_CONFIG_DIR} && sudo chown -R ${config_linux_user:-$(id -u)}:${config_linux_user:-$(id -g)} ${RKE2_CONFIG_DIR}" "Creating RKE config dir ${RKE2_CONFIG_DIR}"
  if check_condition ${config_private_docker_registry}; then
    log_to_sisense_installer "Private Docker registry is set. Generating file ${RKE2_CONFIG_DIR}/registries.yaml"
    template_j2_file "${TEMPLATES_DIR}/registries.yaml.j2" "${VARS_FILE}" "${RKE2_CONFIG_DIR}/registries.yaml"
  fi

  # The j2 CLI only recognizes variables that are *exported*, hence the
  # exports below (they are unset again at the end of the function).
  # rke-node-size.sh output: first line is used as the memory value, last line
  # as the CPU value.
  local kube_reserved_results
  kube_reserved_results=$(bash "${FILES_DIR}/rke-node-size.sh")
  recommend_system_reserved_memory=$(head -1 <<< "${kube_reserved_results}")
  recommend_system_reserved_cpu=$(tail -1 <<< "${kube_reserved_results}")
  export recommend_system_reserved_memory recommend_system_reserved_cpu

  # One line per node: "<name> <internal_ip> <external_ip>".
  local nodes_details first_node_line
  nodes_details=$(yq '.k8s_nodes[] | (.node + " " + .internal_ip + " " + .external_ip)' "${VARS_FILE}" | sed 's/"//g')

  # The first node listed is the RKE2 server; every other node is an agent.
  first_node_line=${nodes_details%%$'\n'*}
  server_node_name=$(awk '{print $1}' <<< "${first_node_line}")
  server_node_addr=$(awk '{print $2}' <<< "${first_node_line}")
  export server_node_name server_node_addr

  # Loop temporaries are local so they neither leak into nor clobber the
  # caller's scope (functions called from here still see them).
  local -a nodes_array=()
  local node_detail mode ip_for_copy template_file current_config_file
  local ssh_login mkdir_msg mkdir_cmd copy_msg copy_cmd mv_msg mv_cmd
  local copy_reg_msg copy_reg_cmd mv_reg_msg mv_reg_cmd

  mapfile -t nodes_array <<< "${nodes_details}"
  for node_detail in "${nodes_array[@]}"; do
    current_node=$(awk '{print $1}' <<< "${node_detail}")
    current_internal_ip=$(awk '{print $2}' <<< "${node_detail}")
    export current_node current_internal_ip

    # Quoted right-hand side: compare literally, not as a glob pattern.
    mode=agent
    if [[ "${current_node}" == "${server_node_name}" ]]; then
      mode=server
    fi

    # Remote installations reach the node through its external IP.
    ip_for_copy=${current_internal_ip}
    if check_condition ${config_remote_installation}; then
      ip_for_copy=$(awk '{print $3}' <<< "${node_detail}") # external IP
    fi

    log_to_sisense_installer "mode: ${mode}, node: ${current_node}, internal ip: ${current_internal_ip}"
    template_file=${TEMPLATES_DIR}/${mode}_config.yaml.j2
    current_config_file=${RKE2_CONFIG_DIR}/${current_node}_${mode}_config.yaml

    # Keep the previous rendering next to the new one.
    if [[ -f "${current_config_file}" ]]; then
      run_command "cp ${current_config_file} ${current_config_file}.bkp" "Backing up RKE2 ${mode} config file ${current_config_file}"
    fi

    log_to_sisense_installer "Generating RKE2 ${mode} config file ${current_config_file}"
    template_j2_file "${template_file}" "${VARS_FILE}" "${current_config_file}"

    # Files are copied to /tmp first and then moved with sudo, because the
    # destination directory is created with sudo and the file ends up
    # root-owned.
    ssh_login="-o \"StrictHostKeyChecking no\" ${config_linux_user}@${ip_for_copy}"
    mkdir_msg="Creating dir ${RKE2_RUNTIME_DIR} at host ${ip_for_copy} (${current_node})"
    mkdir_cmd="sudo mkdir -p ${RKE2_RUNTIME_DIR}"
    copy_msg="Copying file ${current_config_file} to host ${ip_for_copy} (${current_node}) into file /tmp/config.yaml"
    copy_cmd="-o \"StrictHostKeyChecking no\" ${current_config_file} ${config_linux_user}@${ip_for_copy}:/tmp/config.yaml"
    mv_msg="Moving file /tmp/config.yaml to ${RKE2_RUNTIME_DIR}/config.yaml on host ${ip_for_copy} (${current_node})"
    mv_cmd="sudo mv /tmp/config.yaml ${RKE2_RUNTIME_DIR}/config.yaml && sudo chown root:root ${RKE2_RUNTIME_DIR}/config.yaml"

    copy_reg_msg="Copying file ${RKE2_CONFIG_DIR}/registries.yaml to host ${ip_for_copy} (${current_node}) into file /tmp/registries.yaml"
    copy_reg_cmd="-o \"StrictHostKeyChecking no\" ${RKE2_CONFIG_DIR}/registries.yaml ${config_linux_user}@${ip_for_copy}:/tmp/registries.yaml"
    mv_reg_msg="Moving file /tmp/registries.yaml to ${RKE2_RUNTIME_DIR}/registries.yaml on host ${ip_for_copy} (${current_node})"
    mv_reg_cmd="sudo mv /tmp/registries.yaml ${RKE2_RUNTIME_DIR}/registries.yaml && sudo chown root:root ${RKE2_RUNTIME_DIR}/registries.yaml"

    if [[ -n "${config_password}" ]]; then
      # NOTE(review): the password is placed on the sshpass command line, so it
      # is visible in the process list and possibly in run_command's logging;
      # consider `sshpass -e` with the SSHPASS environment variable.
      run_command "sshpass -p \"${config_password}\" ssh ${ssh_login} \"${mkdir_cmd}\"" "${mkdir_msg} using password"
      run_command "sshpass -p \"${config_password}\" scp ${copy_cmd}" "${copy_msg} using password"
      run_command "sshpass -p \"${config_password}\" ssh ${ssh_login} \"${mv_cmd}\"" "${mv_msg} using SSH password"

      if check_condition ${config_private_docker_registry}; then
        run_command "sshpass -p \"${config_password}\" scp ${copy_reg_cmd}" "${copy_reg_msg} using password"
        run_command "sshpass -p \"${config_password}\" ssh ${ssh_login} \"${mv_reg_cmd}\"" "${mv_reg_msg} using SSH password"
      fi
    else
      run_command "ssh -i ${config_ssh_key} ${ssh_login} \"${mkdir_cmd}\"" "${mkdir_msg} using SSH key"
      run_command "scp -i ${config_ssh_key} ${copy_cmd}" "${copy_msg} using SSH key"
      run_command "ssh -i ${config_ssh_key} ${ssh_login} \"${mv_cmd}\"" "${mv_msg} using SSH key"

      if check_condition ${config_private_docker_registry}; then
        run_command "scp -i ${config_ssh_key} ${copy_reg_cmd}" "${copy_reg_msg} using SSH key"
        run_command "ssh -i ${config_ssh_key} ${ssh_login} \"${mv_reg_cmd}\"" "${mv_reg_msg} using SSH key"
      fi
    fi
  # The here-string does not drive the loop (the array does); it only becomes
  # stdin for the commands run inside it. Kept deliberately so that ssh does
  # not read from the installer's own stdin.
  done <<< "${nodes_details}"

  unset recommend_system_reserved_memory recommend_system_reserved_cpu current_node current_internal_ip server_node_name server_node_addr
}

#######################################
# For offline installations, replace /tmp/rke2-artiacts on every node with a
# fresh copy of the offline RKE2 bundle found in RKE2_DIR_OFFLINE.
# Globals:
#   RKE2_DIR_OFFLINE, VARS_FILE, config_offline_installer,
#   config_remote_installation, config_linux_user, config_password,
#   config_ssh_key                                                 (read)
# Arguments:
#   None
# Outputs:
#   Commands are executed (and reported) through run_command.
# Returns:
#   0 when offline installation is not enabled; otherwise the status of the
#   last command run for the last node.
#######################################
function copy_offline_rke_installation() {
  if ! check_condition ${config_offline_installer}; then
    return 0
  fi

  # Nodes are addressed by external IP for remote installations and by
  # internal IP otherwise, so only the field that is needed is queried.
  local ip_field=internal_ip
  if check_condition ${config_remote_installation}; then
    ip_field=external_ip
  fi

  # One line per node: "<name> <ip>".
  local nodes_details
  nodes_details=$(yq ".k8s_nodes[] | (.node + \" \" + .${ip_field})" "${VARS_FILE}" | sed 's/"//g')

  local -a nodes_array=()
  local node_detail current_node current_ip copy_cmd copy_msg

  mapfile -t nodes_array <<< "${nodes_details}"
  for node_detail in "${nodes_array[@]}"; do
    current_node=$(awk '{print $1}' <<< "${node_detail}")
    current_ip=$(awk '{print $2}' <<< "${node_detail}")
    copy_cmd="-o \"StrictHostKeyChecking no\" -r ${RKE2_DIR_OFFLINE} ${config_linux_user}@${current_ip}:/tmp/rke2-artiacts"
    copy_msg="Copying offline RKE2 installation dir ${RKE2_DIR_OFFLINE} to host ${current_ip} (${current_node}) into dir /tmp/rke2-artiacts"

    # The remote directory is removed first so that `scp -r` creates it anew
    # instead of copying into an existing directory.
    if [[ -n "${config_password}" ]]; then
      run_command "sshpass -p \"${config_password}\" ssh -o \"StrictHostKeyChecking no\" ${config_linux_user}@${current_ip} \"rm -rf /tmp/rke2-artiacts\"" "Removing existing /tmp/rke2-artiacts directory on host ${current_ip} (${current_node}) using password"
      run_command "sshpass -p \"${config_password}\" scp ${copy_cmd}" "${copy_msg} using password"
    else
      run_command "ssh -i ${config_ssh_key} -o \"StrictHostKeyChecking no\" ${config_linux_user}@${current_ip} \"rm -rf /tmp/rke2-artiacts\"" "Removing existing /tmp/rke2-artiacts directory on host ${current_ip} (${current_node}) using SSH key"
      run_command "scp -i ${config_ssh_key} ${copy_cmd}" "${copy_msg} using SSH key"
    fi
  # The here-string does not drive the loop (the array does); it only becomes
  # stdin for the commands run inside it. Kept deliberately so that ssh does
  # not read from the installer's own stdin.
  done <<< "${nodes_details}"
}

#######################################
# Run the remote RKE2 installation, fetch kubeconfig and the kubectl binary
# from the first node, point kubeconfig at that node and wait (up to
# limit x interval seconds) until no node reports NotReady.
# Globals:
#   VARS_FILE, RKE2_RUNTIME_DIR, config_remote_installation,
#   config_linux_user, config_password, config_ssh_key              (read)
# Arguments:
#   None
# Outputs:
#   Progress through log_royal_blue / log_green; commands go through
#   run_command.
# Returns:
#   Status of the final log_green call.
#######################################
function install_and_run_rke() {
  run_remote_script "RKE2 installation"

  # The first node in the vars file is the RKE2 server. Remote installations
  # talk to it through its external IP, local ones through the internal IP.
  local first_node_internal_ip first_node_external_ip remote_ip
  first_node_internal_ip=$(yq '.k8s_nodes[0].internal_ip' "${VARS_FILE}" | sed 's/"//g')
  first_node_external_ip=$(yq '.k8s_nodes[0].external_ip' "${VARS_FILE}" | sed 's/"//g')
  remote_ip=${first_node_internal_ip}
  if check_condition ${config_remote_installation}; then
    remote_ip=${first_node_external_ip}
  fi

  ###########################################
  ### Getting kubeconfig from Server node ###
  ###########################################
  run_command "mkdir -p ~/.kube" "Making dir ~/.kube"

  local copy_cmd="-o \"StrictHostKeyChecking no\" ${config_linux_user}@${remote_ip}:${RKE2_RUNTIME_DIR}/rke2.yaml ~/.kube/config"
  local copy_msg="Getting kubeconfig file from RKE2 server node"
  if [[ -n "${config_password}" ]]; then
    run_command "sshpass -p \"${config_password}\" scp ${copy_cmd}" "${copy_msg} using password"
  else
    run_command "scp -i ${config_ssh_key} ${copy_cmd}" "${copy_msg} using SSH key"
  fi

  # Dots are escaped so the addresses are matched literally; an unescaped "."
  # matches any character and could hit unrelated text in the kubeconfig
  # (for example base64 certificate data).
  local internal_ip_pattern="${first_node_internal_ip//./\\.}"
  run_command "sed -i 's/${internal_ip_pattern}/${remote_ip}/g' ~/.kube/config" "Setting k8s remote address in kubeconfig"
  run_command "chmod 600 ~/.kube/config"
  run_command "sed -i 's/127\.0\.0\.1/${remote_ip}/g' ~/.kube/config"

  ###############################################
  ### Getting kubectl binary from Server node ###
  ###############################################
  copy_cmd="-o \"StrictHostKeyChecking no\" ${config_linux_user}@${remote_ip}:/var/lib/rancher/rke2/bin/kubectl ~/kubectl"
  copy_msg="Getting kubectl binary file from RKE2 server node"
  if [[ -n "${config_password}" ]]; then
    run_command "sshpass -p \"${config_password}\" scp ${copy_cmd}" "${copy_msg} using password"
  else
    run_command "scp -i ${config_ssh_key} ${copy_cmd}" "${copy_msg} using SSH key"
  fi
  run_command "chmod 755 ~/kubectl && sudo mv ~/kubectl /usr/local/bin/" "Moving kubectl binary to /usr/local/bin/"

  #########################################################
  ### Waiting until all k8s nodes are in 'Ready' status ###
  #########################################################
  # Second column of `kubectl get node --no-headers` is the node status.
  local nodes_status
  nodes_status=$(kubectl get node --no-headers | awk '{print $2}')
  local try=1
  local limit=30 # 30 tries x 5 seconds = 2.5 minutes
  local interval=5

  while (( try <= limit )) && [[ "${nodes_status}" == *"NotReady"* ]]; do
    log_royal_blue "Waiting for all k8s servers to be on 'Ready' state... try ${try}/${limit}"
    sleep "${interval}"
    nodes_status=$(kubectl get node --no-headers | awk '{print $2}')
    try=$((try + 1))
  done

  # Final verdict is left to are_all_nodes_ready; handle_exit_status sees its
  # exit status in $?.
  are_all_nodes_ready
  handle_exit_status "Validating RKE nodes are up after installation"
  log_green "The RKE cluster has been started successfully"
}

#######################################
# Apply Kubernetes role labels to the cluster nodes:
#   - the first 3 nodes are labelled master, controlplane and etcd;
#   - every node is labelled worker;
#   - the per-namespace Application/Build/Query labels are removed from all
#     nodes and re-added according to each node's `roles` entry;
#   - the master NoSchedule taint is removed from every node (best effort).
# Globals:
#   VARS_FILE, config_namespace_name                                (read)
# Arguments:
#   None
# Outputs:
#   Commands are executed (and reported) through run_command.
# Returns:
#   Whatever handle_exit_status returns after are_all_nodes_ready.
#######################################
function label_node_roles() {
  local -a control_plane_nodes=()
  local node_name node_roles roles_lc role node_count i

  # First 3 nodes will be tagged master, controlplane, and etcd
  # All nodes (including first 3 nodes) will be tagged as worker nodes
  mapfile -t control_plane_nodes < <(yq '.k8s_nodes[:3] | .[].node' "${VARS_FILE}" | sed 's/"//g')
  for node_name in "${control_plane_nodes[@]}"; do
    [[ -n "${node_name}" ]] || continue
    run_command "kubectl label node ${node_name} node-role.kubernetes.io/master=true node-role.kubernetes.io/controlplane=true node-role.kubernetes.io/etcd=true --overwrite=true" "Tagging node ${node_name} as master, controlplane, and etcd"
  done
  run_command "kubectl label node --all node-role.kubernetes.io/worker=true" "Tagging all nodes as workers"

  node_count=$(yq '.k8s_nodes | length' "${VARS_FILE}")
  # Anything that is not a plain number is treated as "no nodes".
  if [[ ! "${node_count}" =~ ^[0-9]+$ ]]; then
    node_count=0
  fi

  # A trailing "-" removes a label: start from a clean slate so that roles
  # dropped from the vars file do not linger on the nodes.
  run_command "kubectl label nodes --all node-${config_namespace_name}-Application- node-${config_namespace_name}-Build- node-${config_namespace_name}-Query-"
  for ((i = 0; i < node_count; i++)); do
    node_name=$(yq ".k8s_nodes[${i}].node" "${VARS_FILE}" | sed 's/"//g')
    node_roles=$(yq ".k8s_nodes[${i}].roles" "${VARS_FILE}" | sed 's/"//g')
    roles_lc=${node_roles,,}

    # Case-insensitive substring match of each role name against `roles`.
    for role in Application Query Build; do
      if [[ "${roles_lc}" == *"${role,,}"* ]]; then
        run_command "kubectl label node ${node_name} node-${config_namespace_name}-${role}=true --overwrite=true" "Adding ${role,,} label to node ${node_name}"
      fi
    done

    # Best effort: the taint may not exist, so output and failure are ignored.
    run_command "kubectl taint nodes ${node_name} node-role.kubernetes.io/master:NoSchedule- >/dev/null 2>&1 || true" "Remove master NoSchedule taint fron node ${node_name} (Ignoring not found...)"
  done

  are_all_nodes_ready
  handle_exit_status "validating K8s nodes are up after tagging nodes"
}

#######################################
# When config_recover_kubernetes is enabled, restore the RKE2 cluster from the
# snapshot on the last line of `rke2 etcd-snapshot ls`.
# Based on documentation at: https://docs.rke2.io/datastore/backup_restore
# Globals:
#   config_recover_kubernetes   (read)
#   etcd_snapshot_path          (written; not local, so it stays set after
#                                the function returns)
# Arguments:
#   None
# Outputs:
#   Progress through log_to_sisense_installer / log_green, errors through
#   log_stderr; commands go through run_command.
# Returns:
#   0 when recovery is not requested; otherwise the status of the last helper
#   called.
#######################################
function recover_kubernetes() {
  # Recovery is opt-in.
  if ! check_condition ${config_recover_kubernetes}; then
    return 0
  fi

  ## Restore ETCD can be run only from the RKE2 server node ("node1")
  ## Gather latest ETCD snapshot: second column of the last listed line, with
  ## the file:// scheme removed so that a plain path remains.
  local latest_snapshot_uri
  latest_snapshot_uri=$(sudo rke2 etcd-snapshot ls 2>/dev/null | tail -1 | awk '{print $2}')
  etcd_snapshot_path=${latest_snapshot_uri/file:\/\//}

  if [[ -z ${etcd_snapshot_path} ]]; then
    log_stderr "No ETCD snapshot found, cannot continue with recover Kubernetes process..." 1
    handle_exit_status "recover kubernetes"
    return
  fi

  log_to_sisense_installer "Restoring an ETCD from snapshot ${etcd_snapshot_path} ..."
  run_command "sudo systemctl stop rke2-server" "Stopping rke2-server"
  run_command "sudo rke2 server --cluster-reset --cluster-reset-restore-path=${etcd_snapshot_path}" "Restoring RKE2 cluster from ETCD snapshot"
  run_command "sudo systemctl daemon-reload" "Running sudo systemctl daemon-reload"
  run_command "sudo systemctl restart rke2-server" "Restarting RKE2 server"
  # Per the original author's note: a later step visits each node and
  # restarts rke2-server/agent there.
  log_green "ETCD snapshot restored succesfully. Continuing with installation"
}
