#!/usr/bin/env bash

# This script runs on all remote k8s nodes.
# It runs all sorts of validations, such as CPU/MEM count, available storage, etc.
# The general variables used here come from the "remote_vars" file, generated in run.sh

# Checks that the detected OS and its version are on the supported list and,
# when they are, installs gawk with the distribution's package manager.
# Globals:   OS, VER (read) - presumably populated by detect_os; confirm.
# Outputs:   progress and error messages through log / log_stderr.
# NOTE(review): log_stderr and handle_exit_status are defined elsewhere; the
# trailing "2" is presumably the status that handle_exit_status then acts on.
function validate_os_version() {
    log "Validating OS and its version"

    local unsupported_prefix="Unsupported version has been detected, Sisense Linux deployment is certified to run on"
    local rejection=""       # non-empty => this OS/version must be rejected
    local pkg_manager="yum"  # every supported OS except Ubuntu installs via yum

    if [[ ${OS} == *"Red Hat"* ]]; then
      # Accept a bare "9" as well as "9.x", exactly as is done for "8" and as
      # the CentOS branch below already does.
      if [[ ${VER} != "8"* && ${VER} != "9"* ]]; then
        rejection="${unsupported_prefix} Red Hat 8.x/9.x only"
      fi
    elif [[ ${OS} == "Ubuntu" ]]; then
      pkg_manager="apt"
      if [[ ! ${VER} =~ ^(22\.04|23\.04|24\.04)$ ]]; then
        rejection="${unsupported_prefix} Ubuntu 22.04/23.04/24.04 only"
      fi
    elif [[ ${OS} == *"CentOS"* ]]; then
      if [[ ${VER} != "8"* && ${VER} != "9"* ]]; then
        rejection="${unsupported_prefix} CentOS 8.x/9.x only"
      fi
    elif [[ ${OS} == *"Rocky"* ]]; then
      # No version restriction is applied to Rocky.
      :
    elif [[ ${OS} == *"Amazon"* ]]; then
      if [[ ${VER} != "2" && ${VER} != "2023" ]]; then
        rejection="${unsupported_prefix} Amazon Linux 2 and 2023 only"
      fi
    else
      rejection="OS is not supported, please visit https://documentation.sisense.com/latest/linux/step1.htm for more information"
    fi

    # Keep this if/else directly in front of handle_exit_status: it is the
    # status of log_stderr or of the package installation that gets inspected.
    if [[ -n ${rejection} ]]; then
      log_stderr "${rejection}" 2
    else
      sudo "${pkg_manager}" install -y gawk
    fi
    handle_exit_status "validate_os_version"
}

# Confirms that ${linux_user} can run a harmless command through sudo.
# The probe is skipped as soon as check_condition succeeds for any of:
# is_kubernetes_cloud, is_openshift, offline_installer, rwx_sc_name, rwo_sc_name.
# NOTE(review): run_command is defined elsewhere; presumably it aborts the run
# when the probe fails, so log_green is only reached on success - confirm.
function test_sudo_permissions() {
    local skip_flag
    for skip_flag in is_kubernetes_cloud is_openshift offline_installer rwx_sc_name rwo_sc_name; do
      # Expansion is left unquoted on purpose so an empty value passes no argument.
      if check_condition ${!skip_flag}; then
        return 0
      fi
    done

    local probe="bash -c 'timeout -k 2 2 sudo /bin/chmod --help >/dev/null 2>&1' >/dev/null 2>&1"
    run_command "${probe}" "validating user ${linux_user} has sufficient sudo permissions"
    log_green "User ${linux_user} has sufficient sudo permissions"
}

# Raises the inotify limits when they are below the wanted floor.
# Needed for Ubuntu 22, where fluent-bit otherwise fails with
# "failed to create fsnotify watcher: too many open files"; other OSs can hit
# the same error when tailing the provisioner's log.
# For each key whose current value is lower than the floor, the key is placed
# in the associative array "entries" and set_sysctl_entries is called with
# /etc/sysctl.conf (set_sysctl_entries is defined elsewhere and presumably
# reads "entries" from the caller's scope - confirm).
function configure_sysctl() {
  local wanted_value=100000
  local sysctl_key

  for sysctl_key in fs.inotify.max_user_watches fs.inotify.max_user_instances; do
    local current_value=$(sudo sysctl -n "${sysctl_key}")
    # An unreadable value is treated as 0, which always triggers the update.
    current_value=${current_value:-0}

    if [[ ${wanted_value} -gt ${current_value} ]]; then
      log "Setting ${sysctl_key} to ${wanted_value}"
      declare -A entries=(
        ["${sysctl_key}"]="${wanted_value}"
      )
      set_sysctl_entries /etc/sysctl.conf
      handle_exit_status "Setting ${sysctl_key} to ${wanted_value}"
    fi
  done
}

# On Red Hat 8.4 (and only when check_condition fails for ${update}), reports
# an error if nm-cloud-setup is enabled or running on this node.
# Globals:   OS, VER, update, NODE_NAME, INTERNAL_IP (read)
# NOTE(review): ret and handle_exit_status are defined elsewhere; "ret 4"
# presumably sets the status that handle_exit_status then acts on - confirm.
function check_nm_cloud_setup() {
  if [[ ${OS} == *"Red Hat"* && ${VER} == "8.4" ]] && ! check_condition ${update}; then
    # Keep the counters local so they do not leak into the calling script.
    local nm_cloud_setup_count nm_cloud_setup_timer_running nm_cloud_setup_running

    # grep -c exits 1 when it counts zero lines; "|| true" keeps the
    # assignments successful while the printed "0" is still captured.
    nm_cloud_setup_count=$(systemctl list-unit-files | grep enabled | grep -c nm-cloud-setup || true)
    nm_cloud_setup_timer_running=$(systemctl status nm-cloud-setup.timer | grep -c "Main PID" || true)
    nm_cloud_setup_running=$(systemctl status nm-cloud-setup | grep -c "Main PID" || true)

    if [[ ${nm_cloud_setup_count} -gt 0 || ${nm_cloud_setup_timer_running} -gt 0 || ${nm_cloud_setup_running} -gt 0 ]]; then
      log_stderr "ERROR: nm-cloud-setup is enabled at node ${NODE_NAME} (Internal IP: ${INTERNAL_IP})"
      log_stderr "In RedHat 8.4 you must disable nm-cloud-setup and then reboot!"
      log_stderr "sudo systemctl disable nm-cloud-setup.service nm-cloud-setup.timer"
      log_stderr "sudo reboot"
      ret 4
    fi
    # Must directly follow the "if" above so that the status of "ret 4"
    # (or 0 when nothing was found) is what gets inspected.
    handle_exit_status "check_nm_cloud_setup"
  fi
}

# Stops and disables the host firewall service when it is present and active.
# Runs only when not_uninstall succeeds and check_condition fails for each of
# offline_installer, recover_kubernetes, update_k8s_version and update
# (i.e. on-prem, first-time, online Kubernetes install).
# Globals:   OS and the flags listed above (read)
function disable_fireall_services() {
  # If on-prem and first time k8s install and not offline, then disable firewall...
  if not_uninstall && ! check_condition ${offline_installer} && ! check_condition ${recover_kubernetes} &&
  ! check_condition ${update_k8s_version} && ! check_condition ${update}; then

    log "Validating firewall is disabled or non existing."
    local service_name=ufw #Ubuntu
    # Every yum-based OS accepted by validate_os_version uses firewalld,
    # including Rocky, which previously fell through to ufw.
    if [[ ${OS} == *"Red Hat"* || ${OS} == *"CentOS"* || ${OS} == *"Rocky"* || ${OS} == *"Amazon"* ]]; then
      service_name=firewalld
    fi

    # "systemctl status" fails both for a missing unit and for an inactive
    # one; in either case there is nothing to stop.
    if systemctl status "${service_name}" >/dev/null 2>&1; then
      log "Service ${service_name} found. Disabling..."
      run_command "sudo systemctl stop ${service_name}"
      run_command "sudo systemctl disable ${service_name}"
    else
      log "Service ${service_name} not found."
    fi

  fi
}

# Checks the node's core count and total RAM against the minimum requirements.
# Does nothing unless not_uninstall succeeds.
# Requirements: 8 cores (1 core of slack), 16384 MB RAM in single mode or
# 32768 MB in cluster mode (2048 MB of slack in both cases).
# Globals:   cluster_mode (read, through check_condition)
function validate_cpu_ram() {
  not_uninstall || return 0

  log "Validating amount of CPU and RAM"
  local required_cores=8
  local core_slack=1
  local required_ram_single=16384
  local required_ram_cluster=32768
  local ram_slack=2048

  # Core count from nproc, total memory in MB from the "Mem:" row of free -m.
  local core_count=$(nproc)
  local total_ram_mb=$(free -m | awk '/Mem:/ {print $2}')
  log "CPU count: ${core_count}, RAM count (MB): ${total_ram_mb}"

  local core_floor=$((required_cores - core_slack))
  local ram_floor_single=$((required_ram_single - ram_slack))
  local ram_floor_cluster=$((required_ram_cluster - ram_slack))

  if [[ ${core_count} -lt ${core_floor} ]]; then
    log_stderr "ERROR: Insufficient CPU (${core_count})! Minimum required CPU cores: ${required_cores}" 4
    handle_exit_status "validate_cpu_ram"
  fi

  # The cluster threshold only applies in cluster mode; the single-node
  # threshold is checked whenever the cluster branch did not report an error.
  if check_condition ${cluster_mode} && [[ ${total_ram_mb} -lt ${ram_floor_cluster} ]]; then
    log_stderr "ERROR: Insufficient RAM for cluster mode (${total_ram_mb} MB)! Minimum required RAM: ${required_ram_cluster} MB" 4
  elif [[ ${total_ram_mb} -lt ${ram_floor_single} ]]; then
    log_stderr "ERROR: Insufficient RAM for single mode (${total_ram_mb} MB)! Minimum required RAM: ${required_ram_single} MB" 4
  else
    log_green "The machine has sufficient CPU and RAM (${core_count} CPUs, ${total_ram_mb} MB RAM)"
  fi
  handle_exit_status "validate_cpu_ram"
}

# Verifies that nothing is already listening on the given TCP/UDP ports.
# Arguments: $@ - port numbers to check
# Outputs:   one "Checking port" log line per port; on failure, the list of
#            busy ports on stderr.
# Returns:   status of log_green when every port is free, otherwise the status
#            of "ret 5" (ret is defined elsewhere - presumably it returns 5).
# Note:      the messages say "opened" / "not opened"; a port is reported as
#            "not opened" when a listening socket already occupies it.
function validate_ports() {
  local ports=("$@")
  local used_ports=()
  local port            # kept local so the caller's variables are untouched
  local listening_sockets

  # Take one snapshot of the listening sockets instead of running ss per port.
  listening_sockets=$(ss -tuln)

  for port in "${ports[@]}"; do
    # An empty argument carries no port to check.
    [[ -n ${port} ]] || continue
    log "Checking port ${port}"
    # ":<port> " matches the local-address column exactly (":80 " does not
    # match ":8080 ").
    if grep -q ":${port} " <<<"${listening_sockets}"; then
      used_ports+=("$port")
    fi
  done

  if [ ${#used_ports[@]} -eq 0 ]; then
    log_green "All ports are opened."
  else
    log_stderr "The following ports are not opened:"
    printf '  - %s\n' "${used_ports[@]}" >&2
    ret 5
  fi
}

# Runs validate_ports for every group of ports this deployment needs.
# Applies only to an on-prem, first-time Kubernetes install: not_uninstall must
# succeed and check_condition must fail for recover_kubernetes,
# update_k8s_version and update.
# Groups: Kubernetes core ports (always), Rook Ceph ports (storage_type is
# rook-ceph), the API gateway port (no SSL, port other than 80, no cloud load
# balancer, no ALB controller) and the node ports (single node with
# expose_nodeports).
function validate_ports_wrapper() {
  # If on-prem and first time k8s install then run this validation...
  not_uninstall || return 0
  if check_condition ${recover_kubernetes}; then return 0; fi
  if check_condition ${update_k8s_version}; then return 0; fi
  if check_condition ${update}; then return 0; fi

  log "Validating Kuberenetes ports are opened"
  local k8s_ports=(
    "80"    #HTTP
    "8181"  #NGINX Ingress Controller
    "443"   #Rancher agent
    "9099"  #Canal/Flannel/Calico-node livenessProbe/readinessProbe
    "2380"  #etcd peer communication
    "2376"  #Docker daemon TLS port used by Docker Machine
    "2379"  #etcd client requests
    "179"   #calico BGP port
    "6443"  #Kubernetes apiserver
    "10250" #kubelet
    "10255" #kubelet read-only port
    "10248" #kubelet healthz
    "10249" #kube-proxy
    "10251" #kube-scheduler
    "10252" #kube-controller
    "10254" #Ingress controller livenessProbe/readinessProbe
    "10256" #kube-proxy
    "10257" #kube-controller
    "10259" #kube-scheduler
  )
  validate_ports "${k8s_ports[@]}"
  handle_exit_status "validating Kuberenetes ports"

  if [[ ${storage_type,,} == "rook-ceph" ]]; then
    log "Validating Rook Ceph ports are opened"
    local ceph_ports=(
      "9080" #CSI_RBD_LIVENESS_METRICS_PORT
      "9081" #CSI_CEPHFS_LIVENESS_METRICS_PORT
    )
    validate_ports "${ceph_ports[@]}"
    handle_exit_status "validating Rook Ceph ports"
  fi

  # The gateway port expansion stays unquoted so an empty value passes no argument.
  if ! check_condition ${is_ssl} && [[ ${gateway_port} -ne 80 ]] &&
    ! check_condition ${cloud_load_balancer} && ! check_condition ${alb_controller_enabled}; then
    log "Validating API Gateway port is opened"
    validate_ports ${gateway_port}
    handle_exit_status "validating API Gateway port"
  fi

  if ! check_condition ${cluster_mode} && check_condition ${expose_nodeports}; then
    log "Validating Node Ports are opened"
    local node_ports=(
      "30017" #Mongodb nodePort
      "30086" #Build app nodePort
      "30096" #Build debug nodePort
      "30555" #Connectors debug nodePort
      "30082" #Management app nodePort
      "30092" #Management debug nodePort
      "30084" #Query app nodePort
      "30094" #Query debug nodePort
      "30870" #Translation app nodePort
      "30022" #Translation debug nodePort
    )
    validate_ports "${node_ports[@]}"
    handle_exit_status "validating Node Ports are opened"
  fi
  log_green "All relevant ports are opened. Validation passed."
}

# Validates total size and free space of the filesystem that will hold Docker
# data. Runs only when not_uninstall succeeds and check_condition fails for
# ${update}.
# The most specific of /var/lib/docker, /var/lib, /var that appears in the
# df mount list is used, falling back to "/".
# Requirements: total size >= 153600 MB minus a 15360 MB tolerance, and at
# least 30720 MB available.
# NOTE(review): the trailing "3" given to log_stderr is presumably the status
# that handle_exit_status acts on - confirm against their definitions.
function validate_docker_disk_size() {
  if not_uninstall && ! check_condition ${update}; then
    local mount_points=""
    local current_var_mount=""   # local: must not leak into the caller
    mount_points=$(df -m | awk '{print $6}')

    if [[ "$mount_points" == *"/var/lib/docker"* ]]; then
      current_var_mount="/var/lib/docker"
    elif [[ "$mount_points" == *"/var/lib"* ]]; then
      current_var_mount="/var/lib"
    elif [[ "$mount_points" == *"/var"* ]]; then
      current_var_mount="/var"
    else
      current_var_mount="/"
    fi

    log "Validating disk space for Docker on ${current_var_mount} ..."

    # Local defined variables
    local root_disk_limit_mb=153600
    local delta_root_disk_limit_mb=15360
    local root_disk_avail_limit_mb=30720
    local min_disk_size_mb=$(( root_disk_limit_mb - delta_root_disk_limit_mb ))
    local min_disk_size_gb minavailable_space_gb total_disk_size_gb available_space_gb
    local df_output total_disk_size_mb available_space_mb

    # Get the disk usage information for ${current_var_mount}.
    # A df failure is detected by the numeric check below, hence "|| true".
    df_output=$(df -BM "${current_var_mount}") || true

    # Extract the total and available sizes in MB (second output line).
    total_disk_size_mb=$(awk 'NR==2 {sub(/M$/, "", $2); print $2}' <<<"${df_output}")
    available_space_mb=$(awk 'NR==2 {sub(/M$/, "", $4); print $4}' <<<"${df_output}")

    if [[ ! ${total_disk_size_mb} =~ ^[0-9]+$ || ! ${available_space_mb} =~ ^[0-9]+$ ]]; then
      # Without numeric values the arithmetic below would abort with a shell
      # error instead of reporting a validation failure.
      log_stderr "Unable to read disk size information for ${current_var_mount} from df"
      log_stderr "ERROR: Disk space validation ${current_var_mount} failed." 3
    else
      # MB -> GB with two decimals; %02d keeps e.g. 150 GB + 50 MB as "150.04"
      # rather than "150.4".
      printf -v min_disk_size_gb '%d.%02d' "$(( min_disk_size_mb / 1024 ))" "$(( (min_disk_size_mb % 1024) * 100 / 1024 ))"
      printf -v minavailable_space_gb '%d.%02d' "$(( root_disk_avail_limit_mb / 1024 ))" "$(( (root_disk_avail_limit_mb % 1024) * 100 / 1024 ))"
      printf -v total_disk_size_gb '%d.%02d' "$(( total_disk_size_mb / 1024 ))" "$(( (total_disk_size_mb % 1024) * 100 / 1024 ))"
      printf -v available_space_gb '%d.%02d' "$(( available_space_mb / 1024 ))" "$(( (available_space_mb % 1024) * 100 / 1024 ))"

      log "Docker mount ${current_var_mount} results:"

      # Check the conditions
      if (( total_disk_size_mb >= min_disk_size_mb )) && (( available_space_mb >= root_disk_avail_limit_mb )); then
        log "Required disk size: ${min_disk_size_gb}GB, Current disk size: ${total_disk_size_gb}GB"
        log "Required available space: ${minavailable_space_gb}GB, Current available space: ${available_space_gb}GB"
        log_green "Disk space validation for ${current_var_mount} passed."
      else
        log_stderr "Required disk size: ${min_disk_size_gb}GB, Current disk size: ${total_disk_size_gb}GB"
        log_stderr "Required available space: ${minavailable_space_gb}GB, Current available space: ${available_space_gb}GB"
        log_stderr "ERROR: Disk space validation ${current_var_mount} failed." 3
      fi
    fi
    # Must directly follow the if/else above: its last command's status is
    # what gets inspected.
    handle_exit_status "validate_docker_disk_size"
  fi
}

# Single-node only: validates that /opt/sisense (or, failing that, /opt) is a
# dedicated mount point of at least 51200 MB.
# Runs when not_uninstall succeeds and check_condition fails for both
# ${update} and ${cluster_mode}.
# NOTE(review): the trailing "5" given to log_stderr is presumably the status
# that handle_exit_status acts on - confirm against their definitions.
function validate_opt_disk_configuration() {
  if not_uninstall && ! check_condition ${update} && ! check_condition ${cluster_mode}; then

    # A path counts as a dedicated mount when df's report for it mentions the
    # path itself (i.e. it is listed as the mount point).
    local second_disk=/opt/sisense
    if ! df -BM "${second_disk}" | grep -q -- "${second_disk}"; then
      second_disk=/opt
      if ! df -BM "${second_disk}" | grep -q -- "${second_disk}"; then
        log_stderr "ERROR: Sisense has no dedicated mount point on /opt or /opt/sisense, which is mandatory requirement in single node!" 5
        handle_exit_status "validate_opt_disk_configuration"
      fi
    fi
    log "Validating disk space on ${second_disk}"

    local min_second_disk_size_mb=51200
    local min_second_disk_size_gb opt_total_disk_size_gb
    local df_output opt_total_disk_size_mb

    # A df failure is detected by the numeric check below, hence "|| true".
    df_output=$(df -BM "${second_disk}") || true
    opt_total_disk_size_mb=$(awk 'NR==2 {sub(/M$/, "", $2); print $2}' <<<"${df_output}")

    if [[ ! ${opt_total_disk_size_mb} =~ ^[0-9]+$ ]]; then
      # Without a numeric value the arithmetic below would abort with a shell
      # error instead of reporting a validation failure.
      log_stderr "Unable to read disk size information for ${second_disk} from df"
      log_stderr "ERROR: Disk space validation ${second_disk} failed." 5
    else
      # MB -> GB with two decimals; %02d keeps e.g. 50 GB + 50 MB as "50.04"
      # rather than "50.4".
      printf -v min_second_disk_size_gb '%d.%02d' "$(( min_second_disk_size_mb / 1024 ))" "$(( (min_second_disk_size_mb % 1024) * 100 / 1024 ))"
      printf -v opt_total_disk_size_gb '%d.%02d' "$(( opt_total_disk_size_mb / 1024 ))" "$(( (opt_total_disk_size_mb % 1024) * 100 / 1024 ))"

      log "Sisense ${second_disk} minimum requirement disk size: ${min_second_disk_size_gb}GB"
      log "Current ${second_disk} disk size: ${opt_total_disk_size_gb}GB"
      if (( opt_total_disk_size_mb >= min_second_disk_size_mb )); then
        log_green "Disk space validation for ${second_disk} passed."
      else
        log_stderr "ERROR: Disk space validation ${second_disk} failed." 5
      fi
    fi
    # Must directly follow the if/else above: its last command's status is
    # what gets inspected.
    handle_exit_status "validate_opt_disk_configuration"
  fi
}

# Measures how long dd takes to write 2000 synchronous 512-byte blocks to
# ${dir}/test.img and compares it with a threshold.
# Arguments: $1 - directory to write the test file into
#            $2 - maximum acceptable elapsed time, in seconds
# NOTE(review): handle_exit_status is defined elsewhere and presumably acts on
# the status of the command run just before it; the trailing "5" given to
# log_stderr is presumably that status on failure - confirm.
function validate_disk_performance_write() {
  local dir=$1
  local write_time_threshold=$2
  local dd_output write_test_time
  local numeric_re='^[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$'
  log "Testing Disk Performance ${dir} - WRITE"

  # Plain assignment (not "local var=$(...)") so that dd's own exit status is
  # what handle_exit_status sees on the next line.
  dd_output=$(sudo dd if=/dev/zero of="${dir}/test.img" bs=512 count=2000 oflag=dsync 2>&1)
  handle_exit_status "validate disk performance ${dir} - WRITE"

  # The elapsed time is the field right after "copied,". Its position differs
  # between the "(1.0 MB) copied," and "(1.0 MB, 1000 KiB) copied," summary
  # formats, so it is located relative to that word rather than by index.
  write_test_time=$(awk '/copied/ { for (i = 1; i < NF; i++) if ($i == "copied,") { print $(i + 1); exit } }' <<<"${dd_output}")

  log "Dir ${dir} write time max threshold: ${write_time_threshold}s"
  log "Dir ${dir} write time test: ${write_test_time}s"

  # Compare float numbers with awk (no need to install "bc"). A missing or
  # non-numeric measurement counts as a failure.
  if [[ ${write_test_time} =~ ${numeric_re} ]] &&
    awk -v elapsed="${write_test_time}" -v limit="${write_time_threshold}" 'BEGIN { exit !((elapsed + 0) < (limit + 0)) }'; then
    log "Disk ${dir} WRITE performance validation passed.."
  else
    log_stderr "ERROR: Disk ${dir} WRITE performance validation failed. does not meet requirements: ${write_time_threshold}s." 5
  fi
  handle_exit_status "validate_disk_performance_write ${dir}"
}

# Measures how long dd takes to read ${dir}/test.img back in 512-byte blocks
# and compares it with a threshold.
# Arguments: $1 - directory containing test.img
#            $2 - maximum acceptable elapsed time, in seconds
# NOTE(review): handle_exit_status is defined elsewhere and presumably acts on
# the status of the command run just before it; the trailing "5" given to
# log_stderr is presumably that status on failure - confirm.
function validate_disk_performance_read() {
  local dir=$1
  local read_time_threshold=$2
  local dd_output read_test_time
  local numeric_re='^[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$'
  log "Testing Disk Performance ${dir} - READ"

  # Plain assignment (not "local var=$(...)") so that dd's own exit status is
  # what handle_exit_status sees on the next line.
  dd_output=$(sudo dd if="${dir}/test.img" of=/dev/null bs=512 2>&1)
  handle_exit_status "validate disk performance ${dir} - READ"

  # The elapsed time is the field right after "copied,". Its position differs
  # between the "(1.0 MB) copied," and "(1.0 MB, 1000 KiB) copied," summary
  # formats, so it is located relative to that word rather than by index.
  read_test_time=$(awk '/copied/ { for (i = 1; i < NF; i++) if ($i == "copied,") { print $(i + 1); exit } }' <<<"${dd_output}")

  log "Dir ${dir} read time max threshold: ${read_time_threshold}s"
  log "Dir ${dir} read time test: ${read_test_time}s"

  # Compare float numbers with awk (no need to install "bc"). A missing or
  # non-numeric measurement counts as a failure.
  if [[ ${read_test_time} =~ ${numeric_re} ]] &&
    awk -v elapsed="${read_test_time}" -v limit="${read_time_threshold}" 'BEGIN { exit !((elapsed + 0) < (limit + 0)) }'; then
    log "Disk ${dir} READ performance validation passed.."
  else
    log_stderr "ERROR: Disk ${dir} READ performance validation failed. does not meet requirements: ${read_time_threshold}s." 5
  fi
  handle_exit_status "validate_disk_performance_read ${dir}"
}

# Runs the write and read disk benchmarks on a scratch directory under
# /var/lib/kubelet and then under /opt/sisense, removing each directory
# afterwards.
# Runs only when check_condition succeeds for ${check_disk_utilization},
# not_uninstall succeeds, and check_condition fails for update,
# update_k8s_version and recover_kubernetes (on-prem, first-time k8s install).
# Thresholds for both locations: write 15 s, read 10 s.
function validate_disk_performance() {
  # If on-prem and first time k8s install + check_disk_utilization...
  check_condition ${check_disk_utilization} || return 0
  not_uninstall || return 0
  if check_condition ${update}; then return 0; fi
  if check_condition ${update_k8s_version}; then return 0; fi
  if check_condition ${recover_kubernetes}; then return 0; fi

  local write_limit_s=15
  local read_limit_s=10
  local mount_labels=("/var" "/opt")
  local scratch_dirs=("/var/lib/kubelet/sisense_disk_test" "/opt/sisense/sisense_disk_test")
  local target_idx scratch_dir

  for target_idx in "${!scratch_dirs[@]}"; do
    scratch_dir=${scratch_dirs[target_idx]}

    # An empty log line separates the second run from the first.
    if (( target_idx > 0 )); then
      log ""
    fi

    log "Testing disk performance on ${mount_labels[target_idx]}"
    run_command "sudo mkdir -p ${scratch_dir}" "Creating directory ${scratch_dir}"
    # The write test creates the file that the read test then reads back.
    validate_disk_performance_write ${scratch_dir} ${write_limit_s}
    validate_disk_performance_read ${scratch_dir} ${read_limit_s}
    run_command "sudo rm -rf ${scratch_dir}" "Removing directory ${scratch_dir}"
  done

  log_green "Disk performance on /var and /opt validation passed."
}

# In an offline installation, verifies that the OS package required by the
# selected storage type is already installed:
#   rook-ceph -> lvm2
#   nfs / efs -> nfs-common on Ubuntu, nfs-utils otherwise
# Other storage types need no package and are skipped.
# Globals:   offline_installer, storage_type, OS (read)
# NOTE(review): the trailing "6" given to log_stderr is presumably the status
# that handle_exit_status acts on - confirm against their definitions.
function validate_storage_packages_offline() {
  if check_condition ${offline_installer}; then
    local package=""
    local is_package_installed=0

    if [[ ${storage_type,,} == "rook-ceph" ]]; then
      package=lvm2
    elif [[ ${storage_type,,} == "nfs" || ${storage_type,,} == "efs" ]]; then
      if [[ ${OS} == "Ubuntu" ]]; then
        package=nfs-common
      else
        package=nfs-utils
      fi
    fi

    if [[ -n ${package} ]]; then
      log "Validating package ${package} is installed in offline environment."

      # Match the exact package name at the start of the line: "name/" in the
      # apt listing, "name." (name.arch) in the yum listing. An unanchored
      # match would let e.g. lvm2-libs count as lvm2.
      if [[ ${OS} == "Ubuntu" ]]; then
        is_package_installed=$(sudo apt list --installed | grep -c "^${package}/")
      else
        is_package_installed=$(sudo yum list installed | grep -c "^${package}\.")
      fi

      if [[ ${is_package_installed} -eq 0 ]]; then
        log_stderr "ERROR: Package ${package} is not installed, please install it and rerun installation." 6
      else
        log "Package ${package} is installed."
      fi
    fi
  fi
  # Must directly follow the if above: its last command's status is what gets
  # inspected.
  handle_exit_status "validate_storage_packages_offline"
}

# Rook-Ceph only: validates the raw disk configured for this node.
# Checks that the disk is unpartitioned, is not mounted, and is at least as
# large as 3 x MongoDB + 3 x Zookeeper + Sisense volume + 3 GB of metadata.
# Runs when not_uninstall succeeds, check_condition fails for ${update},
# storage_type is rook-ceph and all three *_disk_size values are set.
# Globals:   INTERNAL_IP, NODE_NAME, storage_type, mongodb_disk_size,
#            zookeeper_disk_size, sisense_disk_size (read), plus the
#            k8s_nodes_*_internal_ip / k8s_nodes_*_disk_volume_device pairs.
# NOTE(review): the trailing "7" given to log_stderr is presumably the status
# that handle_exit_status acts on - confirm against their definitions.
function validate_cluster_disk_size() {
  # If on-prem and first time k8s install + rook-ceph...
  if not_uninstall && ! check_condition ${update} && [[ ${storage_type,,} == "rook-ceph" ]] &&
  [[ -n ${mongodb_disk_size} ]] && [[ -n ${zookeeper_disk_size} ]] && [[ -n ${sisense_disk_size} ]]; then

    local second_disk_metadata_gb=3
    local ip_var_name device_var_name="" disk_volume_device=""
    local real_disk partitions_count device mount_point
    local total_configured_size real_disk_size

    # Find the k8s_nodes_<n>_internal_ip variable whose value is exactly this
    # node's IP (a substring/regex match would let 10.0.0.1 also hit
    # 10.0.0.11), then read the sibling ..._disk_volume_device variable.
    if [[ -n ${INTERNAL_IP} ]]; then
      while IFS= read -r ip_var_name; do
        if [[ ${ip_var_name} == *k8s_nodes_*_internal_ip && ${!ip_var_name} == "${INTERNAL_IP}" ]]; then
          device_var_name=${ip_var_name/internal_ip/disk_volume_device}
          disk_volume_device=${!device_var_name}
          break
        fi
      done < <(compgen -v)
    fi

    log "Validating disk ${disk_volume_device} is unpartitioned..."
    # Validate disk existence using parted
    real_disk=$(sudo parted "$disk_volume_device" unit GiB print 2>&1)

    # Partition rows are the lines of the report that start with a number.
    partitions_count=$(grep -cE "^\s*[0-9]+" <<<"${real_disk}")

    if [ "$partitions_count" -gt 0 ]; then
      log_stderr "ERROR: The disk ${disk_volume_device} is partitioned, has $partitions_count partitions, please make sure to have unpartitioned disk" 7
      handle_exit_status "validate_disk_unpartitioned"
    else
      log_green "The disk ${disk_volume_device} is unpartitioned. Validation passed."
    fi

    log "Validating disk ${disk_volume_device} is unmounted..."
    # Check for active mounts. A mount line reads "<device> on <mount point>
    # type ...", so the mount point is the third word.
    while read -r device _ mount_point _; do
      if [[ ${device} == "${disk_volume_device}" ]]; then
        log_stderr "ERROR: The partition $device is mounted to $mount_point" 7
        handle_exit_status "validate_cluster_disk_size"
      fi
    done < <(mount)
    log_green "The disk ${disk_volume_device} is unmounted. Validation passed."

    log "Validating disk ${disk_volume_device} size is larger than total configured size..."
    # Calculate total configured size
    total_configured_size=$((mongodb_disk_size * 3 + zookeeper_disk_size * 3 + sisense_disk_size + second_disk_metadata_gb))

    # Get real disk size from the "Disk /dev/...: <n>GiB" line
    real_disk_size=$(awk -F ': ' '/Disk \/dev/ {sub(/GiB/, "", $2); print $2; exit}' <<<"${real_disk}")

    # Print debug information
    log "In node ${NODE_NAME}, The disk is $disk_volume_device and its size is $real_disk_size GB"
    log "The MongoDB replica size is: $mongodb_disk_size GB"
    log "The Zookeeper replica size is: $zookeeper_disk_size GB"
    log "The Sisense Persistence volume size is: $sisense_disk_size GB"
    log "The Metadata size is: $second_disk_metadata_gb GB"
    log "The total needed disk size is $total_configured_size GB"
    log "The disk size is $real_disk_size GB, and total needed disk size is $total_configured_size GB"

    # Validate disk size. parted may print a fractional value such as "50.0",
    # which an integer test cannot compare; dropping the fraction is exact for
    # "less than an integer". An unreadable size is a failure, not a pass.
    if [[ ! ${real_disk_size} =~ ^[0-9]+([.][0-9]+)?$ ]]; then
      log_stderr "ERROR: Could not determine the size of disk ${disk_volume_device}: ${real_disk}" 7
    elif (( 10#${real_disk_size%%.*} < total_configured_size )); then
      log_stderr "ERROR: Disk ${disk_volume_device} size ($real_disk_size GB) is less than total needed disk size ($total_configured_size GB)!" 7
    else
      log_green "Disk ${disk_volume_device} size is larger than total needed disk size. Validation passed."
    fi
  fi
  # Must directly follow the if above: its last command's status is what gets
  # inspected.
  handle_exit_status "validate_cluster_disk_size"
}

# Reports whether the CPU advertises AVX, judged by the presence of the string
# "avx" in /proc/cpuinfo.
# NOTE(review): ret and handle_exit_status are defined elsewhere; "ret 5"
# presumably sets the status that handle_exit_status then acts on - confirm.
function validate_avx_support() {
    local cpuinfo_file=/proc/cpuinfo

    if ! grep -q avx "${cpuinfo_file}"; then
        log_stderr "AVX is not supported on this CPU."
        ret 5
    else
        log "AVX is supported on this CPU."
    fi
    # Must directly follow the if/else so the branch's final status is seen.
    handle_exit_status "validate_avx_support"
}

################
##### MAIN #####
################
# Entry point: the validations below run in this fixed order on the node.
# set_node_name_and_ip and detect_os are defined outside this file; they
# presumably populate NODE_NAME / INTERNAL_IP and OS / VER, which the
# validations below read - confirm against their definitions.
set_node_name_and_ip
detect_os
# OS support check; also installs gawk on supported systems.
validate_os_version
test_sudo_permissions
# Raises the fs.inotify limits when they are below the wanted floor.
configure_sysctl
check_nm_cloud_setup
disable_fireall_services
# Hardware, port and disk validations.
validate_cpu_ram
validate_ports_wrapper
validate_docker_disk_size
validate_opt_disk_configuration
validate_disk_performance
validate_cluster_disk_size
validate_storage_packages_offline
validate_avx_support

# Reached after every validation above has returned.
log_green "All Sisense validations passed for node ${NODE_NAME}"
