options(repos = c(CRAN = "https://cloud.r-project.org"))

# Dependencies ----
# The script needs 'digest' for the MD5 check. When digest is missing, the
# pinned release 0.6.25 is installed; an already-installed digest of any
# version is used as-is.
if (!requireNamespace("digest", quietly = TRUE)) {
  # 'remotes' is only needed to install a specific package version, so it is
  # installed on demand and called through its namespace instead of being
  # attached.
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_version(
    "digest",
    version = "0.6.25",
    repos = "https://cloud.r-project.org"
  )
}

# library() stops with an error if digest is still unavailable, instead of
# silently falling back to an unpinned install.
library(digest)

# Define variables
# wdir is the base directory under which the "results" folder is placed.
# Example of an absolute project path that can be used instead:
# wdir <- "/shared/projects/2427_data_master/user/agonzalez/m2bsgreprod/src"
wdir <- "."
results_dir <- file.path(wdir, "results")
url <- "https://d1ypx1ckp5bo16.cloudfront.net/penncath/penncath.tar.gz"
dest_file <- file.path(results_dir, "penncath.tar.gz")
expected_md5 <- "5d5f422aeafdd2d725ad93f447d9af4b"

# Create results directory if it doesn't exist. dir.create() signals failure
# by returning FALSE (with a warning), so stop here rather than letting the
# later download fail with a less obvious message.
if (!dir.exists(results_dir) &&
      !dir.create(results_dir, recursive = TRUE)) {
  stop("Could not create results directory: ", results_dir, call. = FALSE)
}

# Download the archive unless a previous run already fetched it
if (!file.exists(dest_file)) {
  message("File does not exist. Downloading...")

  # Download to a sibling ".part" path and rename only on success, so an
  # interrupted or failed transfer never leaves a truncated archive at
  # dest_file that later runs would mistake for a complete download.
  # A leftover ".part" file is harmless: the next run overwrites it.
  partial_file <- paste0(dest_file, ".part")

  # mode = "wb" writes the archive bytes in binary mode on every platform.
  download_status <- download.file(
    url,
    partial_file,
    method = "auto",
    mode = "wb"
  )

  # download.file() returns a non-zero status code when the transfer failed.
  if (download_status != 0) {
    unlink(partial_file)
    stop(
      "Download of ", url, " failed with status ", download_status, ".",
      call. = FALSE
    )
  }

  if (!file.rename(partial_file, dest_file)) {
    stop(
      "Could not move downloaded file to ", dest_file, ".",
      call. = FALSE
    )
  }
} else {
  message("File already exists.")
}

# Verify the MD5 checksum of the downloaded archive before extracting it
actual_md5 <- digest(dest_file, algo = "md5", file = TRUE)

# isTRUE() keeps the condition a strict scalar TRUE/FALSE even if the
# comparison yields NA.
if (!isTRUE(actual_md5 == expected_md5)) {
  stop(
    "MD5 checksum does not match! Expected ", expected_md5,
    " but got ", actual_md5, " for ", dest_file,
    ". Delete the file and re-run the script to download it again.",
    call. = FALSE
  )
}

message("MD5 checksum matches! Proceeding to extract the file...")

# Uncompress the file into the results directory. untar() returns the exit
# status of the tar program (0 on success), so a failure must be checked
# explicitly before reporting success.
untar_status <- untar(dest_file, exdir = results_dir)
if (untar_status != 0) {
  stop(
    "Failed to extract ", dest_file, " (tar exit status ", untar_status, ").",
    call. = FALSE
  )
}
message("File uncompressed successfully!")