#!/usr/bin/env python3
############################################################
### Python script to analyse a NetCDF file for debugging ###
############################################################


"""
Analyse a NetCDF file. For each numeric variable, the script outputs:
  - Dimensions and shape
  - Minimum & maximum values (ignoring NaNs)
  - Mean value (ignoring NaNs)
  - Warnings if the variable is entirely NaN or contains any NaNs/negative values

Usage:
  1) Command-line mode:
       python analyze_netcdf.py /path/to/your_file.nc
  
  2) Interactive mode through the prompt:
       python analyze_netcdf.py
"""


import os
import sys
import glob
import readline
import argparse
import numpy as np
from netCDF4 import Dataset


def complete_filename(text, state):
    """
    Readline completer that expands a partial filesystem path.

    The partial path is turned into a glob pattern (a trailing '*' is added
    unless the text already contains a '*'), '~' is expanded, and every
    matching directory gets a trailing '/'. Returns the candidate at index
    `state`, or None once `state` runs past the last candidate.
    """
    # Respect a wildcard typed by the user; otherwise match on the prefix
    pattern = text if "*" in text else text + "*"

    candidates = []
    for path in glob.glob(os.path.expanduser(pattern)):
        # Mark directories so the user can keep completing into them
        candidates.append(path + "/" if os.path.isdir(path) else path)

    try:
        return candidates[state]
    except IndexError:
        return None


def _print_variable_header(name, dims, shape):
    """
    Print the name, dimensions and shape lines that open every variable report.
    """
    print(f"\nAnalysis of variable: {name}")
    print(f"  Dimensions: {dims}")
    print(f"  Shape     : {shape}")


def analyze_variable(variable):
    """
    Print summary statistics (min, max, mean) for a numeric NetCDF variable.

    Masked entries are converted to NaN, and NaNs are ignored when computing
    min/max/mean. The report flags three situations: the variable holds no
    values at all, every value is NaN/masked, or some values are NaN/negative.

    Args:
        variable: object exposing `name`, `dimensions`, `shape` and slice
            indexing (`variable[:]`) that yields a NumPy (masked) array.

    Returns:
        None. Everything is reported on standard output; a read failure is
        reported as an error line instead of being raised.
    """
    name = variable.name
    dims = variable.dimensions
    shape = variable.shape

    try:
        # Read the entire array into memory; this may be large for huge datasets
        data = variable[:]
    except Exception as e:
        print(f"\nError: Unable to read variable '{name}': {e}")
        return

    # Masked arrays: replace masked entries with NaN so the nan-aware
    # reductions below skip them (this also promotes integer data to float)
    if hasattr(data, "mask"):
        data = np.where(data.mask, np.nan, data.data)

    _print_variable_header(name, dims, shape)

    # A zero-size variable (e.g. an unlimited dimension of length 0) has no
    # statistics; report it as empty rather than as "entirely NaN"
    if np.size(data) == 0:
        print("  \033[93mCaution: variable is empty (no values to analyze)!\033[0m")
        return

    # Compute the NaN mask once and reuse it for both checks below
    nan_mask = np.isnan(data)
    if nan_mask.all():
        # Entirely NaN (or entirely masked)
        print("  \033[91mAnomaly: entire variable is NaN or masked!\033[0m")
        return

    # Compute min, max, mean ignoring NaNs
    data_min = np.nanmin(data)
    data_max = np.nanmax(data)
    data_mean = np.nanmean(data)

    has_nan = nan_mask.any()
    # Comparing NaN with 0 is always False; silence the "invalid value"
    # warning some NumPy versions emit for that comparison
    with np.errstate(invalid="ignore"):
        has_negative = np.any(data < 0)

    print(f"  Min value : {data_min:>12.6e}")
    print(f"  Max value : {data_max:>12.6e}")
    print(f"  Mean value: {data_mean:>12.6e}")
    if has_nan:
        print("  \033[91mAnomaly: contains NaN values!\033[0m")
    if has_negative:
        print("  \033[93mCaution: contains negative values!\033[0m")

def analyze_netcdf_file(nc_path):
    """
    Open the NetCDF file at nc_path and analyze each numeric variable.

    Problems are reported on standard output rather than raised: a missing
    file or a file that cannot be opened produces an error line, and
    variables whose dtype is unreadable or non-numeric are skipped with a
    warning.

    Args:
        nc_path: path to the NetCDF file on the local filesystem.

    Returns:
        None.
    """
    if not os.path.isfile(nc_path):
        print(f"Error: File '{nc_path}' not found.")
        return

    try:
        ds = Dataset(nc_path, mode='r')
    except Exception as e:
        print(f"Error: Unable to open '{nc_path}': {e}")
        return

    # The context manager closes the dataset even if analysing a variable
    # raises, so the file handle is never leaked
    with ds:
        print(f"\nOpened NetCDF file: {nc_path}")
        print(f"Number of variables: {len(ds.variables)}")

        for var_name, variable in ds.variables.items():
            # Attempt to check if the dtype is numeric
            try:
                dtype = variable.dtype
            except Exception:
                # If reading dtype fails, skip it
                print(f"\nWarning: Skipping variable with unknown type: {var_name}")
                continue

            if np.issubdtype(dtype, np.number):
                analyze_variable(variable)
            else:
                print(f"\nWarning: Skipping non-numeric variable: {var_name}")

    print("\nFinished analysis.\n")


def main():
    """
    Command-line entry point.

    With a path argument the file is analyzed directly. Without one, the user
    is prompted for a path, with tab completion of filenames enabled.
    """
    parser = argparse.ArgumentParser(
        description="Analyze a NetCDF file and report min/max/mean for each numeric variable."
    )
    parser.add_argument(
        "nc_file",
        nargs="?",
        help="Path to the NetCDF file (if omitted, you'll be prompted)."
    )
    args = parser.parse_args()

    if args.nc_file:
        # Command-line mode: directly analyze the provided file path
        analyze_netcdf_file(args.nc_file)
        return

    # Interactive mode: enable tab completion for filenames.
    # readline's default word delimiters include '/', '~' and '-', which would
    # hand the completer only the last path fragment; restrict them to
    # whitespace so the completer receives the whole partial path.
    readline.set_completer_delims(" \t\n")
    readline.set_completer(complete_filename)
    if "libedit" in (readline.__doc__ or ""):
        # libedit-backed readline builds use a different binding syntax
        readline.parse_and_bind("bind ^I rl_complete")
    else:
        readline.parse_and_bind("tab: complete")

    try:
        user_input = input("Enter the path to the NetCDF file: ").strip()
    except (EOFError, KeyboardInterrupt):
        print("\nExiting.")
        return

    if not user_input:
        print("No file specified. Exiting.")
        return

    # No shell is involved at the prompt, so expand a leading '~' ourselves
    analyze_netcdf_file(os.path.expanduser(user_input))


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()