############################################################
### Python script to analyse a NetCDF file for debugging ###
############################################################

### This script prints useful information about a NetCDF file
### to help with debugging. For each variable, it outputs the
### dimensions, the min & max values and the average value, and
### warns the user if it contains NaN or negative values.
### The user is prompted for the file name in the terminal.

import os
import readline
import glob
from netCDF4 import Dataset
import numpy as np

############################################################
### Setup readline for file name autocompletion
def complete(text, state):
    """Readline completer proposing file and directory names.

    Readline calls this function repeatedly with ``state`` = 0, 1, 2, ...
    for the same ``text`` until it returns ``None``; each call returns the
    ``state``-th candidate.

    Args:
        text: The word being completed. It is used as a glob pattern; a
            trailing ``*`` is appended unless it already contains one.
            A leading ``~`` is expanded to the user's home directory.
        state: Index of the candidate to return.

    Returns:
        The matching path (directories get a trailing ``/``), or ``None``
        when ``state`` is past the last candidate.
    """
    # Treat the typed text as a prefix unless the user wrote a wildcard
    pattern = text if '*' in text else text + '*'

    # The glob is re-run on every call (one call per state) and glob does
    # not guarantee any ordering, so sort to keep the indexing consistent.
    candidates = sorted(glob.glob(os.path.expanduser(pattern)))

    # Add '/' if the match is a directory
    candidates = [
        path + '/' if os.path.isdir(path) else path
        for path in candidates
    ]

    try:
        return candidates[state]
    except IndexError:
        return None

### Function to analyze a variable in a NetCDF file
def analyze_variable(variable):
    """Print summary statistics and warnings for one variable.

    The variable name and dimensions are always printed. For integer or
    floating-point data with at least one element, the min, max and mean
    (all ignoring NaN) are printed, followed by a warning if the data
    contains NaN and another if it contains negative values. Empty or
    non-numeric variables are reported without statistics, because the
    NaN-aware reductions cannot be evaluated on them.

    Args:
        variable: Object providing ``name``, ``dimensions`` and full-slice
            indexing (``variable[:]``) that returns the data as an array,
            e.g. a variable taken from a netCDF4 dataset.
    """
    # Get the data for the variable (asanyarray keeps array subclasses)
    data = np.asanyarray(variable[:])

    print(f"\nAnalysis of variable: {variable.name}")
    print(f"  Dimensions: {variable.dimensions}")

    # Reductions such as nanmin raise on zero-size input
    if data.size == 0:
        print("  Empty variable: no values to analyze")
        return

    # Only signed/unsigned integers and floats support the checks below
    if data.dtype.kind not in "iuf":
        print(f"  Non-numeric data (dtype {data.dtype}): statistics skipped")
        return

    # Calculate min, max and mean, ignoring NaN
    data_min = np.nanmin(data)
    data_max = np.nanmax(data)
    data_mean = np.nanmean(data)

    # Check if there are any NaN values
    has_nan = np.isnan(data).any()

    # Check for negative values
    has_negative = (data < 0).any()

    # Print the results
    print(f"  Min value : {data_min:>12.6e}")
    print(f"  Max value : {data_max:>12.6e}")
    print(f"  Mean value: {data_mean:>12.6e}")
    if has_nan:
        print("  \033[91mContains NaN values!\033[0m")
    if has_negative:
        print("  \033[93mWarning: contains negative values!\033[0m")

### Main function
def analyze_netcdf():
    """Prompt for a NetCDF file and print an analysis of every variable.

    The file name is read from the terminal with tab-completion of paths.
    If the file cannot be opened, a message is printed and the function
    returns without raising. Otherwise each variable of the dataset is
    passed to ``analyze_variable`` and the file is closed afterwards, even
    if the analysis of a variable fails.
    """
    # Ask for the file name
    readline.set_completer(complete)
    # Split words on whitespace only: readline's default delimiters include
    # characters such as '/' and '-', which would hand the completer just a
    # fragment of the path instead of the whole thing.
    readline.set_completer_delims(' \t\n')
    readline.parse_and_bind('tab: complete')
    answer = input("Enter the name of the NetCDF file: ")
    # Drop stray whitespace and expand '~' for names typed by hand
    file = os.path.expanduser(answer.strip())

    # Open the NetCDF file
    try:
        dataset = Dataset(file, mode='r')
    except FileNotFoundError:
        print(f"File '{file}' not found.")
        return
    except OSError as error:
        # Other open failures (e.g. a permission problem)
        print(f"Could not open file '{file}': {error}")
        return

    # Iterate through all variables in the dataset to analyze them
    try:
        for variable in dataset.variables.values():
            analyze_variable(variable)
    finally:
        # Close the NetCDF file whatever happened during the analysis
        dataset.close()

### Call the main function only when the file is executed as a script,
### so that importing this module does not prompt for a file name
if __name__ == "__main__":
    analyze_netcdf()

