#!/usr/bin/env python3
############################################################
### Python script to analyse a NetCDF file for debugging ###
############################################################
"""
Analyse a NetCDF file and print summary statistics for debugging.

For each numeric variable, it outputs:
    - Dimensions and shape
    - Minimum & maximum values (ignoring NaNs)
    - Mean value (ignoring NaNs)
    - Warnings if the variable is entirely NaN or contains any NaNs/negative values

Usage:
    1) Command-line mode:
        python analyze_netcdf.py /path/to/your_file.nc

    2) Interactive mode through the prompt:
        python analyze_netcdf.py
"""

import os
import sys
import glob
import argparse

import numpy as np

try:
    # readline is not available on every platform (e.g. stock Windows
    # Python); it is only needed for tab completion in interactive mode.
    import readline
except ImportError:
    readline = None

try:
    from netCDF4 import Dataset
except ImportError:
    # Reported with a clear message when a file is actually analysed.
    Dataset = None

# ANSI escape sequences used to highlight warnings on the terminal.
_ANSI_RED = "\033[91m"
_ANSI_YELLOW = "\033[93m"
_ANSI_RESET = "\033[0m"


def complete_filename(text, state):
    """
    Tab-completion function for readline: completes filesystem paths.

    readline calls this repeatedly with state = 0, 1, 2, ... until it
    returns None. Matches are sorted so that the candidate list is the
    same on every call (glob order depends on the filesystem).

    Args:
        text: Partial path typed so far; may contain "~" or "*".
        state: Index of the candidate to return.

    Returns:
        The state-th matching path (directories get a trailing "/"),
        or None when there are no more candidates.
    """
    # Treat the text as a prefix unless the user already typed a wildcard.
    pattern = text if "*" in text else text + "*"
    candidates = sorted(glob.glob(os.path.expanduser(pattern)))
    # Add a trailing slash for directories so completion can continue into them.
    candidates = [c + "/" if os.path.isdir(c) else c for c in candidates]
    try:
        return candidates[state]
    except IndexError:
        return None


def analyze_variable(variable):
    """
    Print summary statistics (min, max, mean) for a numeric NetCDF variable.

    NaNs and masked entries are ignored when computing min/max/mean.
    A warning is printed if any NaN/masked or negative values exist.

    Args:
        variable: Object exposing `name`, `dimensions`, `shape` and
            supporting `variable[:]` to read its values (as the
            variables of a netCDF4 Dataset do).
    """
    name = variable.name
    dims = variable.dimensions
    shape = variable.shape

    try:
        # Read the entire array into memory; this may be large for huge datasets.
        data = variable[:]
    except Exception as e:
        print(f"\nUnable to read variable '{name}': {e}")
        return

    # Masked arrays: replace masked entries with NaN so the nan-aware
    # reductions below skip them (this also promotes integers to float).
    if hasattr(data, "mask"):
        data = np.where(data.mask, np.nan, data.data)
    data = np.asarray(data)

    print(f"\nAnalysis of variable: {name}")
    print(f" Dimensions: {dims}")
    print(f" Shape : {shape}")

    if data.size == 0:
        # np.nanmin/np.nanmax raise on empty input, and "all NaN" would
        # be a misleading description of a zero-length variable.
        print(" Variable is empty (no elements).")
        return

    # Compute the NaN mask once; it is reused for both checks below.
    nan_mask = np.isnan(data)
    if nan_mask.all():
        print(" Entire variable is NaN or masked.")
        return

    data_min = np.nanmin(data)
    data_max = np.nanmax(data)
    data_mean = np.nanmean(data)

    has_nan = bool(nan_mask.any())
    # A negative value exists exactly when the NaN-ignoring minimum is
    # negative; this avoids building another full-size boolean array.
    has_negative = bool(data_min < 0)

    print(f" Min value : {data_min:>12.6e}")
    print(f" Max value : {data_max:>12.6e}")
    print(f" Mean value: {data_mean:>12.6e}")
    if has_nan:
        print(" " + _ANSI_RED + "Contains NaN values!" + _ANSI_RESET)
    if has_negative:
        print(" " + _ANSI_YELLOW + "Warning: contains negative values!" + _ANSI_RESET)


def analyze_netcdf_file(nc_path):
    """
    Open the NetCDF file at nc_path and analyze each numeric variable.

    Args:
        nc_path: Path to the NetCDF file.

    Returns:
        True if the file was opened and analysed, False if it does not
        exist, netCDF4 is unavailable, or the file could not be opened.
    """
    if not os.path.isfile(nc_path):
        print(f"Error: File '{nc_path}' not found.")
        return False

    if Dataset is None:
        print("Error: the 'netCDF4' package is required but could not be imported.")
        return False

    try:
        ds = Dataset(nc_path, mode="r")
    except Exception as e:
        print(f"Error: Unable to open '{nc_path}': {e}")
        return False

    # The context manager closes the dataset even if analysis raises.
    with ds:
        print(f"\nOpened NetCDF file: {nc_path}")
        print(f"Number of variables: {len(ds.variables)}")

        for var_name, variable in ds.variables.items():
            try:
                # Both reading the dtype and classifying it can fail for
                # unusual (e.g. user-defined) types; skip those variables.
                is_numeric = np.issubdtype(variable.dtype, np.number)
            except Exception:
                print(f"\nSkipping variable with unknown type: {var_name}")
                continue

            if is_numeric:
                analyze_variable(variable)
            else:
                print(f"\nSkipping non-numeric variable: {var_name}")

    print("\nFinished analysis.\n")
    return True


def _enable_path_completion():
    """Configure readline (when available) to tab-complete filesystem paths."""
    if readline is None:
        return
    readline.set_completer(complete_filename)
    # The default delimiters include "/" and "-", which would hand the
    # completer only the last path component; split on whitespace only.
    readline.set_completer_delims(" \t\n")
    if "libedit" in (getattr(readline, "__doc__", "") or ""):
        # macOS ships libedit, which uses a different binding syntax.
        readline.parse_and_bind("bind ^I rl_complete")
    else:
        readline.parse_and_bind("tab: complete")


def main():
    """
    Parse the command line and analyse the requested NetCDF file.

    If no path is given on the command line, prompt for one
    interactively (with tab completion where readline is available).

    Returns:
        Process exit status: 0 on success or user-requested exit,
        1 if no file was specified or the file could not be analysed.
    """
    parser = argparse.ArgumentParser(
        description="Analyze a NetCDF file and report min/max/mean for each numeric variable."
    )
    parser.add_argument(
        "nc_file",
        nargs="?",
        help="Path to the NetCDF file (if omitted, you'll be prompted).",
    )
    args = parser.parse_args()

    if args.nc_file:
        # Command-line mode: directly analyze the provided file path.
        return 0 if analyze_netcdf_file(args.nc_file) else 1

    # Interactive mode: enable tab completion for filenames.
    _enable_path_completion()
    try:
        user_input = input("Enter the path to the NetCDF file: ").strip()
    except (EOFError, KeyboardInterrupt):
        print("\nExiting.")
        return 0

    if not user_input:
        print("No file specified. Exiting.")
        return 1

    return 0 if analyze_netcdf_file(user_input) else 1


if __name__ == "__main__":
    sys.exit(main())