Context Navigation

← Previous Change
Next Change →

analyse_netcdf.py

Timestamp:

May 28, 2025, 5:31:59 PM (3 weeks ago)

Author:

jbclement

Message:

Mars PCM:
Big improvement of Python scripts in util folder to analyse/display variables in NetCDF files.
JBC

File:

: 1 edited

trunk/LMDZ.MARS/util/analyse_netcdf.py (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/LMDZ.MARS/util/analyse_netcdf.py

-                      r3648
+                      r3783
+#!/usr/bin/env python3
 ############################################################
 ### Python script to analyse a NetCDF file for debugging ###
 ############################################################
+### This script gives useful information about a NetCDF file
+### to help with debugging. For each variable, it outputs the
+### dimensions, the min & max values, the average value and
+### warns the user in case of NaN or negative values.
+### The user is prompted for the file name in the terminal.
+"""
+For each numeric variable, it outputs:
+  - Dimensions and shape
+  - Minimum & maximum values (ignoring NaNs)
+  - Mean value (ignoring NaNs)
+  - Warnings if the variable is entirely NaN or contains any NaNs/negative values
+Usage:
+    1) Command-line mode:
+       python analyse_netcdf.py /path/to/your_file.nc
+    2) Interactive mode through the prompt:
+       python analyse_netcdf.py
+"""
 import os
+import sys
+import glob
 import readline
+import glob
+import argparse
+import numpy as np
 from netCDF4 import Dataset
-import numpy as np
+############################################################
+### Setup readline for file name autocompletion
+def complete(text,state):
+    line = readline.get_line_buffer().split()
+    # Use glob to find all matching files/directories for the current text
+    if '*' not in text:
+        text += '*'
+    matches = glob.glob(os.path.expanduser(text))
+    # Add '/' if the match is a directory
+    matches = [match + '/' if os.path.isdir(match) else match for match in matches]
+def complete_filename(text, state):
+    """
+    Tab-completion function for readline: completes filesystem paths.
+    Appends '/' if the match is a directory.
+    """
+    # The text forms a partial path; glob for matching entries
+    if "*" not in text:
+        text_glob = text + "*"
+    else:
+        text_glob = text
+    matches = glob.glob(os.path.expanduser(text_glob))
+    # Add a trailing slash for directories
+    matches = [m + "/" if os.path.isdir(m) else m for m in matches]
     try:
         return matches[state]
 …
         return None
+### Function to analyze a variable in a NetCDF file
 def analyze_variable(variable):
+    # Get the data for the variable
+    data = variable[:]
+    """
+    Print summary statistics (min, max, mean) for a numeric NetCDF variable.
+    Ignores NaNs when computing min/max/mean. Warns if any NaNs or negatives exist.
+    """
+    name = variable.name
+    dims = variable.dimensions
+    shape = variable.shape
+    # Calculate min, max and mean
+    if np.isnan(data).all():
+        min_val = np.nan
+        max_val = np.nan
+        mean_val = np.nan
+    else:
+        data_min = np.nanmin(data) # Min value ignoring NaN
+        data_max = np.nanmax(data) # Max value ignoring NaN
+        data_mean = np.nanmean(data) # Mean value ignoring NaN
+    # Check if there are any NaN values
+    try:
+        # Read the entire array into memory; this may be large for huge datasets
+        data = variable[:]
+    except Exception as e:
+        print(f"\nUnable to read variable '{name}': {e}")
+        return
+    # If the array is a masked array, convert to a NumPy array with masked values as np.nan
+    if hasattr(data, "mask"):
+        # Fill masked entries with NaN so that np.nanmin / np.nanmax works correctly
+        data = np.where(data.mask, np.nan, data.data)
+    # Determine if the variable has any valid (finite) data at all
+    if np.all(np.isnan(data)):
+        # Entirely NaN (or entirely masked)
+        print(f"\nAnalysis of variable: {name}")
+        print(f"  Dimensions: {dims}")
+        print(f"  Shape     : {shape}")
+        print("  Entire variable is NaN or masked.")
+        return
+    # Compute min, max, mean ignoring NaNs
+    data_min = np.nanmin(data)
+    data_max = np.nanmax(data)
+    data_mean = np.nanmean(data)
+    # Check for presence of NaNs and negative values
     has_nan = np.isnan(data).any()
+    has_negative = np.any(data < 0)
+    # Check for negative values
+    has_negative = (data < 0).any()
+    # Print the results
+    print(f"\nAnalysis of variable: {variable.name}")
+    print(f"  Dimensions: {variable.dimensions}")
+    # Output
+    print(f"\nAnalysis of variable: {name}")
+    print(f"  Dimensions: {dims}")
+    print(f"  Shape     : {shape}")
     print(f"  Min value : {data_min:>12.6e}")
     print(f"  Max value : {data_max:>12.6e}")
 …
         print(f"  \033[93mWarning: contains negative values!\033[0m")
+### Main function
+def analyze_netcdf():
     # Ask for the file name
     readline.set_completer(complete)
     readline.parse_and_bind('tab: complete')
     file = input("Enter the name of the NetCDF file: ")
+    # Open the NetCDF file
+def analyze_netcdf_file(nc_path):
+    """
+    Open the NetCDF file at nc_path and analyze each numeric variable.
+    """
+    if not os.path.isfile(nc_path):
+        print(f"Error: File '{nc_path}' not found.")
+        return
     try:
         dataset = Dataset(file,mode='r')
     except FileNotFoundError:
         print(f"File '{file}' not found.")
+        ds = Dataset(nc_path, mode='r')
+    except Exception as e:
+        print(f"Error: Unable to open '{nc_path}': {e}")
         return
+    # Iterate through all variables in the dataset to analyze them
+    for variable_name in dataset.variables:
+        variable = dataset.variables[variable_name]
+        if np.issubdtype(variable[:].dtype,np.number):
+    print(f"\nOpened NetCDF file: {nc_path}")
+    print(f"Number of variables: {len(ds.variables)}")
+    for var_name, variable in ds.variables.items():
+        # Attempt to check if the dtype is numeric
+        try:
+            dtype = variable.dtype
+        except Exception:
+            # If reading dtype fails, skip it
+            print(f"\nSkipping variable with unknown type: {var_name}")
+            continue
+        if np.issubdtype(dtype, np.number) or hasattr(variable[:], "mask"):
             analyze_variable(variable)
         else:
+            print(f"\nSkipping non-numeric variable: {variable.name}")
+    # Close the NetCDF file
+    dataset.close()
+            print(f"\nSkipping non-numeric variable: {var_name}")
+### Call the main function
+analyze_netcdf()
+    ds.close()
+    print("\nFinished analysis.\n")
+def main():
+    """
+    Command-line entry point.
+    Parses one optional positional argument, nc_file. If it is given, that
+    path is analyzed directly. Otherwise the user is prompted for a path
+    (with tab completion registered through readline) and the entered path
+    is analyzed. Returns without analyzing anything if the prompt is
+    interrupted (EOF / Ctrl-C) or the answer is empty.
+    """
+    parser = argparse.ArgumentParser(
+        description="Analyze a NetCDF file and report min/max/mean for each numeric variable."
+    )
+    # nargs="?" makes the positional argument optional, so that omitting it
+    # selects the interactive mode below
+    parser.add_argument(
+        "nc_file",
+        nargs="?",
+        help="Path to the NetCDF file (if omitted, you'll be prompted)."
+    )
+    args = parser.parse_args()
+    if args.nc_file:
+        # Command-line mode: directly analyze the provided file path
+        analyze_netcdf_file(args.nc_file)
+    else:
+        # Interactive mode: enable tab completion for filenames
+        # NOTE(review): readline's default completer delimiters are believed
+        # to include '/', in which case complete_filename would only receive
+        # the last path component -- confirm whether a call to
+        # readline.set_completer_delims() is needed here.
+        readline.set_completer(complete_filename)
+        readline.parse_and_bind("tab: complete")
+        try:
+            # Surrounding whitespace is stripped from the answer
+            user_input = input("Enter the path to the NetCDF file: ").strip()
+        except (EOFError, KeyboardInterrupt):
+            # Ctrl-D / Ctrl-C at the prompt: leave quietly instead of showing a traceback
+            print("\nExiting.")
+            return
+        if not user_input:
+            # Empty answer: nothing to analyze
+            print("No file specified. Exiting.")
+            return
+        analyze_netcdf_file(user_input)
+# Run main() only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+    main()

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 3783 for trunk/LMDZ.MARS/util/analyse_netcdf.py

Legend:

trunk/LMDZ.MARS/util/analyse_netcdf.py

Download in other formats: