Ignore:
Timestamp:
May 28, 2025, 5:31:59 PM (3 weeks ago)
Author:
jbclement
Message:

Mars PCM:
Big improvement of Python scripts in util folder to analyse/display variables in NetCDF files.
JBC

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/LMDZ.MARS/util/analyse_netcdf.py

    r3648 r3783  
     1#!/usr/bin/env python3
    12############################################################
    23### Python script to analyse a NetCDF file for debugging ###
    34############################################################
    45
    5 ### This script gives useful information about a NetCDF file
    6 ### to help for debugging. For each variable, it outputs the
    7 ### dimensions, the min & max values, the average value and
    8 ### warns the user in case of NaN or negative values.
    9 ### The file name is asked to the user in the terminal.
     6
     7"""
     8For each numeric variable, it outputs:
     9  - Dimensions and shape
     10  - Minimum & maximum values (ignoring NaNs)
     11  - Mean value (ignoring NaNs)
     12  - Warnings if the variable is entirely NaN or contains any NaNs/negative values
     13
     14Usage:
     15  1) Command-line mode:
     16       python analyse_netcdf.py /path/to/your_file.nc
     17 
     18  2) Interactive mode through the prompt:
     19       python analyse_netcdf.py
     20"""
     21
    1022
    1123import os
     24import sys
     25import glob
    1226import readline
    13 import glob
     27import argparse
     28import numpy as np
    1429from netCDF4 import Dataset
    15 import numpy as np
    1630
    17 ############################################################
    18 ### Setup readline for file name autocompletion
    19 def complete(text,state):
    20     line = readline.get_line_buffer().split()
    21     # Use glob to find all matching files/directories for the current text
    22     if '*' not in text:
    23         text += '*'
    24     matches = glob.glob(os.path.expanduser(text))
    25     # Add '/' if the match is a directory
    26     matches = [match + '/' if os.path.isdir(match) else match for match in matches]
    27    
     31
     32def complete_filename(text, state):
     33    """
     34    Tab-completion function for readline: completes filesystem paths.
     35    Appends '/' if the match is a directory.
     36    """
     37    # The text forms a partial path; glob for matching entries
     38    if "*" not in text:
     39        text_glob = text + "*"
     40    else:
     41        text_glob = text
     42    matches = glob.glob(os.path.expanduser(text_glob))
     43    # Add a trailing slash for directories
     44    matches = [m + "/" if os.path.isdir(m) else m for m in matches]
    2845    try:
    2946        return matches[state]
     
    3148        return None
    3249
    33 ### Function to analyze a variable in a NetCDF file
     50
    3451def analyze_variable(variable):
    35     # Get the data for the variable
    36     data = variable[:]
     52    """
     53    Print summary statistics (min, max, mean) for a numeric NetCDF variable.
     54    Ignores NaNs when computing min/max/mean. Warns if any NaNs or negatives exist.
     55    """
     56    name = variable.name
     57    dims = variable.dimensions
     58    shape = variable.shape
    3759   
    38     # Calculate min, max and mean
    39     if np.isnan(data).all():
    40         min_val = np.nan
    41         max_val = np.nan
    42         mean_val = np.nan
    43     else:
    44         data_min = np.nanmin(data) # Min value ignoring NaN
    45         data_max = np.nanmax(data) # Max value ignoring NaN
    46         data_mean = np.nanmean(data) # Mean value ignoring NaN
    47    
    48     # Check if there are any NaN values
     60    try:
     61        # Read the entire array into memory; this may be large for huge datasets
     62        data = variable[:]
     63    except Exception as e:
     64        print(f"\nUnable to read variable '{name}': {e}")
     65        return
     66
     67    # If the array is a masked array, convert to a NumPy array with masked values as np.nan
     68    if hasattr(data, "mask"):
     69        # Fill masked entries with NaN so that np.nanmin / np.nanmax works correctly
     70        data = np.where(data.mask, np.nan, data.data)
     71
     72    # Determine if the variable has any valid (finite) data at all
     73    if np.all(np.isnan(data)):
     74        # Entirely NaN (or entirely masked)
     75        print(f"\nAnalysis of variable: {name}")
     76        print(f"  Dimensions: {dims}")
     77        print(f"  Shape     : {shape}")
     78        print("  Entire variable is NaN or masked.")
     79        return
     80
     81    # Compute min, max, mean ignoring NaNs
     82    data_min = np.nanmin(data)
     83    data_max = np.nanmax(data)
     84    data_mean = np.nanmean(data)
     85
     86    # Check for presence of NaNs and negative values
    4987    has_nan = np.isnan(data).any()
     88    has_negative = np.any(data < 0)
    5089
    51     # Check for negative values
    52     has_negative = (data < 0).any()
    53    
    54     # Print the results
    55     print(f"\nAnalysis of variable: {variable.name}")
    56     print(f"  Dimensions: {variable.dimensions}")
     90    # Output
     91    print(f"\nAnalysis of variable: {name}")
     92    print(f"  Dimensions: {dims}")
     93    print(f"  Shape     : {shape}")
    5794    print(f"  Min value : {data_min:>12.6e}")
    5895    print(f"  Max value : {data_max:>12.6e}")
     
    63100        print(f"  \033[93mWarning: contains negative values!\033[0m")
    64101
    65 ### Main function
    66 def analyze_netcdf():
    67     # Ask for the file name
    68     readline.set_completer(complete)
    69     readline.parse_and_bind('tab: complete')
    70     file = input("Enter the name of the NetCDF file: ")
    71    
    72     # Open the NetCDF file
     102def analyze_netcdf_file(nc_path):
     103    """
     104    Open the NetCDF file at nc_path and analyze each numeric variable.
     105    """
     106    if not os.path.isfile(nc_path):
     107        print(f"Error: File '{nc_path}' not found.")
     108        return
     109
    73110    try:
    74         dataset = Dataset(file,mode='r')
    75     except FileNotFoundError:
    76         print(f"File '{file}' not found.")
     111        ds = Dataset(nc_path, mode='r')
     112    except Exception as e:
     113        print(f"Error: Unable to open '{nc_path}': {e}")
    77114        return
    78    
    79     # Iterate through all variables in the dataset to analyze them
    80     for variable_name in dataset.variables:
    81         variable = dataset.variables[variable_name]
    82         if np.issubdtype(variable[:].dtype,np.number):
     115
     116    print(f"\nOpened NetCDF file: {nc_path}")
     117    print(f"Number of variables: {len(ds.variables)}")
     118
     119    for var_name, variable in ds.variables.items():
     120        # Attempt to check if the dtype is numeric
     121        try:
     122            dtype = variable.dtype
     123        except Exception:
     124            # If reading dtype fails, skip it
     125            print(f"\nSkipping variable with unknown type: {var_name}")
     126            continue
     127
     128        if np.issubdtype(dtype, np.number) or hasattr(variable[:], "mask"):
    83129            analyze_variable(variable)
    84130        else:
    85             print(f"\nSkipping non-numeric variable: {variable.name}")
    86    
    87     # Close the NetCDF file
    88     dataset.close()
     131            print(f"\nSkipping non-numeric variable: {var_name}")
    89132
    90 ### Call the main function
    91 analyze_netcdf()
     133    ds.close()
     134    print("\nFinished analysis.\n")
     135
     136
     137def main():
     138    parser = argparse.ArgumentParser(
     139        description="Analyze a NetCDF file and report min/max/mean for each numeric variable."
     140    )
     141    parser.add_argument(
     142        "nc_file",
     143        nargs="?",
     144        help="Path to the NetCDF file (if omitted, you'll be prompted)."
     145    )
     146    args = parser.parse_args()
     147
     148    if args.nc_file:
     149        # Command-line mode: directly analyze the provided file path
     150        analyze_netcdf_file(args.nc_file)
     151    else:
     152        # Interactive mode: enable tab completion for filenames
     153        readline.set_completer(complete_filename)
     154        readline.parse_and_bind("tab: complete")
     155        try:
     156            user_input = input("Enter the path to the NetCDF file: ").strip()
     157        except (EOFError, KeyboardInterrupt):
     158            print("\nExiting.")
     159            return
     160
     161        if not user_input:
     162            print("No file specified. Exiting.")
     163            return
     164
     165        analyze_netcdf_file(user_input)
     166
     167
     168if __name__ == "__main__":
     169    main()
     170
Note: See TracChangeset for help on using the changeset viewer.