source: trunk/LMDZ.MARS/util/analyse_netcdf.py @ 3807

Last change on this file since 3807 was 3798, checked in by jbclement, 12 days ago

Mars PCM:
Handle correctly more variables with different types/shapes/dimensions.
JBC

  • Property svn:executable set to *
File size: 5.0 KB
Line 
1#!/usr/bin/env python3
2############################################################
3### Python script to analyse a NetCDF file for debugging ###
4############################################################
5
6
7"""
For each numeric variable, it outputs:
  - Dimensions and shape
  - Minimum & maximum values (ignoring NaNs)
  - Mean value (ignoring NaNs)
  - Warnings if the variable is entirely NaN or contains any NaNs/negative values

Usage:
  1) Command-line mode:
       python analyse_netcdf.py /path/to/your_file.nc

  2) Interactive mode through the prompt:
       python analyse_netcdf.py
20"""
21
22
23import os
24import sys
25import glob
26import readline
27import argparse
28import numpy as np
29from netCDF4 import Dataset
30
31
def complete_filename(text, state):
    """
    Tab-completion function for readline: completes filesystem paths.

    Parameters:
        text:  Partial path typed so far. A leading '~' is expanded; if the
               text contains no '*', one is appended so that the text acts
               as a prefix.
        state: Index of the candidate requested by readline (0, 1, 2, ...).

    Returns:
        The state-th matching path (directories get a trailing '/'), or
        None once state runs past the last match.
    """
    # The text forms a partial path; turn it into a glob pattern
    pattern = text if "*" in text else text + "*"

    # readline calls this function once per state value and glob.glob() makes
    # no promise about ordering, so sort to keep the state -> match mapping
    # stable and the candidate listing predictable.
    matches = sorted(glob.glob(os.path.expanduser(pattern)))

    # Mark directories with a trailing slash, without doubling a slash that
    # the pattern itself already produced (e.g. "dir*/").
    matches = [
        m + "/" if os.path.isdir(m) and not m.endswith("/") else m
        for m in matches
    ]

    try:
        return matches[state]
    except IndexError:
        return None
49
50
def analyze_variable(variable):
    """
    Print summary statistics (min, max, mean) for a numeric NetCDF variable.

    Masked entries are converted to NaN, and NaNs are ignored when computing
    min/max/mean. Warns if any NaNs or negative values exist.

    Parameters:
        variable: Object exposing `name`, `dimensions`, `shape` and
                  full-slice indexing (`variable[:]`), such as a netCDF4
                  variable.

    Returns:
        None. The report is written to standard output.
    """
    name = variable.name
    dims = variable.dimensions
    shape = variable.shape

    try:
        # Read the entire array into memory; this may be large for huge datasets
        data = variable[:]
    except Exception as e:
        print(f"\nUnable to read variable '{name}': {e}")
        return

    # If the array is a masked array, convert it to a plain NumPy array with
    # masked entries set to NaN so that np.nanmin / np.nanmax skip them.
    # Mixing in np.nan also promotes integer data to floating point.
    if hasattr(data, "mask"):
        data = np.where(data.mask, np.nan, data.data)

    # Compute the NaN mask once; it is needed both for the "entirely NaN"
    # check and for the "contains NaN" warning.
    nan_mask = np.isnan(data)

    # Common header
    print(f"\nAnalysis of variable: {name}")
    print(f"  Dimensions: {dims}")
    print(f"  Shape     : {shape}")

    # Entirely NaN (or entirely masked). A zero-size array also ends up here
    # because np.all() of an empty array is True.
    if np.all(nan_mask):
        print("  Entire variable is NaN or masked.")
        return

    # Compute min, max, mean ignoring NaNs
    data_min = np.nanmin(data)
    data_max = np.nanmax(data)
    data_mean = np.nanmean(data)

    # Check for presence of NaNs and negative values. Comparing NaN with 0 is
    # simply False; errstate silences any "invalid value" floating-point
    # warning the comparison could raise.
    has_nan = nan_mask.any()
    with np.errstate(invalid="ignore"):
        has_negative = np.any(data < 0)

    # Output
    print(f"  Min value : {data_min:>12.6e}")
    print(f"  Max value : {data_max:>12.6e}")
    print(f"  Mean value: {data_mean:>12.6e}")
    if has_nan:
        # ANSI red
        print("\033[91mContains NaN values!\033[0m")
    if has_negative:
        # ANSI yellow
        print("\033[93mWarning: contains negative values!\033[0m")
101
def analyze_netcdf_file(nc_path):
    """
    Open the NetCDF file at nc_path and analyze each numeric variable.

    Parameters:
        nc_path: Path to the NetCDF file to open read-only.

    Returns:
        None. Errors (missing file, unreadable file) are reported on
        standard output rather than raised.
    """
    if not os.path.isfile(nc_path):
        print(f"Error: File '{nc_path}' not found.")
        return

    try:
        ds = Dataset(nc_path, mode='r')
    except Exception as e:
        print(f"Error: Unable to open '{nc_path}': {e}")
        return

    # From here on the dataset is open: make sure it gets closed even if the
    # analysis of one variable raises.
    try:
        print(f"\nOpened NetCDF file: {nc_path}")
        print(f"Number of variables: {len(ds.variables)}")

        for var_name, variable in ds.variables.items():
            # Attempt to check if the dtype is numeric
            try:
                dtype = variable.dtype
            except Exception:
                # If reading dtype fails, skip it
                print(f"\nSkipping variable with unknown type: {var_name}")
                continue

            if np.issubdtype(dtype, np.number):
                analyze_variable(variable)
            else:
                print(f"\nSkipping non-numeric variable: {var_name}")
    finally:
        ds.close()

    print("\nFinished analysis.\n")
135
136
def main():
    """
    Script entry point.

    With a path argument, analyze that file directly. Without one, prompt for
    a path interactively, with tab completion of filenames.
    """
    parser = argparse.ArgumentParser(
        description="Analyze a NetCDF file and report min/max/mean for each numeric variable."
    )
    parser.add_argument(
        "nc_file",
        nargs="?",
        help="Path to the NetCDF file (if omitted, you'll be prompted)."
    )
    args = parser.parse_args()

    if args.nc_file:
        # Command-line mode: directly analyze the provided file path
        analyze_netcdf_file(args.nc_file)
        return

    # Interactive mode: enable tab completion for filenames.
    # Only whitespace may split completion words: readline's default
    # delimiters include '/', '-' and '~', which would hand the completer
    # just the last path component instead of the whole path typed so far.
    readline.set_completer_delims(" \t\n")
    readline.set_completer(complete_filename)
    readline.parse_and_bind("tab: complete")
    try:
        user_input = input("Enter the path to the NetCDF file: ").strip()
    except (EOFError, KeyboardInterrupt):
        print("\nExiting.")
        return

    if not user_input:
        print("No file specified. Exiting.")
        return

    analyze_netcdf_file(user_input)
166
167
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
170
Note: See TracBrowser for help on using the repository browser.