Changeset 3783 for trunk/LMDZ.MARS/util/analyse_netcdf.py
- Timestamp: May 28, 2025, 5:31:59 PM (3 weeks ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/LMDZ.MARS/util/analyse_netcdf.py
r3648 r3783 1 #!/usr/bin/env python3 1 2 ############################################################ 2 3 ### Python script to analyse a NetCDF file for debugging ### 3 4 ############################################################ 4 5 5 ### This script gives useful information about a NetCDF file 6 ### to help for debugging. For each variable, it outputs the 7 ### dimensions, the min & max values, the average value and 8 ### warns the user in case of NaN or negative values. 9 ### The file name is asked to the user in the terminal. 6 7 """ 8 For each numeric variable, it outputs: 9 - Dimensions and shape 10 - Minimum & maximum values (ignoring NaNs) 11 - Mean value (ignoring NaNs) 12 - Warnings if the variable is entirely NaN or contains any NaNs/negative values 13 14 Usage: 15 1) Command-line mode: 16 python analyze_netcdf.py /path/to/your_file.nc 17 18 2) Interactive mode through the prompt: 19 python analyze_netcdf.py 20 """ 21 10 22 11 23 import os 24 import sys 25 import glob 12 26 import readline 13 import glob 27 import argparse 28 import numpy as np 14 29 from netCDF4 import Dataset 15 import numpy as np 16 30 17 ############################################################ 18 ### Setup readline for file name autocompletion 19 def complete(text,state): 20 line = readline.get_line_buffer().split() 21 # Use glob to find all matching files/directories for the current text 22 if '*' not in text: 23 text += '*' 24 matches = glob.glob(os.path.expanduser(text)) 25 # Add '/' if the match is a directory 26 matches = [match + '/' if os.path.isdir(match) else match for match in matches] 27 31 32 def complete_filename(text, state): 33 """ 34 Tab-completion function for readline: completes filesystem paths. 35 Appends '/' if the match is a directory.
36 """ 37 # The text forms a partial path; glob for matching entries 38 if "*" not in text: 39 text_glob = text + "*" 40 else: 41 text_glob = text 42 matches = glob.glob(os.path.expanduser(text_glob)) 43 # Add a trailing slash for directories 44 matches = [m + "/" if os.path.isdir(m) else m for m in matches] 28 45 try: 29 46 return matches[state] … … 31 48 return None 32 49 33 ### Function to analyze a variable in a NetCDF file 50 34 51 def analyze_variable(variable): 35 # Get the data for the variable 36 data = variable[:] 52 """ 53 Print summary statistics (min, max, mean) for a numeric NetCDF variable. 54 Ignores NaNs when computing min/max/mean. Warns if any NaNs or negatives exist. 55 """ 56 name = variable.name 57 dims = variable.dimensions 58 shape = variable.shape 37 59 38 # Calculate min, max and mean 39 if np.isnan(data).all(): 40 min_val = np.nan 41 max_val = np.nan 42 mean_val = np.nan 43 else: 44 data_min = np.nanmin(data) # Min value ignoring NaN 45 data_max = np.nanmax(data) # Max value ignoring NaN 46 data_mean = np.nanmean(data) # Mean value ignoring NaN 47 48 # Check if there are any NaN values 60 try: 61 # Read the entire array into memory; this may be large for huge datasets 62 data = variable[:] 63 except Exception as e: 64 print(f"\nUnable to read variable '{name}': {e}") 65 return 66 67 # If the array is a masked array, convert to a NumPy array with masked values as np.nan 68 if hasattr(data, "mask"): 69 # Fill masked entries with NaN so that np.nanmin / np.nanmax works correctly 70 data = np.where(data.mask, np.nan, data.data) 71 72 # Determine if the variable has any valid (finite) data at all 73 if np.all(np.isnan(data)): 74 # Entirely NaN (or entirely masked) 75 print(f"\nAnalysis of variable: {name}") 76 print(f" Dimensions: {dims}") 77 print(f" Shape : {shape}") 78 print(" Entire variable is NaN or masked.") 79 return 80 81 # Compute min, max, mean ignoring NaNs 82 data_min = np.nanmin(data) 83 data_max = np.nanmax(data) 84 data_mean = 
np.nanmean(data) 85 86 # Check for presence of NaNs and negative values 49 87 has_nan = np.isnan(data).any() 88 has_negative = np.any(data < 0) 50 89 51 # Check for negative values 52 has_negative = (data < 0).any() 53 54 # Print the results 55 print(f"\nAnalysis of variable: {variable.name}") 56 print(f" Dimensions: {variable.dimensions}") 90 # Output 91 print(f"\nAnalysis of variable: {name}") 92 print(f" Dimensions: {dims}") 93 print(f" Shape : {shape}") 57 94 print(f" Min value : {data_min:>12.6e}") 58 95 print(f" Max value : {data_max:>12.6e}") … … 63 100 print(f" \033[93mWarning: contains negative values!\033[0m") 64 101 65 ### Main function 66 def analyze_netcdf(): 67 # Ask for the file name 68 readline.set_completer(complete) 69 readline.parse_and_bind('tab: complete') 70 file = input("Enter the name of the NetCDF file:") 71 72 # Open the NetCDF file 102 def analyze_netcdf_file(nc_path): 103 """ 104 Open the NetCDF file at nc_path and analyze each numeric variable. 105 """ 106 if not os.path.isfile(nc_path): 107 print(f"Error: File '{nc_path}' not found.") 108 return 109 73 110 try: 74 dataset = Dataset(file,mode='r') 75 except FileNotFoundError: 76 print(f" File '{file}' not found.") 111 ds = Dataset(nc_path, mode='r') 112 except Exception as e: 113 print(f"Error: Unable to open '{nc_path}': {e}") 77 114 return 78 79 # Iterate through all variables in the dataset to analyze them 80 for variable_name in dataset.variables: 81 variable = dataset.variables[variable_name] 82 if np.issubdtype(variable[:].dtype,np.number): 115 116 print(f"\nOpened NetCDF file: {nc_path}") 117 print(f"Number of variables: {len(ds.variables)}") 118 119 for var_name, variable in ds.variables.items(): 120 # Attempt to check if the dtype is numeric 121 try: 122 dtype = variable.dtype 123 except Exception: 124 # If reading dtype fails, skip it 125 print(f"\nSkipping variable with unknown type: {var_name}") 126 continue 127 128 if np.issubdtype(dtype, np.number) or hasattr(variable[:], 
"mask"): 83 129 analyze_variable(variable) 84 130 else: 85 print(f"\nSkipping non-numeric variable: {variable.name}") 86 87 # Close the NetCDF file 88 dataset.close() 131 print(f"\nSkipping non-numeric variable: {var_name}") 89 132 90 ### Call the main function 91 analyze_netcdf() 133 ds.close() 134 print("\nFinished analysis.\n") 135 136 137 def main(): 138 parser = argparse.ArgumentParser( 139 description="Analyze a NetCDF file and report min/max/mean for each numeric variable." 140 ) 141 parser.add_argument( 142 "nc_file", 143 nargs="?", 144 help="Path to the NetCDF file (if omitted, you'll be prompted)." 145 ) 146 args = parser.parse_args() 147 148 if args.nc_file: 149 # Command-line mode: directly analyze the provided file path 150 analyze_netcdf_file(args.nc_file) 151 else: 152 # Interactive mode: enable tab completion for filenames 153 readline.set_completer(complete_filename) 154 readline.parse_and_bind("tab: complete") 155 try: 156 user_input = input("Enter the path to the NetCDF file: ").strip() 157 except (EOFError, KeyboardInterrupt): 158 print("\nExiting.") 159 return 160 161 if not user_input: 162 print("No file specified. Exiting.") 163 return 164 165 analyze_netcdf_file(user_input) 166 167 168 if __name__ == "__main__": 169 main() 170
Note: See TracChangeset for help on using the changeset viewer.