Context Navigation

analyse_netcdf.py @ 3985

Last change on this file since 3985 was 3808, checked in by jbclement, 6 months ago

Mars PCM:

Bug corrections for the Python script displaying variables in a NetCDF file regarding the dimensions + addition of options (for example to average over longitude).
Improvement for the Python script analyzing variables in a NetCDF file.

JBC

Property svn:executable set to *

File size: 5.0 KB

Line
1	#!/usr/bin/env python3
2	############################################################
3	### Python script to analyse a NetCDF file for debugging ###
4	############################################################
5
6
7	"""
8	For each numeric variable, it outputs:
9	- Dimensions and shape
10	- Minimum & maximum values (ignoring NaNs)
11	- Mean value (ignoring NaNs)
12	- Warnings if the variable is entirely NaN or contains any NaNs/negative values
13
14	Usage:
15	1) Command-line mode:
16	python analyse_netcdf.py /path/to/your_file.nc
17
18	2) Interactive mode through the prompt:
19	python analyse_netcdf.py
20	"""
21
22
23	import os
24	import sys
25	import glob
26	import readline
27	import argparse
28	import numpy as np
29	from netCDF4 import Dataset
30
31
def complete_filename(text, state):
    """
    Tab-completion function for readline: completes filesystem paths.

    Args:
        text: Partial path typed so far. A leading '~' is expanded. Unless
            the text already contains a '*' wildcard, a trailing '*' is
            appended so that every entry starting with the text matches.
        state: Zero-based index of the candidate requested by readline.

    Returns:
        The state-th matching path, with directories ending in exactly one
        '/', or None when there is no candidate at that index.
    """
    pattern = text if "*" in text else text + "*"

    candidates = []
    for path in glob.glob(os.path.expanduser(pattern)):
        # A pattern that itself ends with '/' makes glob return directories
        # that already carry the trailing slash; do not append a second one.
        if os.path.isdir(path) and not path.endswith(("/", os.sep)):
            path += "/"
        candidates.append(path)

    # glob gives no ordering guarantee; sort so that the successive calls
    # made by readline (state = 0, 1, 2, ...) walk a stable list.
    candidates.sort()

    try:
        return candidates[state]
    except IndexError:
        return None
49
50
def analyze_variable(variable):
    """
    Print summary statistics (min, max, mean) for a numeric NetCDF variable.

    Masked entries are treated as NaN, and NaNs are ignored when computing
    min/max/mean. An anomaly is reported if the variable is entirely NaN or
    contains any NaN, and a caution if it is empty or contains negative
    values.

    Args:
        variable: Object exposing ``name``, ``dimensions`` and ``shape``
            attributes and supporting ``variable[:]`` to read all its values.

    Returns:
        None. Everything is reported on standard output; a read failure is
        printed as an error instead of being raised.
    """
    name = variable.name
    dims = variable.dimensions
    shape = variable.shape

    try:
        # Read the entire array into memory; this may be large for huge datasets
        data = variable[:]
    except Exception as e:
        print(f"\nError: Unable to read variable '{name}': {e}")
        return

    # If the array is a masked array, fill masked entries with NaN so that
    # np.nanmin / np.nanmax / np.nanmean skip them
    if hasattr(data, "mask"):
        data = np.where(data.mask, np.nan, data.data)

    # Header common to every outcome below
    print(f"\nAnalysis of variable: {name}")
    print(f" Dimensions: {dims}")
    print(f" Shape : {shape}")

    # A zero-size variable has no statistics; without this guard it would be
    # reported as "entirely NaN" because np.all() of an empty array is True
    if np.size(data) == 0:
        print(" \033[93mCaution: variable is empty (no values to analyze)!\033[0m")
        return

    # Compute the NaN mask once and reuse it for both checks
    nan_mask = np.isnan(data)
    if np.all(nan_mask):
        # Entirely NaN (or entirely masked)
        print(" \033[91mAnomaly: entire variable is NaN or masked!\033[0m")
        return

    # Compute min, max, mean ignoring NaNs
    data_min = np.nanmin(data)
    data_max = np.nanmax(data)
    data_mean = np.nanmean(data)

    print(f" Min value : {data_min:>12.6e}")
    print(f" Max value : {data_max:>12.6e}")
    print(f" Mean value: {data_mean:>12.6e}")
    if np.any(nan_mask):
        print(" \033[91mAnomaly: contains NaN values!\033[0m")
    if np.any(data < 0):
        print(" \033[93mCaution: contains negative values!\033[0m")
101
def analyze_netcdf_file(nc_path):
    """
    Open the NetCDF file at nc_path and analyze each numeric variable.

    Args:
        nc_path: Path to the NetCDF file to open read-only.

    Returns:
        None. A missing file or a file that cannot be opened is reported on
        standard output and the function returns without analyzing anything.
    """
    if not os.path.isfile(nc_path):
        print(f"Error: File '{nc_path}' not found.")
        return

    try:
        ds = Dataset(nc_path, mode='r')
    except Exception as e:
        print(f"Error: Unable to open '{nc_path}': {e}")
        return

    # The context manager closes the dataset even if analyzing a variable
    # raises, so the file handle is never leaked
    with ds:
        print(f"\nOpened NetCDF file: {nc_path}")
        print(f"Number of variables: {len(ds.variables)}")

        for var_name, variable in ds.variables.items():
            # Attempt to check if the dtype is numeric
            try:
                dtype = variable.dtype
            except Exception:
                # If reading dtype fails, skip it
                print(f"\nWarning: Skipping variable with unknown type: {var_name}")
                continue

            if np.issubdtype(dtype, np.number):
                analyze_variable(variable)
            else:
                print(f"\nWarning: Skipping non-numeric variable: {var_name}")

    print("\nFinished analysis.\n")
135
136
def main():
    """
    Entry point: analyze the NetCDF file given on the command line or, when
    no path is given, prompt for one with tab completion of filenames.
    """
    parser = argparse.ArgumentParser(
        description="Analyze a NetCDF file and report min/max/mean for each numeric variable."
    )
    parser.add_argument(
        "nc_file",
        nargs="?",
        help="Path to the NetCDF file (if omitted, you'll be prompted)."
    )
    args = parser.parse_args()

    if args.nc_file:
        # Command-line mode: directly analyze the provided file path
        analyze_netcdf_file(args.nc_file)
        return

    # Interactive mode: enable tab completion for filenames.
    # readline's default word delimiters include '/', '-' and '~', so the
    # completer would only receive the last path component and glob in the
    # wrong directory. Restrict them to whitespace so it gets the whole path.
    readline.set_completer_delims(" \t\n")
    readline.set_completer(complete_filename)
    if "libedit" in (readline.__doc__ or ""):
        # libedit-backed readline (e.g. macOS) uses a different binding syntax
        readline.parse_and_bind("bind ^I rl_complete")
    else:
        readline.parse_and_bind("tab: complete")

    try:
        user_input = input("Enter the path to the NetCDF file: ").strip()
    except (EOFError, KeyboardInterrupt):
        print("\nExiting.")
        return

    if not user_input:
        print("No file specified. Exiting.")
        return

    analyze_netcdf_file(user_input)


if __name__ == "__main__":
    main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/LMDZ.MARS/util/analyse_netcdf.py @ 3985

Download in other formats: