Context Navigation

source: trunk/LMDZ.MARS/util/analyse_netcdf.py @ 3807

Last change on this file since 3807 was 3798, checked in by jbclement, 2 weeks ago
Mars PCM: Handle correctly more variables with different types/shapes/dimensions. JBC
Property svn:executable set to ``*
File size: 5.0 KB

Rev	Line
[3783]	1	#!/usr/bin/env python3
[3459]	2	############################################################
	3	### Python script to analyse a NetCDF file for debugging ###
	4	############################################################
	5
	6
[3783]	7	"""
	8	For each numeric variable, it outputs:
	9	- Dimensions and shape
	10	- Minimum & maximum values (ignoring NaNs)
	11	- Mean value (ignoring NaNs)
	12	- Warnings if the variable is entirely NaN or contains any NaNs/negative values
	13
	14	Usage:
	15	1) Command-line mode:
	16	python analyse_netcdf.py /path/to/your_file.nc
	17
	18	2) Interactive mode through the prompt:
	19	python analyse_netcdf.py
	20	"""
	21
	22
[3459]	23	import os
[3783]	24	import sys
	25	import glob
[3459]	26	import readline
[3783]	27	import argparse
	28	import numpy as np
[3459]	29	from netCDF4 import Dataset
	30
[3783]	31
	32	def complete_filename(text, state):
	33	"""
	34	Tab-completion function for readline: completes filesystem paths.
	35	Appends '/' if the match is a directory.
	36	"""
	37	# The text forms a partial path; glob for matching entries
	38	if "*" not in text:
	39	text_glob = text + "*"
	40	else:
	41	text_glob = text
	42	matches = glob.glob(os.path.expanduser(text_glob))
	43	# Add a trailing slash for directories
	44	matches = [m + "/" if os.path.isdir(m) else m for m in matches]
[3459]	45	try:
	46	return matches[state]
	47	except IndexError:
	48	return None
	49
[3783]	50
def analyze_variable(variable):
    """
    Print summary statistics (min, max, mean) for a numeric NetCDF variable.

    NaNs are ignored when computing min/max/mean; masked entries are treated
    as NaN. A warning is printed if any NaNs or negative values exist.

    Parameters:
        variable: object exposing `name`, `dimensions`, `shape` and slice
            indexing (`variable[:]`) that returns the data.

    Returns:
        None. All results are written to standard output. Read failures and
        data that cannot be analysed numerically are reported, not raised.
    """
    name = variable.name
    dims = variable.dimensions
    shape = variable.shape

    try:
        # Read the entire array into memory; this may be large for huge datasets
        data = variable[:]
    except Exception as e:
        print(f"\nUnable to read variable '{name}': {e}")
        return

    if hasattr(data, "mask"):
        # Fill masked entries with NaN so that np.nanmin / np.nanmax skip them
        data = np.where(data.mask, np.nan, data.data)
    else:
        data = np.asarray(data)

    # The header is common to every outcome below, so print it once
    print(f"\nAnalysis of variable: {name}")
    print(f" Dimensions: {dims}")
    print(f" Shape : {shape}")

    # A zero-size array would otherwise be reported as "entirely NaN"
    if data.size == 0:
        print(" Variable is empty (no data).")
        return

    try:
        # Compute the NaN mask once and reuse it for both checks
        nan_mask = np.isnan(data)
        if nan_mask.all():
            print(" Entire variable is NaN or masked.")
            return

        data_min = np.nanmin(data)
        data_max = np.nanmax(data)
        data_mean = np.nanmean(data)
        has_negative = np.any(data < 0)
    except (TypeError, ValueError) as e:
        # e.g. object-dtype arrays that NumPy cannot treat numerically;
        # report and move on instead of aborting the whole analysis
        print(f" Unable to compute statistics: {e}")
        return

    print(f" Min value : {data_min:>12.6e}")
    print(f" Max value : {data_max:>12.6e}")
    print(f" Mean value: {data_mean:>12.6e}")
    if nan_mask.any():
        print(" \033[91mContains NaN values!\033[0m")
    if has_negative:
        print(" \033[93mWarning: contains negative values!\033[0m")
	101
def analyze_netcdf_file(nc_path):
    """
    Open the NetCDF file at nc_path and analyze each numeric variable.

    Variables whose dtype cannot be read, or whose dtype is not a NumPy
    numeric type, are skipped with a message. The dataset is closed even if
    the analysis of a variable raises.

    Parameters:
        nc_path: path to the NetCDF file to open read-only.

    Returns:
        None. Progress and errors are written to standard output.
    """
    if not os.path.isfile(nc_path):
        print(f"Error: File '{nc_path}' not found.")
        return

    try:
        ds = Dataset(nc_path, mode='r')
    except Exception as e:
        print(f"Error: Unable to open '{nc_path}': {e}")
        return

    # try/finally guarantees the file handle is released on any exit path
    try:
        print(f"\nOpened NetCDF file: {nc_path}")
        print(f"Number of variables: {len(ds.variables)}")

        for var_name, variable in ds.variables.items():
            # Attempt to check if the dtype is numeric
            try:
                dtype = variable.dtype
            except Exception:
                # If reading dtype fails, skip it
                print(f"\nSkipping variable with unknown type: {var_name}")
                continue

            if np.issubdtype(dtype, np.number):
                analyze_variable(variable)
            else:
                print(f"\nSkipping non-numeric variable: {var_name}")
    finally:
        ds.close()

    print("\nFinished analysis.\n")
	135
	136
	137	def main():
	138	parser = argparse.ArgumentParser(
	139	description="Analyze a NetCDF file and report min/max/mean for each numeric variable."
	140	)
	141	parser.add_argument(
	142	"nc_file",
	143	nargs="?",
	144	help="Path to the NetCDF file (if omitted, you'll be prompted)."
	145	)
	146	args = parser.parse_args()
	147
	148	if args.nc_file:
	149	# Command-line mode: directly analyze the provided file path
	150	analyze_netcdf_file(args.nc_file)
	151	else:
	152	# Interactive mode: enable tab completion for filenames
	153	readline.set_completer(complete_filename)
	154	readline.parse_and_bind("tab: complete")
	155	try:
	156	user_input = input("Enter the path to the NetCDF file: ").strip()
	157	except (EOFError, KeyboardInterrupt):
	158	print("\nExiting.")
	159	return
	160
	161	if not user_input:
	162	print("No file specified. Exiting.")
	163	return
	164
	165	analyze_netcdf_file(user_input)
	166
	167
	168	if __name__ == "__main__":
	169	main()
	170

Note: See TracBrowser for help on using the repository browser.

Download in other formats: