#!/usr/bin/env python3
################################################################
### Python script to concatenate the NetCDF files of the PEM ###
### along the dimension 'Time' into one NetCDF file          ###
################################################################
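# Expected input layout (the names below match the script defaults and are only
# illustrative): a folder such as 'diags/' holding diagpem1.nc, diagpem2.nc, ...,
# each carrying a 'Time' coordinate. Files are appended in index order and their
# Time values are offset so that the merged time axis keeps increasing.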


import os
import re
import sys
import glob
import readline
import argparse
import xarray as xr
import numpy as np


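# Enable tab completion of filesystem paths in the interactive input() prompts
# below (GNU readline, typically available on Unix-like systems).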
def complete_path(text, state):
    matches = glob.glob(text + '*')
    return matches[state] if state < len(matches) else None


readline.set_completer_delims(' \t\n;')
readline.set_completer(complete_path)
readline.parse_and_bind("tab: complete")


def parse_args():
    parser = argparse.ArgumentParser(
        description="Concatenate multiple NetCDF files along the Time dimension"
    )
    parser.add_argument(
        "--folder", type=str,
        help="Path to the directory containing the NetCDF files"
    )
    parser.add_argument(
        "--basename", type=str,
        help="Base name of the files, e.g., 'diagpem' for files like diagpem1.nc"
    )
    parser.add_argument(
        "--start", type=int,
        help="Starting index of the files to include"
    )
    parser.add_argument(
        "--end", type=int,
        help="Ending index of the files to include (inclusive)"
    )
    # No argparse default here: main() prompts for the output name (default
    # 'merged.nc') when --output is not supplied on the command line.
    parser.add_argument(
        "--output", type=str,
        help="Output filename for the concatenated NetCDF (default: merged.nc)"
    )
    return parser.parse_args()
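

# Example invocations (the script filename and the argument values below are
# purely illustrative):
#   python3 concat_pem_netcdf.py --folder diags --basename diagpem --start 1 --end 24
#   python3 concat_pem_netcdf.py    # fully interactive: every value is prompted for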


def prompt_with_default(prompt_text, default, cast_fn=None):
    prompt = f"{prompt_text} [press Enter for default {default}]: "
    while True:
        try:
            user_input = input(prompt)
        except KeyboardInterrupt:
            print("\nInterrupted.")
            sys.exit(1)

        if not user_input.strip():
            return default
        try:
            return cast_fn(user_input) if cast_fn else user_input
        except ValueError:
            print(f"Invalid value. Expecting {cast_fn.__name__}. Please try again.")


def find_index_range(folder, basename):
    pattern = os.path.join(folder, f"{basename}*.nc")
    files = glob.glob(pattern)
    indices = []
    for f in files:
        name = os.path.basename(f)
        m = re.match(fr"{re.escape(basename)}(\d+)\.nc$", name)
        if m:
            indices.append(int(m.group(1)))
    if not indices:
        raise FileNotFoundError(f"No files matching {basename}*.nc found in {folder}")
    return min(indices), max(indices)


def main():
    args = parse_args()

    # Defaults
    default_folder = args.folder or "diags"
    default_basename = args.basename or "diagpem"

    # Prompt for folder and basename with defaults
    folder = prompt_with_default(
        "Enter the folder path containing NetCDF files", default_folder
    )
    basename = prompt_with_default(
        "Enter the base filename", default_basename
    )

    # Determine available index range
    min_idx, max_idx = find_index_range(folder, basename)
    print(f"Found files from index {min_idx} to {max_idx}.")

    # Prompt for start/end with discovered defaults
    start = args.start if args.start is not None else prompt_with_default(
        "Enter the starting file index", min_idx, cast_fn=int
    )
    end = args.end if args.end is not None else prompt_with_default(
        "Enter the ending file index", max_idx, cast_fn=int
    )

    # Validate range
    if start < min_idx or end > max_idx or start > end:
        print(f"Invalid range: must be between {min_idx} and {max_idx}, and start <= end.")
        sys.exit(1)

    # Output filename (prompted when --output is not supplied)
    output = args.output or prompt_with_default(
        "Enter the output filename (including .nc)", "merged.nc"
    )

    # Build and verify file list
    file_list = [
        os.path.join(folder, f"{basename}{i}.nc")
        for i in range(start, end + 1)
    ]
    for fpath in file_list:
        if not os.path.isfile(fpath):
            raise FileNotFoundError(f"File not found: {fpath}")

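    # Worked example of the offsetting below (values are illustrative): two files
    # whose Time axes are both [1, 2, 3] (dt = 1, so duration = 3) come out as
    # [1, 2, 3] and [4, 5, 6], keeping the merged time axis monotonic.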
    # Offset Time values to make them cumulative across files
    datasets = []
    time_offset = 0

    for fpath in file_list:
        ds = xr.open_dataset(fpath, decode_times=False)

        if 'Time' not in ds.coords:
            raise ValueError(f"'Time' coordinate not found in {fpath}")

        time_vals = ds['Time'].values

        # Shift this file's times by the accumulated duration of the earlier files
        new_time_vals = time_vals + time_offset
        ds = ds.assign_coords(Time=new_time_vals)
        datasets.append(ds)

        # Estimate this file's duration as (uniform time step) x (number of
        # samples); fall back to 1 when the file holds a single time value.
        if len(time_vals) > 1:
            dt = time_vals[1] - time_vals[0]
            duration = dt * len(time_vals)
        else:
            duration = 1
        time_offset += duration

    # Concatenate
    merged_ds = xr.concat(datasets, dim="Time")

    # Optionally decode CF conventions after loading
    try:
        merged_ds = xr.decode_cf(merged_ds)
    except Exception as e:
        print(f"Warning: CF decoding failed: {e}\nProceeding with raw time values.")

    # Inspect and save
    try:
        tmax = merged_ds.Time.max().values
        print(f"Final time value: {tmax}")
    except Exception:
        print("Could not report the final time value.")

    merged_ds.to_netcdf(output)
    print(f"Merged dataset written to {output}")


if __name__ == "__main__":
    main()