source: trunk/LMDZ.COMMON/libf/evolution/deftank/concat_pem.py

Last change on this file was 3883, checked in by jbclement, 4 months ago

PEM:

  • Replacing the Bash script "concat_diagpem.sh" by a Python script "concat_pem.py" which is much faster.
  • Corrections and updates for "visu_evol_layering.py".

JBC

  • Property svn:executable set to *
File size: 5.1 KB
Line 
1#!/usr/bin/env python3
2################################################################
3### Python script to concatenate the NetCDF files of the PEM ###
4### along the dimension 'Time' into one NetCDF file          ###
5################################################################
6
7
8import os
9import re
10import sys
11import glob
12import readline
13import argparse
14import xarray as xr
15import numpy as np
16
17
def complete_path(text, state):
    """Readline completer: return the state-th filesystem path matching *text**."""
    candidates = glob.glob(text + '*')
    if state >= len(candidates):
        return None
    return candidates[state]
21
# Wire up tab-completion of filesystem paths for the input() prompts below.
readline.parse_and_bind("tab: complete")
readline.set_completer(complete_path)
readline.set_completer_delims(' \t\n;')
25
26
def parse_args():
    """Build and evaluate the command-line interface of the script.

    Returns the argparse.Namespace with attributes: folder, basename,
    start, end and output (the latter defaults to "merged.nc").
    """
    parser = argparse.ArgumentParser(
        description="Concatenate multiple NetCDF files along the Time dimension"
    )
    parser.add_argument("--folder", type=str,
                        help="Path to the directory containing the NetCDF files")
    parser.add_argument("--basename", type=str,
                        help="Base name of the files, e.g., 'diagpem' for files like diagpem1.nc")
    parser.add_argument("--start", type=int,
                        help="Starting index of the files to include")
    parser.add_argument("--end", type=int,
                        help="Ending index of the files to include (inclusive)")
    parser.add_argument("--output", type=str, default="merged.nc",
                        help="Output filename for the concatenated NetCDF")
    return parser.parse_args()
52
53
def prompt_with_default(prompt_text, default, cast_fn=None):
    """Ask the user for a value, falling back to *default* on empty input.

    Parameters:
        prompt_text: text shown to the user (the default is appended to it).
        default:     value returned when the user just presses Enter.
        cast_fn:     optional callable applied to the raw input (e.g. int);
                     a ValueError from it re-prompts the user.

    Exits with status 1 on Ctrl-C or when stdin is exhausted.
    """
    prompt = f"{prompt_text} [press Enter for default {default}]: "
    while True:
        try:
            user_input = input(prompt)
        # EOFError added: input() raises it when stdin is closed (e.g. piped
        # input runs out), which previously crashed with a traceback.
        except (KeyboardInterrupt, EOFError):
            print("\nInterrupted.")
            sys.exit(1)

        if not user_input.strip():
            return default
        try:
            return cast_fn(user_input) if cast_fn else user_input
        except ValueError:
            # cast_fn is guaranteed non-None here: only cast_fn() can raise.
            print(f"Invalid value. Expecting {cast_fn.__name__}. Please try again.")
69
70
def find_index_range(folder, basename):
    """Scan *folder* for files named '<basename><N>.nc' and return (min N, max N).

    Raises FileNotFoundError when no such file exists.
    """
    index_re = re.compile(fr"{re.escape(basename)}(\d+)\.nc$")
    indices = []
    for path in glob.glob(os.path.join(folder, f"{basename}*.nc")):
        match = index_re.match(os.path.basename(path))
        if match:
            indices.append(int(match.group(1)))
    if not indices:
        raise FileNotFoundError(f"No files matching {basename}*.nc found in {folder}")
    return min(indices), max(indices)
83
84
def main():
    """Interactively concatenate PEM NetCDF outputs along 'Time' into one file.

    Folder, basename, index range and output name may come from the command
    line; missing values are prompted for (with discovered defaults). The
    'Time' coordinate of each successive file is shifted by a running offset
    so the merged time axis keeps increasing.
    """
    args = parse_args()

    # Fallback defaults when the options were not given on the command line.
    default_folder = args.folder or "diags"
    default_basename = args.basename or "diagpem"

    # Folder and basename are always confirmed interactively (Enter = default).
    folder = prompt_with_default(
        "Enter the folder path containing NetCDF files", default_folder
    )
    basename = prompt_with_default(
        "Enter the base filename", default_basename
    )

    # Discover which file indices are actually present on disk.
    min_idx, max_idx = find_index_range(folder, basename)
    print(f"Found files from index {min_idx} to {max_idx}.")

    # Prompt for start/end only when not supplied as options.
    start = args.start if args.start is not None else prompt_with_default(
        "Enter the starting file index", min_idx, cast_fn=int
    )
    end = args.end if args.end is not None else prompt_with_default(
        "Enter the ending file index", max_idx, cast_fn=int
    )

    # Validate the requested range against what exists.
    if start < min_idx or end > max_idx or start > end:
        print(f"Invalid range: must be between {min_idx} and {max_idx}, and start <= end.")
        sys.exit(1)

    # Output filename. NOTE(review): --output carries an argparse default of
    # "merged.nc", so args.output is always truthy and this prompt branch is
    # currently unreachable; kept unchanged for when the default is dropped.
    output = args.output or prompt_with_default(
        "Enter the output filename (including .nc)", "merged.nc"
    )

    # Build the expected file list and fail fast on the first missing file.
    file_list = [
        os.path.join(folder, f"{basename}{i}.nc")
        for i in range(start, end + 1)
    ]
    for fpath in file_list:
        if not os.path.isfile(fpath):
            raise FileNotFoundError(f"File not found: {fpath}")

    # Offset each file's Time values to make the concatenated axis cumulative.
    datasets = []
    time_offset = 0

    for fpath in file_list:
        # decode_times=False: operate on raw numeric time values; CF decoding
        # is attempted once, after concatenation.
        ds = xr.open_dataset(fpath, decode_times=False)

        if 'Time' not in ds.coords:
            raise ValueError(f"'Time' coordinate not found in {fpath}")

        time_vals = ds['Time'].values

        new_time_vals = time_vals + time_offset
        ds = ds.assign_coords(Time=new_time_vals)
        datasets.append(ds)

        # Advance the offset by this file's estimated time span. Assumes
        # uniform spacing with the first record one step after the segment
        # origin — TODO confirm against the PEM output convention. A
        # single-record file advances the offset by 1.
        if len(time_vals) > 1:
            dt = time_vals[1] - time_vals[0]
            duration = dt * len(time_vals)
        else:
            duration = 1
        time_offset += duration

    # Concatenate along the (now strictly increasing) Time dimension.
    merged_ds = xr.concat(datasets, dim="Time")

    # Optionally decode CF conventions after loading.
    try:
        merged_ds = xr.decode_cf(merged_ds)
    except Exception as e:
        print(f"Warning: CF decoding failed: {e}\nProceeding with raw time values.")

    # Inspect and save. The broad except guards the :.0f format, which fails
    # if Time was decoded to datetime objects above.
    try:
        tmax = merged_ds.Time.max().values
        print(f"Final time value: {tmax:.0f}")
    except Exception:
        print("Time variables not decoded correctly.")

    merged_ds.to_netcdf(output)
    print(f"Merged dataset written to {output}")

    # Release the underlying NetCDF file handles now that the output is
    # written (fixes a handle leak: the sources were previously never closed).
    for ds in datasets:
        ds.close()
172
173
# Standard entry-point guard: run the interactive tool only when executed
# directly, never on import.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.