#!/usr/bin/env python3
################################################################
### Python script to concatenate the NetCDF files of the PEM ###
### along the dimension 'Time' into one NetCDF file          ###
################################################################
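# Expected input layout (the names below match the script defaults and are only
# illustrative): a folder such as 'diags/' holding diagpem1.nc, diagpem2.nc, ...,
# each carrying a 'Time' coordinate. Files are appended in index order and their
# Time values are offset so that the merged time axis keeps increasing.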


import os
import re
import sys
import glob
import readline
import argparse
import xarray as xr
import numpy as np


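# Enable tab completion of filesystem paths in the interactive input() prompts
# below (GNU readline, typically available on Unix-like systems).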
def complete_path(text, state):
    matches = glob.glob(text + '*')
    return matches[state] if state < len(matches) else None


readline.set_completer_delims(' \t\n;')
readline.set_completer(complete_path)
readline.parse_and_bind("tab: complete")


def parse_args():
    parser = argparse.ArgumentParser(
        description="Concatenate multiple NetCDF files along the Time dimension"
    )
    parser.add_argument(
        "--folder", type=str,
        help="Path to the directory containing the NetCDF files"
    )
    parser.add_argument(
        "--basename", type=str,
        help="Base name of the files, e.g., 'diagpem' for files like diagpem1.nc"
    )
    parser.add_argument(
        "--start", type=int,
        help="Starting index of the files to include"
    )
    parser.add_argument(
        "--end", type=int,
        help="Ending index of the files to include (inclusive)"
    )
    # No argparse default here: main() prompts for the output name (default
    # 'merged.nc') when --output is not supplied on the command line.
    parser.add_argument(
        "--output", type=str,
        help="Output filename for the concatenated NetCDF (default: merged.nc)"
    )
    return parser.parse_args()
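

# Example invocations (the script filename and the argument values below are
# purely illustrative):
#   python3 concat_pem_netcdf.py --folder diags --basename diagpem --start 1 --end 24
#   python3 concat_pem_netcdf.py    # fully interactive: every value is prompted for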


def prompt_with_default(prompt_text, default, cast_fn=None):
    prompt = f"{prompt_text} [press Enter for default {default}]: "
    while True:
        try:
            user_input = input(prompt)
        except KeyboardInterrupt:
            print("\nInterrupted.")
            sys.exit(1)

        if not user_input.strip():
            return default
        try:
            return cast_fn(user_input) if cast_fn else user_input
        except ValueError:
            print(f"Invalid value. Expecting {cast_fn.__name__}. Please try again.")


def find_index_range(folder, basename):
    pattern = os.path.join(folder, f"{basename}*.nc")
    files = glob.glob(pattern)
    indices = []
    for f in files:
        name = os.path.basename(f)
        m = re.match(fr"{re.escape(basename)}(\d+)\.nc$", name)
        if m:
            indices.append(int(m.group(1)))
    if not indices:
        raise FileNotFoundError(f"No files matching {basename}*.nc found in {folder}")
    return min(indices), max(indices)


def main():
    args = parse_args()

    # Defaults
    default_folder = args.folder or "diags"
    default_basename = args.basename or "diagpem"

    # Prompt for folder and basename with defaults
    folder = prompt_with_default(
        "Enter the folder path containing NetCDF files", default_folder
    )
    basename = prompt_with_default(
        "Enter the base filename", default_basename
    )

    # Determine available index range
    min_idx, max_idx = find_index_range(folder, basename)
    print(f"Found files from index {min_idx} to {max_idx}.")

    # Prompt for start/end with discovered defaults
    start = args.start if args.start is not None else prompt_with_default(
        "Enter the starting file index", min_idx, cast_fn=int
    )
    end = args.end if args.end is not None else prompt_with_default(
        "Enter the ending file index", max_idx, cast_fn=int
    )

    # Validate range
    if start < min_idx or end > max_idx or start > end:
        print(f"Invalid range: must be between {min_idx} and {max_idx}, and start <= end.")
        sys.exit(1)

    # Output filename (prompted when --output is not supplied)
    output = args.output or prompt_with_default(
        "Enter the output filename (including .nc)", "merged.nc"
    )

    # Build and verify file list
    file_list = [
        os.path.join(folder, f"{basename}{i}.nc")
        for i in range(start, end + 1)
    ]
    for fpath in file_list:
        if not os.path.isfile(fpath):
            raise FileNotFoundError(f"File not found: {fpath}")

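    # Worked example of the offsetting below (values are illustrative): two files
    # whose Time axes are both [1, 2, 3] (dt = 1, so duration = 3) come out as
    # [1, 2, 3] and [4, 5, 6], keeping the merged time axis monotonic.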
    # Offset Time values to make them cumulative across files
    datasets = []
    time_offset = 0

    for fpath in file_list:
        ds = xr.open_dataset(fpath, decode_times=False)

        if 'Time' not in ds.coords:
            raise ValueError(f"'Time' coordinate not found in {fpath}")

        time_vals = ds['Time'].values

        # Shift this file's times by the accumulated duration of the earlier files
        new_time_vals = time_vals + time_offset
        ds = ds.assign_coords(Time=new_time_vals)
        datasets.append(ds)

        # Estimate this file's duration as (uniform time step) x (number of
        # samples); fall back to 1 when the file holds a single time value.
        if len(time_vals) > 1:
            dt = time_vals[1] - time_vals[0]
            duration = dt * len(time_vals)
        else:
            duration = 1
        time_offset += duration

    # Concatenate
    merged_ds = xr.concat(datasets, dim="Time")

    # Optionally decode CF conventions after loading
    try:
        merged_ds = xr.decode_cf(merged_ds)
    except Exception as e:
        print(f"Warning: CF decoding failed: {e}\nProceeding with raw time values.")

    # Inspect and save
    try:
        tmax = merged_ds.Time.max().values
        print(f"Final time value: {tmax}")
    except Exception:
        print("Could not report the final time value.")

    merged_ds.to_netcdf(output)
    print(f"Merged dataset written to {output}")


if __name__ == "__main__":
    main()