Ignore:
Timestamp:
Jul 28, 2025, 4:56:58 PM (5 days ago)
Author:
jbclement
Message:

PEM:
Bug fix for the detection of the job time limit with PBS/TORQUE. The detection is now more robust and automatic, both in the launching script and in the Fortran code.
JBC

Location:
trunk/LMDZ.COMMON/libf/misc
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/LMDZ.COMMON/libf/misc/job_timelimit_mod.F90

    r3837 r3869  
    44! DESCRIPTION:
    55!    Retrieves the time limit (in seconds) for a given job ID via
    6 !    SLURM (squeue) or PBS/TORQUE (qstat).
     6!    SLURM or PBS/TORQUE.
    77!***********************************************************************
    88
     
    1717!=======================================================================
    1818
    19 SUBROUTINE get_job_timelimit(arg)
     19SUBROUTINE get_job_timelimit(jobid)
    2020
    2121implicit none
    2222
    2323!---- Arguments
    24 character(*), intent(in) :: arg
     24character(*), intent(in) :: jobid
    2525
    2626!---- Variables
     
    3333! Check that the job ID is numeric
    3434num_str = .true.
    35 do i = 1,len_trim(arg)
    36     if (arg(i:i) < '0' .or. arg(i:i) > '9') then
     35do i = 1,len_trim(jobid)
     36    if (jobid(i:i) < '0' .or. jobid(i:i) > '9') then
    3737        num_str = .false.
    3838        exit
     
    4242
    4343! Try SLURM (squeue)
    44 call execute_command_line('squeue -j '//trim(adjustl(arg))//' -h --Format TimeLimit > tmp_cmdout.txt',cmdstat = cstat)
     44call execute_command_line('squeue -j '//trim(adjustl(jobid))//' -h --Format TimeLimit > tmp_timelimit.txt',cmdstat = cstat)
    4545if (cstat /= 0) then
    4646    ! On failure, try PBS/TORQUE (qstat)
    47     call execute_command_line('qstat -f '//trim(adjustl(arg))//' | grep "Walltime" | awk ''{print $3}'' > tmp_cmdout.txt',cmdstat = cstat)
     47    call execute_command_line('qstat -f '//trim(adjustl(jobid))//' | grep "Walltime" | awk ''{print $3}'' > tmp_timelimit.txt',cmdstat = cstat)
    4848    if (cstat > 0) then
    4949        error stop 'Error: command execution failed!'
     
    5454
    5555! Read the output
    56 open(1,file = 'tmp_cmdout.txt',status = 'old')
     56open(1,file = 'tmp_timelimit.txt',status = 'old')
    5757read(1,'(a)') chtimelimit
    5858close(1)
     
    6060
    6161! Remove temporary file
    62 call execute_command_line('rm tmp_cmdout.txt',cmdstat = cstat)
     62call execute_command_line('rm tmp_timelimit.txt',cmdstat = cstat)
    6363if (cstat > 0) then
    6464    error stop 'Error: command execution failed!'
  • trunk/LMDZ.COMMON/libf/misc/parse_args_mod.F90

    r3837 r3869  
    1212
    1313use pgrm_version_mod,  only: print_pgrm_version
     14use job_id_mod,        only: get_job_id
    1415use job_timelimit_mod, only: get_job_timelimit
    1516
     
    3031!---- Variables
    3132integer        :: narg, i, eq_pos
    32 character(256) :: arg, key, vlu
     33character(256) :: arg, key, vlu, jobid
    3334
    3435!---- Code
     
    7374            write(*,*) 'SSO fields will be included in "start_archive.nc"'
    7475
    75         case ('--jobid')
    76             call get_job_timelimit(vlu)
     76        case ('--auto-exit')
     77            call get_job_id(jobid)
     78            call get_job_timelimit(jobid)
    7779
    7880        case default
     
    9395    write(*,*) '  --version [file]    Print program version and exit (optional output file)'
    9496    write(*,*) '  --add-sso           Add SSO fields to "start_archive.nc" (only available for Mars start2archive)'
    95     write(*,*) '  --jobid <id>        Query the time limit for the specified job ID,'
    96     write(*,*) '                      enabling self-termination before timeout (only available for the PEM)'
     97    write(*,*) '  --auto-exit         Enable automatic termination before reaching the job time limit (only available for the PEM)'
    9798    write(*,*)
    9899END SUBROUTINE print_usage
Note: See TracChangeset for help on using the changeset viewer.