# Python tools to transform from U. Wyoming sounding file to netCDF 
#    http://weather.uwyo.edu/upperair/sounding.html
# L. Fita,  CIMA August 2017
##
## e.g. # UWyoming_snd_nc.py -f snd_CORDOBA.txt

### ASCII files from U. Wyoming can have more than one sounding per file, but from
###   the same station.
### Script spect to find
# [num] [Text] Observations at [time]Z [Date]
#
#-----------------------------------------------------------------------------
#   PRES   HGHT   TEMP   DWPT   RELH   MIXR   DRCT   SKNT   THTA   THTE   THTV
#    hPa     m      C      C      %    g/kg    deg   knot     K      K      K 
#-----------------------------------------------------------------------------
# lines of data
# Station information and sounding indices

#                         Station identifier: [Value]
#                             Station number: [Value]
#                           Observation time: [date]/[time]
# (...)
#
# [num] [Text] Observations at [time]Z [Date]
# (...)
 
from optparse import OptionParser
import numpy as np
from netCDF4 import Dataset as NetCDFFile
import os
import re
import numpy.ma as ma
# Importing generic tools file 'nc_var_tools.py'
import nc_var_tools as ncvar
# Importing generic tools file 'generic_tools.py'
import generic_tools as gen
import subprocess as sub
import time

main = 'UWyoming_snd_nc.py'

###### ###### ##### #### ### ## #

# Time reference
TrefS = '1949-12-01 00:00:00'
TrefYmdHMS = '19491201000000'
tunits = 'minutes'
tfmt = '%y%m%d/%H%M'

# Integer text values (all the rest as float)
intTXTvals = ['Station number']

# Text values (all the rest as float)
txtTXTvals = ['Station identifier', 'Observation time']

# Float values for global attributes:
floatTXTvals = ['Station elevation', 'Station longitude', 'Station latitude']

# Not lower pressure variables
NOTlowpres = ['DWPT', 'RELH', 'MIXR', 'THTE']

Lstring = 256

# Arguments
##
parser = OptionParser()
parser.add_option("-D", "--Debug", dest="debug", 
  help="debug prints",  metavar="BOOL")
parser.add_option("-f", "--snd_file", dest="sndfile", 
  help="U. Wyoming sounding (http://weather.uwyo.edu/upperair/sounding.html) file to use", 
  metavar="FILE")

(opts, args) = parser.parse_args()

#######    #######
## MAIN
    #######

# Global attributes for the station
stngattrk = intTXTvals + txtTXTvals + floatTXTvals

if opts.debug is None:
    print gen.infmsg
    print '  ' + main + ": no debug value provided!!"
    print '    assuming:', False
    debug = False
else:
    debug = gen.Str_Bool(opts.debug)

if not os.path.isfile(opts.sndfile):
      print gen.errormsg
      print '   ' + main + ": sounding file '" + opts.sndfile + "' does not exist !!"
      quit(-1)    

# Reading sounding file
osnd = open(opts.sndfile, 'r')

# Processed sounding
soundings = {}

# Processed dates
idate = 0

# List with the dates of  the soundings, to keep them consecutive!!
Tsoundings = []

for line in osnd:
    if line[0:1] != '#' and line[0:1] != '<' and line[0:1] != '-' and len(line) > 1:
        lvals = gen.values_line(line, ' ', ['\t'])
        newsnd = lvals.count('Observations')
        # Processing a new date
        if newsnd != 0:
            if idate == 0:
                print '  reading first sounding!'
                pvals = {}
                txtvals = {}
                idate = idate + 1
                strefn = lvals[1]
                idobsS = lvals.index('Observations')
                stnameS = ' '.join(lvals[2:idobsS])
            else:
                stationdateS = str(txtvals['Station number']) + '_' + txtvals['Observation time']
                print "   data for sounding: '" + stationdateS + "' _______"

                if (debug):
                    for ip in pvals.keys():
                        print ip, ':', pvals[ip]
                    for ic in txtvals.keys():
                        print ic, ':', txtvals[ic]

                Tsoundings.append(stationdateS)
                soundings[stationdateS] = [pvals, txtvals]
                if len(soundings.keys()) == 1:
                    presvals = list(pvals.keys())
                else:
                    noinc = list(set(pvals.keys()).difference(set(presvals)))
                    presvals = presvals + noinc
                    presvals.sort(reverse=True)
                pvals = {}
                txtvals = {}
                idate = idate + 1
        elif lvals[0] == 'hPa':
            if (debug):
                print '      getting text values ...'
            if lvals[0] == 'hPa':
                if idate == 1:
                    if (debug):
                        print '      getting units of the variables'
                    varu = {}
                    iv = 0
                    for vn in sndvarn:
                        varu[vn] = lvals[iv]
                        iv = iv + 1
        elif lvals[0] == '-':
            print ' '
#        elif lvals[0].index() == 
        elif lvals[0] == 'PRES':
            # doing nothing!
            sndvarn = list(lvals)
        else:
            # Numeric values
            if gen.IsNumber(lvals[0], 'R') and len(lvals) > 1 and lvals[1] != 'hPa':
                Sline = line.replace('\n', '').replace('\t', '')
                linvals = gen.getting_fixedline(Sline,[7,14,21,28,35,42,49,56,63,70],\
                  ['R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R'])
                if (debug):
                    print '      getting values at pressure:', np.float(lvals[0])
                pvals[np.float(lvals[0])] = np.array(linvals[1:], dtype=np.float)
            else:
                if (debug):
                    print '      getting text values ...'
                if lvals[0] == 'hPa':
                    if idate == 1:
                        if (debug):
                            print '      getting units of the variables'
                        varu = {}
                        iv = 0
                        for vn in sndvarn:
                            varu[vn] = lvals[iv]
                            iv = iv + 1
                else:
                    txt = ''
                    # Specific work for 'Observation time:'
                    if gen.searchInlist(lvals, 'Observation'):
                        txtvals['Observation time'] = lvals[2]
                    else:
                        for iv in range(len(lvals)):
                            if not gen.IsNumber(lvals[iv], 'R'):
                                if len(txt) == 0:
                                    txt = lvals[iv]
                                else:
                                    txt = txt + ' ' + lvals[iv]
                            else:
                                # Specific work for '1000 hPa to 500 hPa thickness:'
                                if gen.searchInlist(lvals, 'thickness:') and iv <= 5:
                                    if len(txt) == 0:
                                        txt = lvals[iv]
                                    else:
                                        txt = txt + ' ' + lvals[iv]
                                else:
                                    txtn = txt.replace(':', '')
                                    if gen.searchInlist(intTXTvals, txtn):
                                        txtvals[txtn] = int(lvals[iv])
                                    elif gen.searchInlist(txtTXTvals, txtn):
                                        txtvals[txtn] = lvals[iv]
                                    else:
                                        txtvals[txtn] = np.float(lvals[iv])
                        if (debug):
                            print "        text value: '" + txtn + "':", txtvals[txtn]

# Including last sounding
stationdateS = str(txtvals['Station number']) + '_' + txtvals['Observation time']
print "   data for sounding: '" + stationdateS + "' _______"

if (debug):
    for ip in pvals.keys():
        print ip, ':', pvals[ip]
    for ic in txtvals.keys():
        print ic, ':', txtvals[ic]

Tsoundings.append(stationdateS)
soundings[stationdateS] = [pvals, txtvals]
if len(soundings.keys()) == 1:
    presvals = list(pvals.keys())
else:
    noinc = list(set(pvals.keys()).difference(set(presvals)))
    presvals = presvals + noinc
    presvals.sort(reverse=True)

osnd.close()
varns = list(sndvarn)
varns.remove('PRES')

# Getting measured values
Ntimes = len(Tsoundings)
Npres = len(presvals)
Nvals = len(varns)

sndvals = np.ones((Ntimes, Npres, Nvals), dtype=np.float)*gen.fillValueF
for it in range(Ntimes):
    snditS = Tsoundings[it]
    sndvs = soundings[snditS]
    sndv = sndvs[0]
    for ip in range(Npres):
        if sndv.has_key(presvals[ip]):
            sndpv = sndv[presvals[ip]]

            if len(sndpv) == Nvals:
                sndvals[it,ip,:] = sndpv
            else:
                ivv = 0
                if len(sndpv) == Nvals - len(NOTlowpres):
                    for iv in range(Nvals):
                        if not gen.searchInlist(NOTlowpres, varns[iv]):
                            sndvals[it,ip,iv] = sndpv[ivv]
                            ivv = ivv + 1
                elif len(sndpv) == Nvals - len(NOTlowpres) - 2:
                    for iv in range(Nvals):
                        if not gen.searchInlist(NOTlowpres, varns[iv]) and         \
                          not gen.searchInlist(['DRCT', 'SKNT'], varns[iv]):
                            sndvals[it,ip,iv] = sndpv[ivv]
                            ivv = ivv + 1
                elif len(sndpv) == 1:
                    sndvals[it,ip,ivv] = sndpv[0]

sndvals = ma.masked_equal(sndvals, gen.fillValueF)
print '  recupered values shape:', sndvals.shape
if (debug):
    print '    values from file _______'
    print sndvals

# Removing not computed values
statglobalattr = {}
txtn = list(txtvals.keys())
snditS = Tsoundings[0]
sndvs = soundings[snditS]
sndc = sndvs[1]

for Sn in intTXTvals:
    SnS = Sn.replace(' ','_')
    if gen.searchInlist(txtn, Sn):
        if sndc.has_key(Sn): statglobalattr[SnS] = int(sndc[Sn])
        else: statglobalattr[SnS] = '-'
        txtn.remove(Sn)
    else:
        if sndc.has_key(Sn): statglobalattr[SnS] = int(sndc[Sn])
        else: statglobalattr[SnS] = '-'

for Sn in txtTXTvals:
    SnS = Sn.replace(' ','_')
    if gen.searchInlist(txtn, Sn):
        if sndc.has_key(Sn): statglobalattr[SnS] = sndc[Sn]
        else: statglobalattr[SnS] = '-'
        txtn.remove(Sn)
    else:
        if sndc.has_key(Sn): statglobalattr[SnS] = int(sndc[Sn])
        else: statglobalattr[SnS] = '-'

for Sn in floatTXTvals:
    SnS = Sn.replace(' ','_')
    if gen.searchInlist(txtn, Sn): 
        if sndc.has_key(Sn): statglobalattr[SnS] = np.float(sndc[Sn])
        else: statglobalattr[Sn] = '-'
        txtn.remove(Sn)
    else:
        if sndc.has_key(Sn): statglobalattr[SnS] = int(sndc[Sn])
        else: statglobalattr[SnS] = '-'

Ncomp = len(txtn)
compvals = np.ones((Ntimes, Ncomp), dtype=np.float)*gen.fillValueF

tvals = []
for it in range(Ntimes):
    snditS = Tsoundings[it]
    sndvs = soundings[snditS]
    sndc = sndvs[1]
    tvals.append(sndc['Observation time'])
    for ic in range(len(txtn)):
        if sndc.has_key(txtn[ic]):
            compvals[it,ic] = sndc[txtn[ic]]

# netCDF file creation
##
ofilen = 'UWyoming_snd_' + str(txtvals['Station number']) + '.nc'

onewnc = NetCDFFile(ofilen, 'w')

# Creation of dimensions
onewnc.createDimension('pres', Npres)
onewnc.createDimension('time', None)
#onewnc.createDimension('cvals', Ncomp)
#onewnc.createDimension('Lstring', Lstring)

# Creation of variable-dimensions 
newvar = onewnc.createVariable('pres', 'f8', ('pres'))
newvar[:] = presvals
ncvar.basicvardef(newvar, 'pres', 'pressure', varu['PRES'])

# No more computedvalues matrix !
#newvar = onewnc.createVariable('cvals', 'c', ('cvals', 'Lstring'))
#ncvar.writing_str_nc(newvar, txtn, Lstring)
#ncvar.basicvardef(newvar, 'cvals', 'computed values from sounding data', 'hPa')

newvar = onewnc.createVariable('time', 'f8', ('time'))
timeT = []
atimeT = np.zeros((len(tvals),6), dtype=int)
iit = 0
for it in tvals:
    dateT = time.strptime(it, tfmt)
    timeT.append(dateT)
    atimeT[iit,:] = np.array([dateT.tm_year, dateT.tm_mon, dateT.tm_mday,            \
      dateT.tm_hour, dateT.tm_min, dateT.tm_sec])
    iit = iit + 1

CFtimes = gen.realdatetime_CFcompilant(atimeT, TrefYmdHMS, tunits)
newvar[:] = CFtimes
ncvar.basicvardef(newvar, 'time', 'time', tunits + ' since ' + TrefS)
ncvar.set_attribute(newvar, 'calendar', 'gregorian')

# Filling with sounding values
iv = 0
for varn in varns:
    CFvals = gen.variables_values(varn)
    newvar = onewnc.createVariable(CFvals[0], 'f', ('time', 'pres'),                 \
      fill_value=gen.fillValueF)
    newvar[:] = sndvals[:,:,iv]
    ncvar.basicvardef(newvar, varn, CFvals[4].replace('|', ' '), varu[varn])
    iv = iv + 1
onewnc.sync()

# No more computedvalues matrix !
#newvar = onewnc.createVariable('computedvals', 'f', ('time', 'cvals'),               \
#  fill_value=gen.fillValueF)
#newvar[:] = compvals
#ncvar.basicvardef(newvar, 'computedvals', 'values computed from sounding data', '-')
#onewnc.sync()

# Getting specific 1D values
for ic in range(len(txtn)):
    if len(txtn[ic]) < 2: continue
    Sn = txtn[ic]
    CFvalues = gen.variables_values(Sn)
    newvar=onewnc.createVariable(CFvalues[0],'f', ('time'), fill_value=gen.fillValueF)
    newvar[:] = compvals[:,ic]
    ncvar.basicvardef(newvar, CFvalues[1], CFvalues[4].replace('|',' '), CFvalues[5])
    onewnc.sync()

# Global attributes
ncvar.add_global_PyNCplot(onewnc, main, '', '0.2')
onewnc.setncattr('Station_ref', strefn)
onewnc.setncattr('Station_name', stnameS)

for atn in stngattrk:
    atnS = atn.replace(' ','_')
    onewnc.setncattr(atnS, statglobalattr[atnS])

onewnc.close()
print main + ": succesful writing of sounding file '" + ofilen + "' !!"

