# Python script to transfomr ASCII Time-series WRF outputs to netCDF
# From L. Fita work in different places: LMD (France)
# More information at: http://www.xn--llusfb-5va.cat/python/PyNCplot
#
# pyNCplot and its component TS_ASCII_netCDF.py comes with ABSOLUTELY NO WARRANTY. 
# This work is licendes under a Creative Commons 
#   Attribution-ShareAlike 4.0 International License (http://creativecommons.org/licenses/by-sa/4.0)
#

## g.e. # TS_ASCII_netCDF.py -f //media/ExtDiskD/bkup/ciclad/etudes/WL_HyMeX/iop15/wrf/run/control/stations_20121018000000-20121022000000/h0001.d01.TS -s 20121018000000

import os
from optparse import OptionParser
import numpy as np
from netCDF4 import Dataset as NetCDFFile
import re

main = 'TS_ASCII_netCDF.py'
errormsg='ERROR -- error -- ERROR -- error'
warnmsg='WARNING -- warning -- WARNING -- warning'

fillValue = 1.e20

def searchInlist(listname, nameFind):
    """ Function to search a value within a list
    listname = list
    nameFind = value to find
    >>> searInlist(['1', '2', '3', '5'], '5')
    True
    """
    for x in listname:
      if x == nameFind:
        return True
    return False

def reduce_spaces(string):
    """ Function to give words of a line of text removing any extra space
    """
    values = string.replace('\n','').split(' ')
    vals = []
    for val in values:
         if len(val) > 0:
             vals.append(val)

    return vals

def reduce_last_spaces(string):
    """ Function to reduce the last right spaces from a string
      string= string to remove the spaces at the righ side of the string
    >>> reduce_last_spaces('Hola Lluis      ')
    'Hola Lluis'
    """
    fname = 'reduce_last_spaces'

    Lstring = len(string)

    firstspace = -1
    for istr in range(Lstring-1,0,-1):
        if string[istr:istr+1] == ' ':
            firstspace = istr
        else:
            break

    if firstspace == -1:
        print errormsg
        print '  ' + fname + ": string '" + string + "' is not ended by spaces!!"
        print '    char. number of last character:',ord(string[Lstring:Lstring+1])
        quit(-1)

    newstr = string[0:firstspace]

    return newstr

def set_attribute(ncv, attrname, attrvalue):
    """ Sets a value of an attribute of a netCDF variable. Removes previous attribute value if exists
    ncv = object netcdf variable
    attrname = name of the attribute
    attrvalue = value of the attribute
    """

    attvar = ncv.ncattrs()
    if searchInlist(attvar, attrname):
        attr = ncv.delncattr(attrname)

    attr = ncv.setncattr(attrname, attrvalue)

    return attr

def rmNOnum(string):
    """ Removing from a string all that characters which are not numbers
    # From: http://stackoverflow.com/questions/4289331/python-extract-numbers-from-a-string
    """
    fname = 'rmNOnum'

    newstr = str(re.findall("[-+]?\d+[\.]?\d*", string)[0])

    return newstr

def basicvardef(varobj, vstname, vlname, vunits):
    """ Function to give the basic attributes to a variable
    varobj= netCDF variable object
    vstname= standard name of the variable
    vlname= long name of the variable
    vunits= units of the variable
    """
    attr = varobj.setncattr('standard_name', vstname)
    attr = varobj.setncattr('long_name', vlname)
    attr = varobj.setncattr('units', vunits)

    return

def values_fortran_fmt(lin,fFMT):
    """ Function to give back the values of an ASCII line according to its fortran printing format
      lin= ASCII line
      fFMT= list with the fortran FORMAT formats
    forline = 'Natchitoches (RGNL)        1 11 0011  ( 31.733, -93.100) (  28, 157) ( 31.761, -93.113)   41.2 meters'
    formats = ['A26', 'I2', 'I3', 'A6', 'A2', 'F7.3', 'A1', 'F8.3', 'A3', 'I4', 'A1', 'I4',
      'A3', 'F7.3', 'A1', 'F8.3', 'A2', 'F6.1', 'A7']
    >>> values_fortran_fmt(forline, formats)
    ['Natchitoches (RGNL)        ', 1, 11, ' 0011  ', ' ( ', 31.733, ', ', -93.1, ') ( ', 28, ', ', 157, ')
      ( ', 31.761, ', ', -93.113, ')  ', 41.2, ' meters']
    """
    fname = 'values_fortran_fmt'

    afmts = ['A', 'D', 'F', 'I']

    if lin == 'h':
        print fname + '_____________________________________________________________'
        print values_fortran_fmt.__doc__
        quit()

    fvalues = []
    ichar=0
    ival = 0
    for ft in fFMT:
        Lft = len(ft)
        if ft[0:1] == 'A' or ft[0:1] == 'a':
            Lfmt = int(ft[1:Lft+1])
            fvalues.append(lin[ichar:ichar+Lfmt+1])
            ichar = ichar + Lfmt
        elif ft[0:1] == 'F' or ft[0:1] == 'f':
            if ft.find('.') != -1:
                Lft = len(ft.split('.')[0])
            Lfmt = int(ft[1:Lft])
            fvalues.append(np.float(rmNOnum(lin[ichar:ichar+Lfmt+1])))
            ichar = ichar + Lfmt
        elif ft[0:1] == 'D' or ft[0:1] == 'd':
            if ft.find('.') != -1:
                Lft = len(ft.split('.')[0])
            Lfmt = int(ft[1:Lft])
            fvalues.append(np.float64(rmNOnum(lin[ichar:ichar+Lfmt+1])))
            ichar = ichar + Lfmt
        elif ft[0:1] == 'I' or ft[0:1] == 'i':
            Lfmt = int(ft[1:Lft+1])
            fvalues.append(int(rmNOnum(lin[ichar:ichar+Lfmt+1])))
            ichar = ichar + Lfmt
        elif ft.find('X') != -1 or ft.find('x') != -1:
            print '    ' + fname + ': skipping space'
            ichar = ichar + int(rmNOnum(ft))
        else:
            print errormsg
            print '  ' + fname + ": format '" + ft[0:1] + "' not ready!!"
            print '    Available formats:',afmts
            quit(-1)

        ival = ival + 1

    return fvalues


def ts_header(ln):
    """ Function to get the values of the header of the *.TS files
      line=ASCII lines with the header of the TS file
      getting the line format from WRFV3.3 'EMCORE' in file 'share/wrf_timeseries.F'
    """
    fname = 'ts_header'

    fmts=['A26', 'I2', 'I3', 'A6', 'A2', 'F7.3', 'A1', 'F8.3', 'A3', 'I4', 'A1', 'I4',\
      'A3', 'F7.3', 'A1', 'F8.3', 'A2', 'F6.1', 'A7']

    headervalues = values_fortran_fmt(ln,fmts)

    return headervalues

def variables_values(varName):
    """ Function to provide values to plot the different variables values from ASCII file
      'variables_values.dat'
    variables_values(varName)
      [varName]= name of the variable
        return: [var name], [std name], [minimum], [maximum], 
          [long name]('|' for spaces), [units], [color palette] (following: 
          http://matplotlib.org/1.3.1/examples/color/colormaps_reference.html)
     [varn]: original name of the variable
       NOTE: It might be better doing it with an external ASII file. But then we 
         got an extra dependency...
    >>> variables_values('WRFght')
    ['z', 'geopotential_height', 0.0, 80000.0, 'geopotential|height', 'm2s-2', 'rainbow']
    """
    import subprocess as sub

    fname='variables_values'

    if varName == 'h':
        print fname + '_____________________________________________________________'
        print variables_values.__doc__
        quit()

# This does not work....
#    folderins = sub.Popen(["pwd"], stdout=sub.PIPE)
#    folder = list(folderins.communicate())[0].replace('\n','')
# From http://stackoverflow.com/questions/4934806/how-can-i-find-scripts-directory-with-python
    folder = os.path.dirname(os.path.realpath(__file__))

    infile = folder + '/variables_values.dat'

    if not os.path.isfile(infile):
        print errormsg
        print '  ' + fname + ": File '" + infile + "' does not exist !!"
        quit(-1)

# Variable name might come with a statistical surname...
    stats=['min','max','mean','stdv', 'sum']

# Variables with a statistical section on their name...
    NOstatsvars = ['zmaxth', 'zmax_th', 'lmax_th', 'lmaxth']

    ifst = False
    if not searchInlist(NOstatsvars, varName.lower()):
        for st in stats:
            if varName.find(st) > -1:
                print '    '+ fname + ": varibale '" + varName + "' with a " +       \
                  "statistical surname: '",st,"' !!"
                Lst = len(st)
                LvarName = len(varName)
                varn = varName[0:LvarName - Lst]
                ifst = True
                break
    if not ifst:
        varn = varName

    ncf = open(infile, 'r')

    for line in ncf:
        if line[0:1] != '#':
            values = line.replace('\n','').split(',')
            if len(values) != 8:
                print errormsg
                print "problem in varibale:'", values[0],                            \
                  'it should have 8 values and it has',len(values)
                quit(-1)

            if varn[0:6] == 'varDIM': 
# Variable from a dimension (all with 'varDIM' prefix)
                Lvarn = len(varn)
                varvals = [varn[6:Lvarn+1], varn[6:Lvarn+1], 0., 1.,                 \
                  "variable|from|size|of|dimension|'" + varn[6:Lvarn+1] + "'", '1',  \
                   'rainbow']
            else:
                varvals = [values[1].replace(' ',''), values[2].replace(' ',''),     \
                  np.float(values[3]), np.float(values[4]),values[5].replace(' ',''),\
                  values[6].replace(' ',''), values[7].replace(' ','')]
            if values[0] == varn:
                ncf.close()
                return varvals
                break

    print errormsg
    print '  ' + fname + ": variable '" + varn + "' not defined !!!"
    ncf.close()
    quit(-1)

    return 

####### ###### ##### #### ### ## #

parser = OptionParser()
parser.add_option("-f", "--TS_file", dest="lfile",
  help="Time Series ASCII text file to use", metavar="FILE")
parser.add_option("-s", "--SimulationStartTime", dest="stime",
  help="Starting time of the simulation ([YYYY][MM][DD][HH][MI][SS] format)", metavar="DATE")

(opts, args) = parser.parse_args()


tsvn = ['t', 'q', 'u', 'v', 'psfc', 'glw', 'gsw', 'hfx', 'lh', 'tsk', 'tslb1', 'rainc', 'rainnc', 'clw']

tsvln = ['2 m Temperature', '2 m vapor mixing ratio', '10 m U wind (earth-relative)', '10 m V wind (earth-relative)', 'surface pressure', 'downward longwave radiation flux at the ground (downward is positive)', 'net shortwave radiation flux at the ground (downward is positive)', 'surface sensible heat flux (upward is positive)', 'surface latent heat flux (upward is positive)', 'skin temperature', 'top soil layer temperature', 'rainfall from a cumulus scheme', 'rainfall from an explicit scheme', 'total column-integrated water vapor and cloud variables']

tsvu = ['K', 'kg/kg', 'm/s', 'm/s', 'Pa', 'W/m2', 'W/m2', 'W/m2', 'W/m2', 'K', 'K', 'mm', 'mm', '1']


#######    #######
## MAIN
    #######

ofile = 'ts.nc'
Ntsvariables = len(tsvn)

if not os.path.isfile(opts.lfile):
    print errormsg
    print '  ' + main + ': Time-Series ASCII text file "' + opts.lfile +             \
      '" does not exist !!'
    print errormsg
    quit()

if opts.stime is None:
    print errormsg
    print '  ' + main + ': No initial date/time of the simulation is provided!'
    quit(-1)
else:
    stime = opts.stime
    refdate = stime[0:4] + '-' + stime[4:6] + '-' + stime[6:8] + ' ' + stime[8:10] + \
      ':' + stime[10:12] + ':' + stime[12:14]

objlfile = open(opts.lfile, 'r')

objofile = NetCDFFile(ofile, 'w')

# Creation of dimensions
##
objofile.createDimension('time',None)

set_attribute(objofile, 'author', 'Lluis Fita Borrell')
set_attribute(objofile, 'institution', 'Laboratoire Meteorologique Dynamique')
set_attribute(objofile, 'university', 'University Pierre et Marie Curie')
set_attribute(objofile, 'center', 'Centre national de la recherche scientifique')
set_attribute(objofile, 'country', 'France')
set_attribute(objofile, 'city', 'Paris')
set_attribute(objofile, 'script', 'TS_ASCII_netCFD.py')
set_attribute(objofile, 'version', '1.0')

time_step = []
psfc = []
rainc = []
rainnc = []
drydens = []

tsvals = {}

iline=0
itz = 0
for line in objlfile:
    values = reduce_spaces(line)
#    print iline, values[0], dimz, Searchdimz
# Writting general information
    if iline == 0:
        newvar = objofile.createVariable('station','c')
        valueshead = ts_header(line)

        set_attribute(newvar, 'name', reduce_last_spaces(valueshead[0]))
        set_attribute(newvar, 'acronym',valueshead[3].replace(' ',''))

        set_attribute(newvar, 'real_lon', valueshead[5])
        set_attribute(newvar, 'real_lat', valueshead[7])

        set_attribute(newvar, 'x_grid_point', valueshead[9])
        set_attribute(newvar, 'y_grid_point', valueshead[11])

        set_attribute(newvar, 'model_lon', valueshead[13])
        set_attribute(newvar, 'model_lat', valueshead[15])
        set_attribute(newvar, 'model_height', valueshead[17])
        simstarttime = refdate
    else:
        tsvals[itz] = values
        time_step.append(np.float(values[1]))
        itz = itz + 1
    iline = iline + 1

dimt = len(time_step)

print '  Found:',dimt,'time steps'
objlfile.close()

time_stepv = np.zeros((dimt), dtype=np.float)
tsvaluesv = np.zeros( (dimt,Ntsvariables), dtype= np.float)

pracc = np.zeros((dimt), dtype=np.float)

itz = 0
for it in range(dimt):
    time_stepv[it] = np.float(time_step[it])

    for iv in range(Ntsvariables):
        tsvaluesv[it,iv] = np.float(tsvals[itz][iv+5])

    pracc[it] = np.float(tsvals[it][16]) + np.float(tsvals[it][17])
    itz = itz + 1

# time
newvar = objofile.createVariable('time','f8',('time'))
newvar[:] = time_stepv*3600.
newattr = basicvardef(newvar, 'time', 'time', 'seconds since ' +               \
  simstarttime.replace('_',' '))
set_attribute(newvar, 'calendar', 'standard')

dt = time_stepv[1] - time_stepv[0]

# time-series variables
for iv in range(Ntsvariables):
    if tsvn[iv] == 't' or tsvn[iv] == 'u' or tsvn[iv] == 'v' or tsvn[iv] == 'q': 
      varname, stdname, minvar, maxvar, longname, unitsvar, cbarvar =                \
      variables_values('TS' + tsvn[iv])
      tsu = unitsvar
    else:
      varname, stdname, minvar, maxvar, longname, unitsvar, cbarvar =                \
      variables_values(tsvn[iv])
      tsu = tsvu[iv]

    newvar = objofile.createVariable(varname, 'f4', ('time'))
    newvar[:] = tsvaluesv[:,iv]

    newattr = basicvardef(newvar, stdname, longname.replace('|',' '), tsu)
    newattr = set_attribute(newvar, 'wrfTSname', tsvn[iv])
    newattr = set_attribute(newvar, 'wrfTSdesc', tsvln[iv])

# Extra vars

# pr
varvals = np.zeros((dimt), dtype=np.float)
varvals[1:dimt] = pracc[1:dimt] - pracc[0:dimt-1]
varname, stdname, minvar, maxvar, longname, unitsvar, cbarvar =                      \
      variables_values('RAINTOT')

newvar = objofile.createVariable(varname, 'f4', ('time'))
newvar[:] = varvals / dt
newattr = basicvardef(newvar, stdname, longname.replace('|',' '), unitsvar )

objofile.sync()
objofile.close()

print 'Successfull generation of Time-Series netCDF file "' + ofile + '" !!!!!'
