### script to download opacities from DACE
### adapted from https://github.com/bmorris3/shone/tree/refs/heads/opacity-downloads

import logging
import warnings
from functools import cached_property
import os
import tarfile
import shutil

import numpy as np
import xarray as xr
from astropy.table import Table
from dace_query.opacity import Molecule, Atom

from chemistry import species_name_to_common_isotopologue_name

interp_kwargs = dict(
    method='nearest',
    kwargs=dict(fill_value="extrapolate")
)

__all__ = [
    'download_molecule',
    'download_atom'
]

class AvailableOpacities:
    @cached_property
    def atomic(self):
        return get_atomic_database()

    @cached_property
    def molecular(self):
        return get_molecular_database()

    def get_atomic_database_entry(self, atom, charge, line_list):
        table = self.atomic
        return table[(
            (table['atom'] == atom) &
            (table['line_list'] == line_list) &
            (table['charge'] == charge)
        )]

    def get_molecular_database_entry(self, isotopologue, line_list):
        table = self.molecular
        return table[(
            (table['isotopologue'] == isotopologue) &
            (table['line_list'] == line_list)
        )]

    def get_molecular_line_lists(self, isotopologue):
        table = self.molecular
        return set(table[table['isotopologue'] == isotopologue]['line_list'])

    def get_atomic_line_lists(self, atom):
        table = self.atomic
        return set(table[table['atom'] == atom]['line_list'])

    def get_atomic_latest_version(self, atom, charge, line_list):
        table = self.atomic
        matches = table[(
            (table['atom'] == atom) &
            (table['line_list'] == line_list) &
            (table['charge'] == charge)
        )]
        return max(set(matches['version']))

    def get_molecular_latest_version(self, isotopologue, line_list):
        table = self.molecular
        matches = table[(
            (table['isotopologue'] == isotopologue) &
            (table['line_list'] == line_list)
        )]
        return max(set(matches['version']))

    def get_atomic_pT_range(self, atom, charge, line_list):
        table = self.get_atomic_database_entry(atom, charge, line_list)
        temperature_range = (
            int(table['temp_min_k'][0]),
            int(table['temp_max_k'][0])
        )
        pressure_range = (
            float(table['pressure_min_exponent_b'][0]),
            float(table['pressure_max_exponent_b'][0])
        )
        return temperature_range, pressure_range

    def get_molecular_pT_range(self, isotopologue, line_list):
        table = self.get_molecular_database_entry(isotopologue, line_list)
        temperature_range = (
            int(table['temp_min_k'][0]),
            int(table['temp_max_k'][0])
        )
        pressure_range = (
            float(table['pressure_min_exponent_b'][0]),
            float(table['pressure_max_exponent_b'][0])
        )
        return temperature_range, pressure_range


available_opacities = AvailableOpacities()


def get_atomic_database():
    db = Atom.query_database()
    table = Table(db)
    table.add_index('atom')
    return table


def get_molecular_database():
    db = Molecule.query_database()
    table = Table(db)
    table.add_index('isotopologue')
    return table

class DownloadDace:
    def __init__(self,
                 name='48Ti-16O',
                 line_list='Toto',
                 temperature_range=None,
                 pressure_range=None,
                 version=1,
                 output_dir="tmp"
    ):
        self.output_dir = output_dir
        self.name = name
        self.line_list = line_list
        self.temperature_range = temperature_range
        self.pressure_range = pressure_range
        self.version = version

    def dace_download_molecule(self):
        os.makedirs(self.output_dir, exist_ok=True)
        archive_name = self.name + '__' + self.line_list + '.tar.gz'
        path = os.path.join(self.output_dir, archive_name)
        if os.path.exists(path):
            print("Molecule already downloaded")
            return path
        Molecule.download(
            self.name,
            self.line_list,
            float(self.version),
            self.temperature_range,
            self.pressure_range,
            output_directory=self.output_dir,
            output_filename=archive_name
        )

        return path


    def dace_download_atom(self, charge=0):
        os.makedirs(self.output_dir, exist_ok=True)
        archive_name = self.name + '__' + self.line_list + '.tar.gz'
        Atom.download(
            self.name, charge,
            self.line_list, float(self.version),
            self.temperature_range,
            self.pressure_range,
            output_directory=self.output_dir,
            output_filename=archive_name
        )
        #### Atoms don't download anything for now, reason unknown

        return os.path.join(self.output_dir, archive_name)


    def untar_bin_files(self, archive_name):
        def bin_files(members):
            for tarinfo in members:
                if os.path.splitext(tarinfo.name)[1] == ".bin":
                    yield tarinfo

        path = os.path.join(self.output_dir, self.name + '__' + self.line_list)
        with tarfile.open(archive_name, 'r') as tar:
            tar.extractall(path=path , members=bin_files(tar))

        return path


    def opacity_dir_to_netcdf(self, opacity_dir, outpath):
        outpath = self.output_dir + '/' + outpath
        if os.path.exists(outpath):
            print(f"{outpath} already exists")
            return
        temperature_grid = []
        pressure_grid = []
        wl_end = np.inf

        for dirpath, dirnames, filenames in os.walk(opacity_dir):
            for filename in filenames:
                if not filename.endswith('.bin'):
                    continue

                # Wavenumber points from range given in the file names
                temperature = int(filename.split('_')[3])
                sign = 1 if filename.split('_')[4][0] == 'p' else -1
                pressure = 10 ** (sign * float(filename.split('_')[4][1:].split('.')[0]) / 100)

                wl_start = int(filename.split('_')[1])
                wl_end = min(wl_end, int(filename.split('_')[2]))
                wlen = np.arange(wl_start, wl_end, 0.01)

                # catch divide by zero warning here:
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore', RuntimeWarning)

                    # Convert to micron
                    wavelength = 1 / wlen / 1e-4

                unique_wavelengths = wavelength[1:][::-1]
                temperature_grid.append(temperature)
                pressure_grid.append(pressure)

        tgrid = np.sort(list(set(temperature_grid)))
        pgrid = np.sort(list(set(pressure_grid)))

        if len(pgrid) == 1:
            extrapolate_pgrid = True
            pgrid = np.concatenate([pgrid, 10 ** (-1 * np.log10(pgrid))])
        else:
            extrapolate_pgrid = False

        opacity_grid = np.zeros(
            (len(tgrid), len(pgrid), len(unique_wavelengths)), dtype='float32'
        )

        for dirpath, dirnames, filenames in os.walk(opacity_dir):
            for filename in filenames:
                if not filename.endswith('.bin'):
                    continue

                opacity = np.fromfile(
                    os.path.join(dirpath, filename), dtype=np.float32
                )[1:][::-1]

                # Wavenumber points from range given in the file names
                temperature = int(filename.split('_')[3])
                sign = 1 if filename.split('_')[4][0] == 'p' else -1
                pressure = 10 ** (sign * float(filename.split('_')[4][1:].split('.')[0]) / 100)

                temperature_ind = np.argmin(np.abs(tgrid - temperature))
                pressure_ind = np.argmin(np.abs(pgrid - pressure))

                opacity_grid[temperature_ind, pressure_ind, :] = opacity[:len(unique_wavelengths)]

        if extrapolate_pgrid:
            for dirpath, dirnames, filenames in os.walk(opacity_dir):
                for filename in filenames:
                    opacity = np.fromfile(
                        os.path.join(dirpath, filename), dtype=np.float32
                    )[1:][::-1]

                    # Wavenumber points from range given in the file names
                    temperature = int(filename.split('_')[3])
                    # *Flip the sign for the extrapolated grid point in pressure*
                    sign = -1 if filename.split('_')[4][0] == 'p' else 1
                    pressure = 10 ** (sign * float(filename.split('_')[4][1:].split('.')[0]) / 100)

                    temperature_ind = np.argmin(np.abs(tgrid - temperature))
                    pressure_ind = np.argmin(np.abs(pgrid - pressure))

                    opacity_grid[temperature_ind, pressure_ind, :] = opacity

        ds = xr.Dataset(
            data_vars=dict(
                opacity=(["temperature", "pressure", "wavelength"],
                        opacity_grid)
            ),
            coords=dict(
                temperature=(["temperature"], tgrid),
                pressure=(["pressure"], pgrid),
                wavelength=unique_wavelengths
            )
        )
        if not os.path.exists(os.path.dirname(outpath)):
            os.makedirs(os.path.dirname(outpath), exist_ok=True)

        ds.to_netcdf(outpath if outpath.endswith(".nc") else outpath + '.nc',
                    encoding={'opacity': {'dtype': 'float32'}})


    def clean_up(self, bin_dir, archive_name):
        os.remove(archive_name)
        shutil.rmtree(bin_dir)


def download_molecule(
    molecule=None,
    isotopologue=None,
    line_list='first-found',
    temperature_range=None,
    pressure_range=None,
    version=None,
    output_dir="datadir/dace"
):
    """
    Download molecular opacity data from DACE.

    .. warning::
        This generates *very* large files. Only run this
        method if you have ~6 GB available per molecule.

    Parameters
    ----------
    isotopologue : str
        For example, "1H2-16O" for water.
    molecule : str
        Common name for the molecule, for example: "H2O"
    line_list : str, default is ``'first-found'``, optional
        For example, "POKAZATEL" for water. By default, the first available
        line list for this isotopologue is chosen.
    temperature_range : tuple, optional
        Tuple of integers specifying the min and max
        temperature requested. Defaults to the full
        range of available temperatures.
    pressure_range : tuple, optional
        Tuple of floags specifying the log base 10 of the
        min and max pressure [bar] requested. Defaults to the full
        range of available pressures.
    version : float, optional
        Version number of the line list in DACE. Defaults to the
        latest version.
    """
    if molecule is not None:
        isotopologue = species_name_to_common_isotopologue_name(molecule)

    available_line_lists = available_opacities.get_molecular_line_lists(isotopologue)
    print(f"Available line lists for {isotopologue}:")
    print(available_line_lists)

    if line_list is None:
        return

    if line_list == 'first-found':
        line_list = sorted(list(available_line_lists)).pop()
        logging.warning(f"Using first-found line list for {isotopologue}: '{line_list}'")

    elif line_list not in available_line_lists:
        raise ValueError(f"The requested '{line_list}' is not in the set of "
                         f"available line lists {available_line_lists}.")

    if version is None:
        version = available_opacities.get_molecular_latest_version(isotopologue, line_list)
        logging.warning(f"Using latest version of the line "
                        f"list '{line_list}' for {isotopologue}: {version}")

    if temperature_range is None or pressure_range is None:
        dace_temp_range, dace_press_range = available_opacities.get_molecular_pT_range(
            isotopologue, line_list
        )

    if temperature_range is None:
        temperature_range = dace_temp_range

    if pressure_range is None:
        pressure_range = dace_press_range

    download_dace = DownloadDace(isotopologue, line_list, temperature_range, pressure_range, version, output_dir)
    archive_name = download_dace.dace_download_molecule()

    bin_dir = download_dace.untar_bin_files(archive_name)

    download_dace.opacity_dir_to_netcdf(bin_dir, isotopologue + '__' + line_list + '.nc')
    # clean_up(bin_dir, archive_name)
    return isotopologue + '__' + line_list + '.nc'


def download_atom(atom, charge, line_list='first-found',
                  temperature_range=None, pressure_range=None, version=None, output_dir="tmp"):
    """
    Download atomic opacity data from DACE.

    .. warning::
        This generates *very* large files. Only run this
        method if you have ~6 GB available per molecule.

    Parameters
    ----------
    atom : str
        For example, "Na" for sodium.
    charge : int
        For example, 0 for neutral.
    line_list : str, default is ``'first-found'``, optional
        For example, "Kurucz". By default, the first available
        line list for this atom/charge is chosen.
    temperature_range : tuple, optional
        Tuple of integers specifying the min and max
        temperature requested. Defaults to the full
        range of available temperatures.
    pressure_range : tuple, optional
        Tuple of floags specifying the log base 10 of the
        min and max pressure [bar] requested. Defaults to the full
        range of available pressures.
    version : float, optional
        Version number of the line list in DACE. Defaults to the
        latest version.
    """
    available_line_lists = available_opacities.get_atomic_line_lists(atom)
    print(f"Available line lists for {atom}:")
    print(available_line_lists)

    if line_list is None:
        return

    if line_list == 'first-found':
        line_list = sorted(list(available_line_lists)).pop()
        logging.warning(f"Using first-found line list for {atom}: '{line_list}'")

    elif line_list not in available_line_lists:
        raise ValueError(f"The requested '{line_list}' is not in the set of "
                         f"available line lists {available_line_lists}.")

    if temperature_range is None or pressure_range is None:
        dace_temp_range, dace_press_range = available_opacities.get_atomic_pT_range(
            atom, charge, line_list
        )

    if version is None:
        version = available_opacities.get_atomic_latest_version(atom, charge, line_list)
        logging.warning(f"Using latest version of the line "
                        f"list '{line_list}' for {atom}: {version}")

    if temperature_range is None:
        temperature_range = dace_temp_range

    if pressure_range is None:
        pressure_range = dace_press_range

    download_dace = DownloadDace(atom, line_list, temperature_range, pressure_range, version, output_dir)

    archive_name = download_dace.dace_download_atom(charge)
    bin_dir = download_dace.untar_bin_files(archive_name)

    download_dace.opacity_dir_to_netcdf(bin_dir, atom + '_' + str(int(charge)) + '__' + line_list + '.nc')
    # download_dace.clean_up(bin_dir, archive_name)