.PHONY: all install clean

# ---- Directories -----------------------------------------------------------
# Installation directory for the libraries ($(DESTDIR) is prepended on install).
LIBDIR    := /usr/local/lib
# Directory added to the C include path (see CFLAGS).
TLOG_DIR  := ../../libtlog/src_mpi
# Scratch directory holding the position-independent objects for the .so.
TMP_DIR   := .libs

# ---- C toolchain -----------------------------------------------------------
CC        := /usr/bin/mpicc
# Kept recursively expanded: feature sections further down append to it.
CFLAGS    = -O2 -std=gnu99 -Wall -Wpointer-arith -fopenmp -I../../include -I../include -I$(TLOG_DIR)
PICFLAGS  := -fPIC
SOFLAGS   := -shared

# ---- CUDA toolchain --------------------------------------------------------
GPUCC     := nvcc
GPUCFLAGS := -arch=sm_20 -O3 -Wno-deprecated-gpu-targets

# ---- Archiving and installation tools --------------------------------------
AR        := ar
ARFLAGS   := cr
RANLIB    := ranlib
INSTALL   := install
MKDIR_P   := install -m 755 -d

# ---- Build products --------------------------------------------------------
LIBXMP    := libxmp.a
# Libraries built by `all`; the shared variant is appended when enabled.
LIBS      = $(LIBXMP)

# Internal headers; every object file is rebuilt when one of them changes.
HEADERS  = ../include/xmp_internal.h
HEADERS += ../include/xmp_constant.h
HEADERS += ../include/xmp_data_struct.h
HEADERS += ../include/xmp_math_function.h
HEADERS += ../include/xmpco_internal.h
HEADERS += ../include/xmpco_params.h

# Objects that are always part of the library.  The feature sections further
# down append to this list.
OBJECTS  = xmp_lib.o xmp_util.o xmp_world.o
OBJECTS += xmp_nodes_stack.o xmp_nodes.o xmp_template.o
OBJECTS += xmp_barrier.o xmp_reduce.o xmp_bcast.o
OBJECTS += xmp_align.o xmp_shadow.o xmp_array_section.o
OBJECTS += xmp_loop.o xmp_gmove.o xmp_math_function.o
OBJECTS += xmp_runtime.o xmp_tlog_runtime.o xmp_io.o
OBJECTS += xmp_reflect.o xmp_pack_vector.o xmp_intrinsic.o
OBJECTS += xmp_section_desc.o xmp_sort.o xmp_async.o
OBJECTS += xmpc_gmove.o xmp_reduce_shadow.o xmp_index.o

# Boolean encoding used by every switch below: a feature is enabled when its
# value equals $(TRUE), i.e. 0; a value of 1 disables it.
FALSE            := 1
TRUE             := 0

# One-sided communication backends.
IS_GASNET        := 1
IS_MPI3_ONESIDED := 0
IS_FJRDMA        := 1
# NOTE(review): @TCA@ looks like a configure placeholder that was never
# substituted.  As written it never equals $(TRUE), so the TCA sections stay
# disabled -- confirm that this is intended.
IS_TCA           := @TCA@

# Target platforms (each adds its own -D macro to CFLAGS).
IS_KCOMPUTER     := 1
IS_FX10          := 1
IS_FX100         := 1

# Asynchronous MPI-3 support (-D_XMP_MPI3).
IS_MPI3_ASYNC    := 0

# BLAS implementations.
IS_LIBBLAS       := 1
IS_SSL2BLAMP     := 1
IS_INTELMKL      := 1

# Accelerator support.
IS_XACC          := 1
IS_CUDA          := 1
IS_OPENCL        := 1

# Also build the shared library next to the static archive.
ENABLE_SHARED    := 1

# When shared libraries are enabled, build libxmp.so in addition to the
# static archive.
ifeq ($(ENABLE_SHARED), $(TRUE))
  LIBS += $(LIBXMP:.a=.so)
endif

# Platform selection: each enabled platform contributes one preprocessor macro.
ifeq ($(IS_KCOMPUTER), $(TRUE))
  CFLAGS += -D_KCOMPUTER
endif

ifeq ($(IS_FX10), $(TRUE))
  CFLAGS += -D_FX10
endif

ifeq ($(IS_FX100), $(TRUE))
  CFLAGS += -D_FX100
endif

# xmpco_* objects that every one-sided backend below needs.
XMPCO_OBJECTS = xmpco_alloc.o xmpco_get_expr.o xmpco_get_stmt.o xmpco_lib.o \
                xmpco_msg.o xmpco_params.o xmpco_put.o xmpco_sync.o

# GASNet is checked first; the MPI-3 one-sided backend is considered only
# when GASNet is disabled.
ifeq ($(IS_GASNET), $(TRUE))
  OBJECTS += $(XMPCO_OBJECTS) \
             xmp_onesided.o xmp_onesided_gasnet.o \
             xmp_coarray.o xmp_coarray_utils.o xmp_coarray_local.o \
             xmp_post_wait.o xmp_post_wait_gasnet.o xmp_coarray_gasnet.o \
             xmp_lock_unlock.o xmp_lock_unlock_gasnet.o xmp_intrinsic_gasnet.o
  CFLAGS  += -D_XMP_GASNET
else
ifeq ($(IS_MPI3_ONESIDED), $(TRUE))
  # Left commented out in the original object list for this backend:
  #   xmp_coarray_mpi_util.o xmp_lock_unlock.o xmp_lock_unlock_mpi.o
  OBJECTS += $(XMPCO_OBJECTS) \
             xmp_onesided.o xmp_onesided_mpi.o \
             xmp_coarray.o xmp_coarray_utils.o xmp_coarray_local.o \
             xmp_post_wait.o xmp_post_wait_mpi.o xmp_coarray_mpi.o \
             xmp_intrinsic_mpi.o
  CFLAGS  += -D_XMP_MPI3_ONESIDED
endif
endif

# The FJRDMA backend is evaluated independently of the two backends above.
ifeq ($(IS_FJRDMA), $(TRUE))
  OBJECTS += $(XMPCO_OBJECTS) \
             xmp_onesided.o xmp_onesided_fjrdma.o \
             xmp_coarray.o xmp_coarray_utils.o xmp_coarray_local.o \
             xmp_post_wait.o xmp_post_wait_fjrdma.o xmp_coarray_fjrdma.o \
             xmp_intrinsic_fjrdma.o
  CFLAGS  += -D_XMP_FJRDMA
endif

# Asynchronous MPI-3 support.
ifeq ($(IS_MPI3_ASYNC), $(TRUE))
  CFLAGS += -D_XMP_MPI3
endif

# BLAS back ends: one preprocessor macro per enabled implementation.
ifeq ($(IS_LIBBLAS), $(TRUE))
  CFLAGS += -D_XMP_LIBBLAS
endif

ifeq ($(IS_SSL2BLAMP), $(TRUE))
  CFLAGS += -D_XMP_SSL2BLAMP
endif

ifeq ($(IS_INTELMKL), $(TRUE))
  CFLAGS += -D_XMP_INTELMKL
endif

# TCA one-sided backend: its objects and feature macro.
ifeq ($(IS_TCA), $(TRUE))
  OBJECTS += xmp_onesided.o xmp_onesided_tca.o \
             xmp_coarray.o xmp_coarray_utils.o xmp_coarray_local.o \
             xmp_post_wait.o xmp_post_wait_tca.o xmp_coarray_tca.o
  CFLAGS  += -D_XMP_TCA
endif

# Accelerator (XACC) support: extra runtime objects, the feature macro and the
# internal header, plus the CUDA / OpenCL / TCA specific parts nested below.
ifeq ($(IS_XACC), $(TRUE))
    OBJECTS  += xacc_runtime.o xmp_reflect_acc.o xmp_reduce_acc.o xmp_bcast_acc.o xmp_gmove_acc.o xmp_coarray_acc.o
    CFLAGS   += -D_XMP_XACC
    HEADERS  += ../include/xacc_internal.h

ifeq ($(IS_CUDA), $(TRUE))
    # NOTE(review): -I/include looks like an include path built from an empty
    # prefix -- confirm where the CUDA headers are expected to live.
    OBJECTS  += xacc_util_cuda.o xmp_gpu_pack.o
    CFLAGS   += -D_XMP_XACC_CUDA -I/include
    GPUCC     = nvcc
    GPUCFLAGS = -arch=sm_20 -O3 -Wno-deprecated-gpu-targets

# Compile CUDA sources into objects.  The object name must be introduced with
# -o; without it the target would be handed to nvcc as one more input file.
.SUFFIXES: .cu
.cu.o:
	$(GPUCC) $(GPUCFLAGS) -I../include -c $< -o $@

endif
ifeq ($(IS_OPENCL), $(TRUE))
    # The .hex file is generated (rule below); listing it in HEADERS makes the
    # objects that depend on $(HEADERS) wait for it.
    OBJECTS  += xacc_util_cl.o
    CFLAGS   += -D_XMP_XACC_OPENCL -I/include
    HEADERS  += xacc_util_cl_kernel.hex

# Convert an OpenCL source file into a C-style list of byte values.  xxd reads
# from stdin here, so it emits only the bytes and no array declaration.
.SUFFIXES: .cl .hex
.cl.hex:
	xxd -i < $< >$@

endif

# Reflect/reduce implementation: TCA (and hybrid) variants when TCA is
# enabled, the plain GPU variants otherwise.
ifeq ($(IS_TCA), $(TRUE))
    OBJECTS  += xmp_reflect_tca.o xmp_reflect_hybrid.o xmp_tca_runtime.o xmp_reduce_tca.o xmp_reduce_hybrid.o
else
    OBJECTS  += xmp_reduce_gpu.o xmp_reflect_gpu.o
endif
endif

# Default goal: build every configured library.
all: $(LIBS)

# Header dependencies apply to both object flavours: the static objects and
# their position-independent twins in $(TMP_DIR).  This also makes the PIC
# objects wait for any generated file listed in HEADERS, which matters for
# parallel builds.
$(OBJECTS) $(addprefix $(TMP_DIR)/, $(OBJECTS)): $(HEADERS)

# Static archive.  Any previous archive is deleted first, so the new one
# contains only the current objects; it is then indexed with ranlib.
$(LIBXMP): $(OBJECTS)
	rm -f $@ && \
	$(AR) $(ARFLAGS) $@ $^ && \
	$(RANLIB) $@

# Shared library, linked from the position-independent objects kept in
# $(TMP_DIR).
$(LIBXMP:.a=.so): $(addprefix $(TMP_DIR)/, $(OBJECTS))
	rm -f $@
	$(CC) $(SOFLAGS) $(PICFLAGS) -o $@ $^

# Position-independent objects for the shared library are compiled into
# $(TMP_DIR), which is created on demand.
$(TMP_DIR)/%.o : %.c
	@$(MKDIR_P) $(@D)
	$(CC) $(CFLAGS) $(PICFLAGS) -c $< -o $@

# CUDA counterpart of the rule above.  The shared library needs a
# $(TMP_DIR) object for every entry of OBJECTS, including those built from
# .cu sources; -Xcompiler forwards the PIC flag to the host compiler.
$(TMP_DIR)/%.o : %.cu
	@$(MKDIR_P) $(@D)
	$(GPUCC) $(GPUCFLAGS) -Xcompiler $(PICFLAGS) -I../include -c $< -o $@

# Objects for the static archive, compiled next to their sources.
%.o: %.c
	$(CC) $(CFLAGS) -c $< -o $@

# Copy the built libraries into $(LIBDIR), creating the directory if needed.
# DESTDIR is prepended so the files can be staged into a separate root.
install: $(LIBS)
	$(MKDIR_P) $(DESTDIR)$(LIBDIR)
	$(INSTALL) $(LIBS) $(DESTDIR)$(LIBDIR)

# Remove editor backups, core dumps, objects, generated .hex files, the built
# libraries and the PIC object directory.
clean:
	rm -rf *~ *.core core core.* \
	       *.o *.hex \
	       $(LIBS) $(TMP_DIR)
