# CMake 3.8 is the first release that knows the cxx_std_11 meta-feature, which
# this file passes to target_compile_features() for the hptt target.
cmake_minimum_required(VERSION 3.8)
project(HPTT CXX)

# Project-wide default: build as C++11 and refuse to fall back to an older standard.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Ask the generator to write compile_commands.json for external tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Default for the IBM code path; switched ON further down when the target
# processor is ppc64le.
set(ENABLE_IBM OFF)

# Select the architecture back end from the target processor name.
#
# x86 / amd64: a small CPUID program is compiled and executed; exit code 0 means
#   the CPU reports AVX512F. Exactly one of HAS_AVX512 / ENABLE_AVX ends up TRUE;
#   any other outcome (compile failure, non-zero exit, probe skipped) selects
#   the plain AVX path.
# arm64 / aarch64: ENABLE_ARM is switched ON.
#
# The probe only inspects CPUID leaf 7 (EBX bit 16); it does not query whether
# the operating system has enabled AVX512 register state.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
  if (CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR)
    # try_run() cannot execute target binaries here and would abort the
    # configure step asking for hand-filled cache entries. Skip the probe; a
    # RUN_RESULT_VAR value pre-seeded in the cache is still honoured below.
    message(STATUS "Cross-compiling without an emulator: AVX512 run-time probe skipped")
  else ()
    # Write the probe source into this project's own binary directory.
    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/avx512_check.cpp "
    #include <iostream>

    bool check_avx512_support() {
        int flag_AVX512F = 0;
        int regs[4];

        asm volatile(\"cpuid\"
                     : \"=a\"(regs[0]), \"=b\"(regs[1]), \"=c\"(regs[2]), \"=d\"(regs[3])
                     : \"a\"(7), \"c\"(0));

        flag_AVX512F = (regs[1] & (1 << 16)) != 0;   // Bit 16 of ebx for AVX512F

        return flag_AVX512F;
    }

    int main() {
        if (check_avx512_support()) {
            return 0;  // AVX512 supported
        } else {
            return 1;  // AVX512 not supported
        }
    }
")

    # Compile the probe and run it; the exit code lands in RUN_RESULT_VAR.
    try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
            ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/avx512_check.cpp
            RUN_OUTPUT_VARIABLE RUN_OUTPUT)
  endif ()

  # Exit code 0 -> AVX512 kernels; anything else -> AVX kernels.
  if(RUN_RESULT_VAR EQUAL "0")
    set(HAS_AVX512 TRUE)
    set(ENABLE_AVX FALSE)
  else ()
    set(HAS_AVX512 FALSE)
    set(ENABLE_AVX TRUE)
  endif ()
  message(STATUS "AVX512 support: ${HAS_AVX512}")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64|ARM64|aarch64|AARCH64)")
  set(ENABLE_ARM ON)
endif ()

# Little-endian 64-bit POWER targets take the IBM code path.
if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le")
  set(ENABLE_IBM ON)
endif ()

# Per-compiler options appended to HPTT_CXX_FLAGS (OpenMP switch, host tuning,
# warning suppression). Compilers not listed here contribute nothing.

# Host-tuning flags (-march=native / -mtune=native) are withheld on Apple arm64.
if (APPLE AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
  set(hptt_native_tuning OFF)
else ()
  set(hptt_native_tuning ON)
endif ()

if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
  list(APPEND HPTT_CXX_FLAGS -qopenmp -xhost)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  # OpenMP is always enabled for GCC; the IBM build takes nothing else from here.
  list(APPEND HPTT_CXX_FLAGS -fopenmp)
  if (NOT ENABLE_IBM)
    list(APPEND HPTT_CXX_FLAGS -Wno-restrict -Wno-unknown-pragmas -Wno-unused-function)
    if (hptt_native_tuning)
      list(APPEND HPTT_CXX_FLAGS -march=native -mtune=native)
    endif ()
  endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  # NOTE(review): on Apple arm64 this branch adds nothing at all, so -fopenmp
  # is dropped too, unlike the GNU branch above. Confirm this is intended.
  if (hptt_native_tuning)
    list(APPEND HPTT_CXX_FLAGS -fopenmp -march=native)
  endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
  list(APPEND HPTT_CXX_FLAGS -silent -w -Mnovect)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "XL")
  list(APPEND HPTT_CXX_FLAGS -qsmp=omp)
endif ()

# Architecture back end: pick the instruction-set flags and the matching
# HPTT_ARCH_* macro. The switches are tested in the order AVX, AVX512, ARM, IBM
# and only the first one that is set contributes flags.
set(hptt_arch_flags "")
if (ENABLE_AVX)
  set(hptt_arch_flags -mavx -DHPTT_ARCH_AVX)
elseif (HAS_AVX512)
  set(hptt_arch_flags -march=native -DHPTT_ARCH_AVX512)
elseif (ENABLE_ARM)
  set(hptt_arch_flags -DHPTT_ARCH_ARM)
elseif (ENABLE_IBM)
  set(hptt_arch_flags -mtune=native -DHPTT_ARCH_IBM -maltivec -mabi=altivec)
endif ()
list(APPEND HPTT_CXX_FLAGS ${hptt_arch_flags})

# Process the sysinfo subdirectory first; hptt links against `sysinfo` below
# (presumably a target defined there - verify in sysinfo/CMakeLists.txt).
add_subdirectory(sysinfo)

# Translation units that make up the static hptt library.
set(HPTT_SRCS
  src/hptt.cpp
  src/plan.cpp
  src/transpose.cpp
  src/utils.cpp
)

add_library(hptt STATIC ${HPTT_SRCS})

# Everything is PUBLIC, so consumers of hptt inherit the C++11 requirement,
# the accumulated HPTT_CXX_FLAGS, the include path and the sysinfo link.
target_compile_features(hptt PUBLIC cxx_std_11)
target_compile_options(hptt PUBLIC ${HPTT_CXX_FLAGS})
target_include_directories(hptt PUBLIC "${PROJECT_SOURCE_DIR}/include")
target_link_libraries(hptt PUBLIC sysinfo)
