//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "OpenClTimer.hpp"

#include <string>
#include <sstream>

#include <boost/core/ignore_unused.hpp>

namespace armnn
{

// Nothing to set up at construction time: every member simply takes its
// default-initialized state. Interception only begins when Start() is called.
OpenClTimer::OpenClTimer() = default;

// Begins a timing session.
//
// Discards any kernels recorded by a previous session, saves the currently
// installed CLSymbols clEnqueueNDRangeKernel entry in m_OriginalEnqueueFunction
// and replaces it with an interceptor. The interceptor forwards every enqueue
// to the saved function and records a (name, event) pair in m_Kernels so that
// GetMeasurements() can query it later. Stop() puts the saved entry back.
//
// NOTE(review): calling Start() twice without an intervening Stop() would save
// the interceptor itself as the "original" function - confirm callers always
// pair Start()/Stop().
void OpenClTimer::Start()
{
    m_Kernels.clear();

    auto interceptor = [this](  cl_command_queue command_queue,
                                cl_kernel        kernel,
                                cl_uint          work_dim,
                                const size_t    *gwo,
                                const size_t    *gws,
                                const size_t    *lws,
                                cl_uint          num_events_in_wait_list,
                                const cl_event * event_wait_list,
                                cl_event *       event)
        {
            // NOTE(review): the caller's event out-parameter is never written, so a
            // caller that passes a non-null pointer does not receive an event.
            boost::ignore_unused(event);

            // Get the name of the kernel. The wrapper is constructed with the
            // retain flag set, so the caller's handle is not consumed here.
            cl::Kernel retainedKernel(kernel, true);
            std::stringstream ss;
            ss << retainedKernel.getInfo<CL_KERNEL_FUNCTION_NAME>();

            // Embed workgroup sizes into the name. The size arrays are only
            // guaranteed to hold work_dim entries, so never read past that.
            const auto appendSizes = [&ss, work_dim](const char* label, const size_t* sizes)
            {
                if(sizes == nullptr)
                {
                    return;
                }
                ss << " " << label << "[";
                for(cl_uint dim = 0; dim < work_dim; ++dim)
                {
                    if(dim != 0)
                    {
                        ss << ",";
                    }
                    ss << sizes[dim];
                }
                ss << "]";
            };
            appendSizes("GWS", gws);
            appendSizes("LWS", lws);

            // Start from a null handle so a failed enqueue cannot leave garbage behind.
            cl_event customEvent = nullptr;

            // Forward to original OpenCl function
            const cl_int retVal = m_OriginalEnqueueFunction( command_queue,
                                                             kernel,
                                                             work_dim,
                                                             gwo,
                                                             gws,
                                                             lws,
                                                             num_events_in_wait_list,
                                                             event_wait_list,
                                                             &customEvent);

            // Store the Kernel info for later GetMeasurements() call, but only when
            // the enqueue succeeded (0 == CL_SUCCESS) and actually produced an event;
            // otherwise GetMeasurements() would wait on an invalid handle.
            if(retVal == 0 && customEvent != nullptr)
            {
                m_Kernels.emplace_back(ss.str(), customEvent);
            }

            return retVal;
        };

    // Remember the real entry point before swapping in the interceptor.
    m_OriginalEnqueueFunction = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
    CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
}

// Ends the timing session by putting back the enqueue function that Start()
// saved in m_OriginalEnqueueFunction. Recorded kernels are left untouched so
// they can still be read through GetMeasurements().
void OpenClTimer::Stop()
{
    auto& symbols = CLSymbols::get();
    symbols.clEnqueueNDRangeKernel_ptr = m_OriginalEnqueueFunction;
}

std::vector<Measurement> OpenClTimer::GetMeasurements() const
{
    std::vector<Measurement> measurements;

    cl_command_queue_properties clQueueProperties = CLScheduler::get().queue().getInfo<CL_QUEUE_PROPERTIES>();

    int idx = 0;
    for (auto& kernel : m_Kernels)
    {
        std::string name = std::string(this->GetName()) + "/" + std::to_string(idx++) + ": " + kernel.m_Name;

        double timeUs = 0.0;
        if((clQueueProperties & CL_QUEUE_PROFILING_ENABLE) != 0)
        {
            // Wait for the event to finish before accessing profile results.
            kernel.m_Event.wait();

            cl_ulong start = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
            cl_ulong end   = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
            timeUs = static_cast<double>(end - start) / 1000.0;
        }

        measurements.emplace_back(name, timeUs, Measurement::Unit::TIME_US);
    }

    return measurements;
}

} //namespace armnn
