COMPMID-1182: Fix printf from OpenCL kernels (cl_arm_printf) not working

Change-Id: I013d57f6e2becbd6d2d7700ce5fbbeca670443c4
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/133735
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h
index ab39d0f..0b45dfe 100644
--- a/arm_compute/graph/backends/CL/CLDeviceBackend.h
+++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h
@@ -63,8 +63,9 @@
     std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
 
 private:
-    CLTuner           _tuner;     /**< CL kernel tuner */
-    CLBufferAllocator _allocator; /**< CL buffer affinity allocator */
+    bool                               _initialized; /**< Flag that specifies if the backend has been default initialized */
+    CLTuner                            _tuner;       /**< CL kernel tuner */
+    std::unique_ptr<CLBufferAllocator> _allocator;   /**< CL buffer affinity allocator */
 };
 } // namespace backends
 } // namespace graph
diff --git a/arm_compute/graph/backends/GLES/GCDeviceBackend.h b/arm_compute/graph/backends/GLES/GCDeviceBackend.h
index dc0e2b0..ba78922 100644
--- a/arm_compute/graph/backends/GLES/GCDeviceBackend.h
+++ b/arm_compute/graph/backends/GLES/GCDeviceBackend.h
@@ -53,7 +53,8 @@
     std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
 
 private:
-    GCBufferAllocator _allocator; /**< GLES buffer affinity allocator */
+    bool              _initialized; /**< Flag that specifies if the backend has been default initialized */
+    GCBufferAllocator _allocator;   /**< GLES buffer affinity allocator */
 };
 } // namespace backends
 } // namespace graph
diff --git a/arm_compute/graph/detail/ExecutionHelpers.h b/arm_compute/graph/detail/ExecutionHelpers.h
index 27cae4b..23dd207 100644
--- a/arm_compute/graph/detail/ExecutionHelpers.h
+++ b/arm_compute/graph/detail/ExecutionHelpers.h
@@ -39,8 +39,6 @@
 
 namespace detail
 {
-/** Initializes the available backends **/
-void default_initialize_backends();
 /** Validates all nodes
  *
  * @param[in] g Graph to validate
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 26b459c..bdd779b 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -32,16 +32,6 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/ICLTuner.h"
 
-#if defined(ARM_COMPUTE_DEBUG_ENABLED)
-namespace
-{
-void printf_callback(const char *buffer, unsigned int len, size_t complete, void *user_data)
-{
-    printf("%.*s", len, buffer);
-}
-}
-#endif /* defined(ARM_COMPUTE_DEBUG_ENABLED) */
-
 namespace arm_compute
 {
 class ICLKernel;
@@ -68,43 +58,7 @@
      *
      * @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr)
      */
-    void default_init(ICLTuner *cl_tuner = nullptr)
-    {
-        if(!_is_initialised)
-        {
-#if defined(ARM_COMPUTE_DEBUG_ENABLED)
-            bool is_cl_arm_printf_supported = false;
-
-            //query devices in the context for cl_arm_printf support
-            std::vector<cl::Device> def_platform_devices;
-            cl::Platform::getDefault().getDevices(CL_DEVICE_TYPE_DEFAULT, &def_platform_devices);
-            is_cl_arm_printf_supported = device_supports_extension(def_platform_devices[0], "cl_arm_printf");
-
-            if(is_cl_arm_printf_supported)
-            {
-                // Create a cl_context with a printf_callback and user specified buffer size.
-                cl_context_properties properties[] =
-                {
-                    // Enable a printf callback function for this context.
-                    CL_PRINTF_CALLBACK_ARM, reinterpret_cast<cl_context_properties>(printf_callback),
-                    // Request a minimum printf buffer size of 4MB for devices in the
-                    // context that support this extension.
-                    CL_PRINTF_BUFFERSIZE_ARM, static_cast<cl_context_properties>(0x100000),
-                    CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(cl::Platform::get()()),
-                    0
-                };
-                cl::Context::setDefault(cl::Context(CL_DEVICE_TYPE_DEFAULT, properties));
-            }
-#endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
-
-            CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault());
-            init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault(), cl_tuner);
-        }
-        else
-        {
-            _cl_tuner = cl_tuner;
-        }
-    }
+    void default_init(ICLTuner *cl_tuner = nullptr);
     /** Schedule the execution of the passed kernel if possible.
      *
      * @param[in] kernel Kernel to execute.
diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp
index 9e3466b..b2c5a44 100644
--- a/examples/graph_squeezenet_v1_1.cpp
+++ b/examples/graph_squeezenet_v1_1.cpp
@@ -33,10 +33,6 @@
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
 
-namespace
-{
-} // namespace
-
 /** Example demonstrating how to implement Squeezenet's v1.1 network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 21a0e68..7f1667a 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -741,6 +741,11 @@
     }
     std::string concat_str;
 
+#if defined(ARM_COMPUTE_DEBUG_ENABLED)
+    // Enable debug properties in CL kernels
+    concat_str += " -DARM_COMPUTE_DEBUG_ENABLED";
+#endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
+
     if(fp16_supported())
     {
         concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index a67e5b2..0ea3254 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -38,7 +38,6 @@
 GraphManager::GraphManager()
     : _workloads()
 {
-    detail::default_initialize_backends();
 }
 
 void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &pm, Target target)
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index 7f2be67..b235c3a 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -66,7 +66,7 @@
 static const std::string tuner_data_filename = "acl_tuner.csv";
 
 CLDeviceBackend::CLDeviceBackend()
-    : _tuner(), _allocator(cl::Context::getDefault())
+    : _initialized(false), _tuner(), _allocator(nullptr)
 {
 }
 
@@ -96,11 +96,18 @@
     CLScheduler::get().default_init(&_tuner);
 
     // Create allocator with new context
-    _allocator = CLBufferAllocator();
+    _allocator = support::cpp14::make_unique<CLBufferAllocator>();
 }
 
 void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
 {
+    // Force backend initialization
+    if(!_initialized)
+    {
+        initialize_backend();
+        _initialized = true;
+    }
+
     // Setup tuner
     set_kernel_tuning(ctx.config().use_tuner);
 
@@ -124,7 +131,7 @@
 
 IAllocator *CLDeviceBackend::backend_allocator()
 {
-    return &_allocator;
+    return _allocator.get();
 }
 
 std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tensor)
@@ -180,7 +187,7 @@
     auto pool_mgr     = std::make_shared<PoolManager>();
     auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
 
-    mm->set_allocator(&_allocator);
+    mm->set_allocator(_allocator.get());
 
     return mm;
 }
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index 770cca5..bfac31a 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -53,7 +53,7 @@
 static detail::BackendRegistrar<GCDeviceBackend> GCDeviceBackend_registrar(Target::GC);
 
 GCDeviceBackend::GCDeviceBackend()
-    : _allocator()
+    : _initialized(false), _allocator()
 {
 }
 
@@ -65,6 +65,13 @@
 
 void GCDeviceBackend::setup_backend_context(GraphContext &ctx)
 {
+    // Force backend initialization
+    if(!_initialized)
+    {
+        initialize_backend();
+        _initialized = true;
+    }
+
     // Setup a management backend
     if(ctx.memory_management_ctx(Target::GC) == nullptr)
     {
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index c370fdf..d68092a 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -35,14 +35,6 @@
 {
 namespace detail
 {
-void default_initialize_backends()
-{
-    for(const auto &backend : backends::BackendRegistry::get().backends())
-    {
-        backend.second->initialize_backend();
-    }
-}
-
 void validate_all_nodes(Graph &g)
 {
     auto &nodes = g.nodes();
diff --git a/src/runtime/CL/CLMemory.cpp b/src/runtime/CL/CLMemory.cpp
index 534c4f9..bbc513d 100644
--- a/src/runtime/CL/CLMemory.cpp
+++ b/src/runtime/CL/CLMemory.cpp
@@ -61,7 +61,7 @@
 
 void CLMemory::create_empty_region()
 {
-    _region_owned = std::make_shared<CLBufferMemoryRegion>(cl::Context::getDefault(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, 0);
+    _region_owned = std::make_shared<CLBufferMemoryRegion>(cl::Context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, 0);
     _region       = _region_owned.get();
 }
 } // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index fdae615..c348dfa 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -28,6 +28,16 @@
 
 using namespace arm_compute;
 
+#if defined(ARM_COMPUTE_DEBUG_ENABLED)
+namespace // File-local: callback registered through CL_PRINTF_CALLBACK_ARM in CLScheduler::default_init()
+{
+void printf_callback(const char *buffer, unsigned int len, size_t /* complete */, void * /* user_data */)
+{
+    printf("%.*s", static_cast<int>(len), buffer); // Prints at most len characters of buffer; a '*' precision must be passed as int
+}
+} // namespace
+#endif /* defined(ARM_COMPUTE_DEBUG_ENABLED) */
+
 std::once_flag CLScheduler::_initialize_symbols;
 
 CLScheduler::CLScheduler()
@@ -42,6 +52,44 @@
     return scheduler;
 }
 
+void CLScheduler::default_init(ICLTuner *cl_tuner)
+{
+    if(!_is_initialised)
+    {
+        cl::Context ctx              = cl::Context::getDefault();
+        auto        queue_properties = cl::CommandQueue::getDefault().getInfo<CL_QUEUE_PROPERTIES>(nullptr);
+#if defined(ARM_COMPUTE_DEBUG_ENABLED)
+        // Query the devices of the default platform for cl_arm_printf support
+        std::vector<cl::Device> def_platform_devices;
+        cl::Platform::getDefault().getDevices(CL_DEVICE_TYPE_DEFAULT, &def_platform_devices);
+        // Only inspect the first device when the query returned one; otherwise keep the default context
+        if(!def_platform_devices.empty() && device_supports_extension(def_platform_devices[0], "cl_arm_printf"))
+        {
+            // Create a cl_context with a printf_callback and user specified buffer size.
+            cl_context_properties properties[] =
+            {
+                CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(cl::Platform::get()()),
+                // Enable a printf callback function for this context.
+                CL_PRINTF_CALLBACK_ARM, reinterpret_cast<cl_context_properties>(printf_callback),
+                // Request a minimum printf buffer size of 0x1000 bytes (4KB) for devices in the
+                // context that support this extension.
+                CL_PRINTF_BUFFERSIZE_ARM, 0x1000,
+                0
+            };
+            ctx = cl::Context(CL_DEVICE_TYPE_DEFAULT, properties);
+        }
+#endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
+
+        cl::CommandQueue queue = cl::CommandQueue(ctx, cl::Device::getDefault(), queue_properties);
+        CLKernelLibrary::get().init("./cl_kernels/", ctx, cl::Device::getDefault());
+        init(ctx, queue, cl::Device::getDefault(), cl_tuner);
+    }
+    else
+    {
+        _cl_tuner = cl_tuner; // Already initialised: only the tuner pointer is replaced
+    }
+}
+
 void CLScheduler::enqueue(ICLKernel &kernel, bool flush)
 {
     ARM_COMPUTE_ERROR_ON_MSG(!_is_initialised,
diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp
index fd0afe9..7e1f293 100644
--- a/tests/framework/Framework.cpp
+++ b/tests/framework/Framework.cpp
@@ -534,12 +534,15 @@
             // Every 5000 tests, reset the OpenCL context to release the allocated memory
             if((id_run_test % 5000) == 0)
             {
-                cl::Context::setDefault(cl::Context());
-                CLScheduler::get().set_context(cl::Context());
-                CLKernelLibrary::get().clear_programs_cache();
+                auto ctx_properties   = CLScheduler::get().context().getInfo<CL_CONTEXT_PROPERTIES>(nullptr);
+                auto queue_properties = CLScheduler::get().queue().getInfo<CL_QUEUE_PROPERTIES>(nullptr);
 
-                cl::Context::setDefault(cl::Context(CL_DEVICE_TYPE_DEFAULT));
-                CLScheduler::get().set_context(cl::Context::getDefault());
+                cl::Context      new_ctx   = cl::Context(CL_DEVICE_TYPE_DEFAULT, ctx_properties.data());
+                cl::CommandQueue new_queue = cl::CommandQueue(new_ctx, cl::Device::getDefault(), queue_properties);
+
+                CLKernelLibrary::get().clear_programs_cache();
+                CLScheduler::get().set_context(new_ctx);
+                CLScheduler::get().set_queue(new_queue);
             }
 #endif // ARM_COMPUTE_CL
             run_test(test_info, *test_factory);
diff --git a/tests/validation/CL/FixedPoint/FixedPointTarget.h b/tests/validation/CL/FixedPoint/FixedPointTarget.h
index 3847354..920bd37 100644
--- a/tests/validation/CL/FixedPoint/FixedPointTarget.h
+++ b/tests/validation/CL/FixedPoint/FixedPointTarget.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -110,7 +110,7 @@
     sources.push_back(fixed_point_operation_kernel);
 
     // Create program
-    ::cl::Program program(sources);
+    ::cl::Program program(CLScheduler::get().context(), sources);
 
     // Build program
     program.build(build_opts.c_str());