mlw_codec: Fixed alignment warning

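- Restructured pointer API to prevent alignment warnings: the weight
  buffer is now passed as int16_t* (was void*/uint8_t*), and brick
  strides are expressed in int16_t elements instead of bytes
- Changed weight tensor data type from np.int64 to np.int16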

Change-Id: I310c1ca733bf98724c84e8b2194becb4be3e7eea
diff --git a/ethosu/mlw_codec/mlw_codecmodule.c b/ethosu/mlw_codec/mlw_codecmodule.c
index b752a4e..ddc8e7e 100644
--- a/ethosu/mlw_codec/mlw_codecmodule.c
+++ b/ethosu/mlw_codec/mlw_codecmodule.c
@@ -81,7 +81,7 @@
 
     PyArrayObject* input_ndarray_object = (PyArrayObject*)PyArray_FROM_OTF(
         input_object,
-        NPY_INT64,
+        NPY_INT16,
         NPY_ARRAY_ALIGNED);
     if (input_ndarray_object == NULL)
     {
@@ -99,13 +99,19 @@
     int kernel_width = (int)PyArray_DIM(input_ndarray_object, 2);
     int ifm_depth = (int)PyArray_DIM(input_ndarray_object, 3);
 
-    int64_t* brick_weights = (int64_t*)PyArray_DATA(input_ndarray_object);
+    int16_t* brick_weights = (int16_t*)PyArray_DATA(input_ndarray_object);
     int brick_strides[4];
     for (int i = 0; i < 4; i++)
     {
-        brick_strides[i] = (int)PyArray_STRIDE(input_ndarray_object, i);
+        int stride = (int)PyArray_STRIDE(input_ndarray_object, i);
+        if (stride % sizeof(int16_t))
+        {
+            PyErr_SetString(PyExc_ValueError, "Invalid stride");
+            return NULL;
+        }
+        brick_strides[i] = stride / sizeof(int16_t);
     }
-    if ((unsigned)PyArray_ITEMSIZE(input_ndarray_object) != sizeof(int64_t))
+    if ((unsigned)PyArray_ITEMSIZE(input_ndarray_object) != sizeof(int16_t))
     {
         PyErr_SetString(PyExc_ValueError, "Invalid input type");
         return NULL;
diff --git a/ethosu/mlw_codec/mlw_encode.c b/ethosu/mlw_codec/mlw_encode.c
index cac5e98..02e9253 100644
--- a/ethosu/mlw_codec/mlw_encode.c
+++ b/ethosu/mlw_codec/mlw_encode.c
@@ -898,21 +898,21 @@
 
 struct brick_buf_s
 {
-    uint8_t* buf;
+    int16_t* buf;
     int* strides;
 };
 typedef struct brick_buf_s brick_buf_t;
 
 static int16_t get_brick_weight(brick_buf_t* buf, int ofm_z, int wy, int wx, int ifm_z)
 {
-    uint8_t* p = buf->buf;
+    int16_t* p = buf->buf;
 
     p += ofm_z * buf->strides[0];
     p += wy * buf->strides[1];
     p += wx * buf->strides[2];
     p += ifm_z * buf->strides[3];
 
-    return *(int16_t*)p;
+    return *p;
 }
 
 static void reorder_free(int16_t* buf)
@@ -931,7 +931,7 @@
     int kernel_width,
     int ifm_depth,
     int* strides,
-    void* inbuf,
+    int16_t* inbuf,
     int ofm_block_depth,
     int is_depthwise,
     int is_partkernel,
@@ -1070,7 +1070,7 @@
     int kernel_width,
     int ifm_depth,
     int* brick_strides,
-    void* inbuf,
+    int16_t* inbuf,
     int ofm_block_depth,
     int is_depthwise,
     int is_partkernel,
diff --git a/ethosu/mlw_codec/mlw_encode.h b/ethosu/mlw_codec/mlw_encode.h
index 743603b..68218f3 100644
--- a/ethosu/mlw_codec/mlw_encode.h
+++ b/ethosu/mlw_codec/mlw_encode.h
@@ -47,7 +47,7 @@
     int kernel_width,
     int ifm_depth,
     int* brick_strides,
-    void* inbuf,
+    int16_t* inbuf,
     int ofm_block_depth,
     int is_depthwise,
     int is_partkernel,
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 4ce03d5..5f21139 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -314,8 +314,8 @@
         assert weight_tens.quantization.zero_point is not None
 
         # Early zero-point correction
-        quant_buf = weight_tens.quant_values.astype(np.int64)
-        weights = quant_buf - weight_tens.quantization.zero_point
+        quant_buf = weight_tens.quant_values.astype(np.int16)
+        weights = quant_buf - weight_tens.quantization.zero_point.astype(np.int16)
 
         if len(weights.shape) == 2:
             weights = np.expand_dims(np.expand_dims(weights, axis=0), axis=0)