Add pre-commit support for sanity checks

Use the pre-commit framework [2] to run black and flake8 before each commit.
black and flake8 are managed by the pre-commit framework and can also be
run manually using the `pre-commit run` command.

Fix the code base with the help of black and flake8.
Fix import statements according to PEP8 guidelines [1].
Both tools have the following settings (specified in the pre-commit
configuration file):
* line length: 120 characters
* directories to exclude: ethosu/vela/tflite/ and ethosu/vela/ethos_u55_regs/

Updated README.md with instructions on how to install pre-commit and run the
sanity checks. Pipenv files have been updated to include the new dependencies
for pre-commit.

[1]: https://www.python.org/dev/peps/pep-0008/#imports
[2]: https://github.com/pre-commit/pre-commit

Change-Id: I304d9fffdf019d390ffa396a529c8a7c2437f63d
Signed-off-by: Diego Russo <diego.russo@arm.com>
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 120cf8b..460cf01 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -22,25 +22,19 @@
 
 from collections import defaultdict
 from enum import Enum, IntEnum
+
+import numpy as np
+
+from . import scaling
 from .high_level_command_stream import CommandType
-from .ethos_u55_regs.ethos_u55_regs import *
-from .tensor import MemArea, TensorBlockTraversal
+from .ethos_u55_regs.ethos_u55_regs import cmd0, cmd1, acc_format, elementwise_mode, rounding, activation, ifm_precision
+from .tensor import MemArea, TensorBlockTraversal, TensorFormat
 from .operation import NpuBlockType
 from .numeric_util import quantise_float32, round_up, round_away_zero, round_up_to_int, clamp_sigmoid, clamp_tanh
 from .data_type import BaseType, DataType
-import numpy as np
 from .shared_buffer_allocation import SharedBufferAllocation
 from .architecture_features import SharedBufferArea, SHRAMElements, ArchitectureFeatures
-from .nn_graph import TensorFormat, SchedulingStrategy
-from .range_set import (
-    MemoryAccessSet,
-    AccessDirection,
-)
-from .mark_tensors import (
-    reshape_operations,
-)
 from .architecture_features import Block, Kernel, Rect
-from . import scaling
 
 
 class RegisterMachine:
@@ -372,7 +366,6 @@
             param = relative_dep[CommandType.DMA][0]
             param = min(param, 0xF)  # Clamp to allowable wait amount
             emit.cmd_wait(cmd0.NPU_OP_DMA_WAIT, param, absolute_dep[CommandType.DMA][0])
-            prev_cmd = None  # Clear any dependency
 
     for cmd in cmd_stream:
         if cmd.cmdtype == CommandType.DMA:
@@ -684,7 +677,7 @@
             ifm_max = cmd.ifm_tensor.quantization.max
 
             # Emit commands for any fused activation function
-            if faf == None:
+            if faf is None:
                 emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION, activation.NONE)
                 # Even if no activation function, values need to be set to override previous values
                 faf_min = ofm_quant_qmin
@@ -765,13 +758,13 @@
                 ),
             ):
 
-                if tens == None:
+                if tens is None:
                     continue
 
-                need_zero_point = (faf != None) or (fmf == "ConcatSliceWrite")
+                need_zero_point = (faf is not None) or (fmf == "ConcatSliceWrite")
                 if (
                     primary_op.type in set(("AvgPool", "AvgPoolAct")) and not need_zero_point
-                ) or tens.quantization == None:
+                ) or tens.quantization is None:
                     # Actual integer operation, just set scale to 1 and zero point to 0
                     emit.cmd0_with_param(zero_point_op, 0)
                 else: