Add pre-commit support for sanity checks

Use the pre-commit framework [2] to run black and flake8 before each
commit. black and flake8 are managed by the pre-commit framework and can
also be run manually with the `pre-commit run` command.
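For example, a typical local workflow (mirroring the README.md
instructions added below) would be:

    pre-commit install            # install the git hook
    pre-commit run --all-files    # run black and flake8 on every file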

Fix the code base with the help of black and flake8.
Fix import statements according to the PEP8 guidelines [1].
Both tools have the following settings (specified in the pre-commit
configuration file):
* line length: 120 characters
* directories to exclude: ethosu/vela/tflite/ and ethosu/vela/ethos_u55_regs/

Update README.md with instructions on how to install pre-commit and how to
run the sanity checks.
Update the Pipenv files with the new dependencies for pre-commit.
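The development environment (including pre-commit) can then be set up
with, for instance:

    pipenv install -e . --dev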

[1]: https://www.python.org/dev/peps/pep-0008/#imports
[2]: https://github.com/pre-commit/pre-commit

Change-Id: I304d9fffdf019d390ffa396a529c8a7c2437f63d
Signed-off-by: Diego Russo <diego.russo@arm.com>
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..0695eff
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,14 @@
+exclude: '^ethosu/vela/(tflite|ethos_u55_regs)/'
+repos:
+- repo: https://github.com/ambv/black
+  rev: stable
+  hooks:
+  - id: black
+    language_version: python3.6
+    args: [--line-length=120]
+
+- repo: https://gitlab.com/pycqa/flake8
+  rev: 3.7.9
+  hooks:
+  - id: flake8
+    args: [--max-line-length=120, --extend-ignore=E203]
diff --git a/Pipfile b/Pipfile
index 300bef6..33f941f 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,6 +4,8 @@
 verify_ssl = true
 
 [dev-packages]
+pre-commit = "*"
+ethos-u-vela = {editable = true,path = "."}
 
 [packages]
 ethos-u-vela = {editable = true,path = "."}
diff --git a/Pipfile.lock b/Pipfile.lock
index 6fa0154..c745f93 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "2d930644f3f81f11dae3317cae890fe083479342c80da44161b46ac83d6972d5"
+            "sha256": "84a8b4a6f5aa912c80e0cadffe26bc8575705edc0730d833db4f0984789ac288"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -52,5 +52,152 @@
             "version": "==1.18.2"
         }
     },
-    "develop": {}
+    "develop": {
+        "appdirs": {
+            "hashes": [
+                "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92",
+                "sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e"
+            ],
+            "version": "==1.4.3"
+        },
+        "cfgv": {
+            "hashes": [
+                "sha256:1ccf53320421aeeb915275a196e23b3b8ae87dea8ac6698b1638001d4a486d53",
+                "sha256:c8e8f552ffcc6194f4e18dd4f68d9aef0c0d58ae7e7be8c82bee3c5e9edfa513"
+            ],
+            "version": "==3.1.0"
+        },
+        "distlib": {
+            "hashes": [
+                "sha256:2e166e231a26b36d6dfe35a48c4464346620f8645ed0ace01ee31822b288de21"
+            ],
+            "version": "==0.3.0"
+        },
+        "ethos-u-vela": {
+            "editable": true,
+            "path": "."
+        },
+        "filelock": {
+            "hashes": [
+                "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
+                "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
+            ],
+            "version": "==3.0.12"
+        },
+        "flatbuffers": {
+            "hashes": [
+                "sha256:776a959c5f70b41819fa75de44ed14fd984fa1a79b378f27e6f4fff338cbdca2",
+                "sha256:f24185db54193540e3d684dc98aa7c2d89882341641548ceb36fd2589fef6c4e"
+            ],
+            "version": "==1.11.0"
+        },
+        "identify": {
+            "hashes": [
+                "sha256:2bb8760d97d8df4408f4e805883dad26a2d076f04be92a10a3e43f09c6060742",
+                "sha256:faffea0fd8ec86bb146ac538ac350ed0c73908326426d387eded0bcc9d077522"
+            ],
+            "version": "==1.4.14"
+        },
+        "importlib-metadata": {
+            "hashes": [
+                "sha256:2a688cbaa90e0cc587f1df48bdc97a6eadccdcd9c35fb3f976a09e3b5016d90f",
+                "sha256:34513a8a0c4962bc66d35b359558fd8a5e10cd472d37aec5f66858addef32c1e"
+            ],
+            "markers": "python_version < '3.8'",
+            "version": "==1.6.0"
+        },
+        "importlib-resources": {
+            "hashes": [
+                "sha256:4019b6a9082d8ada9def02bece4a76b131518866790d58fdda0b5f8c603b36c2",
+                "sha256:dd98ceeef3f5ad2ef4cc287b8586da4ebad15877f351e9688987ad663a0a29b8"
+            ],
+            "markers": "python_version < '3.7'",
+            "version": "==1.4.0"
+        },
+        "nodeenv": {
+            "hashes": [
+                "sha256:5b2438f2e42af54ca968dd1b374d14a1194848955187b0e5e4be1f73813a5212"
+            ],
+            "version": "==1.3.5"
+        },
+        "numpy": {
+            "hashes": [
+                "sha256:1598a6de323508cfeed6b7cd6c4efb43324f4692e20d1f76e1feec7f59013448",
+                "sha256:1b0ece94018ae21163d1f651b527156e1f03943b986188dd81bc7e066eae9d1c",
+                "sha256:2e40be731ad618cb4974d5ba60d373cdf4f1b8dcbf1dcf4d9dff5e212baf69c5",
+                "sha256:4ba59db1fcc27ea31368af524dcf874d9277f21fd2e1f7f1e2e0c75ee61419ed",
+                "sha256:59ca9c6592da581a03d42cc4e270732552243dc45e87248aa8d636d53812f6a5",
+                "sha256:5e0feb76849ca3e83dd396254e47c7dba65b3fa9ed3df67c2556293ae3e16de3",
+                "sha256:6d205249a0293e62bbb3898c4c2e1ff8a22f98375a34775a259a0523111a8f6c",
+                "sha256:6fcc5a3990e269f86d388f165a089259893851437b904f422d301cdce4ff25c8",
+                "sha256:82847f2765835c8e5308f136bc34018d09b49037ec23ecc42b246424c767056b",
+                "sha256:87902e5c03355335fc5992a74ba0247a70d937f326d852fc613b7f53516c0963",
+                "sha256:9ab21d1cb156a620d3999dd92f7d1c86824c622873841d6b080ca5495fa10fef",
+                "sha256:a1baa1dc8ecd88fb2d2a651671a84b9938461e8a8eed13e2f0a812a94084d1fa",
+                "sha256:a244f7af80dacf21054386539699ce29bcc64796ed9850c99a34b41305630286",
+                "sha256:a35af656a7ba1d3decdd4fae5322b87277de8ac98b7d9da657d9e212ece76a61",
+                "sha256:b1fe1a6f3a6f355f6c29789b5927f8bd4f134a4bd9a781099a7c4f66af8850f5",
+                "sha256:b5ad0adb51b2dee7d0ee75a69e9871e2ddfb061c73ea8bc439376298141f77f5",
+                "sha256:ba3c7a2814ec8a176bb71f91478293d633c08582119e713a0c5351c0f77698da",
+                "sha256:cd77d58fb2acf57c1d1ee2835567cd70e6f1835e32090538f17f8a3a99e5e34b",
+                "sha256:cdb3a70285e8220875e4d2bc394e49b4988bdb1298ffa4e0bd81b2f613be397c",
+                "sha256:deb529c40c3f1e38d53d5ae6cd077c21f1d49e13afc7936f7f868455e16b64a0",
+                "sha256:e7894793e6e8540dbeac77c87b489e331947813511108ae097f1715c018b8f3d"
+            ],
+            "version": "==1.18.2"
+        },
+        "pre-commit": {
+            "hashes": [
+                "sha256:487c675916e6f99d355ec5595ad77b325689d423ef4839db1ed2f02f639c9522",
+                "sha256:c0aa11bce04a7b46c5544723aedf4e81a4d5f64ad1205a30a9ea12d5e81969e1"
+            ],
+            "index": "pypi",
+            "version": "==2.2.0"
+        },
+        "pyyaml": {
+            "hashes": [
+                "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
+                "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
+                "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
+                "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
+                "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
+                "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
+                "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
+                "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
+                "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
+                "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
+                "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
+            ],
+            "version": "==5.3.1"
+        },
+        "six": {
+            "hashes": [
+                "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
+                "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"
+            ],
+            "version": "==1.14.0"
+        },
+        "toml": {
+            "hashes": [
+                "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
+                "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
+            ],
+            "version": "==0.10.0"
+        },
+        "virtualenv": {
+            "hashes": [
+                "sha256:00cfe8605fb97f5a59d52baab78e6070e72c12ca64f51151695407cc0eb8a431",
+                "sha256:c8364ec469084046c779c9a11ae6340094e8a0bf1d844330fc55c1cefe67c172"
+            ],
+            "version": "==20.0.17"
+        },
+        "zipp": {
+            "hashes": [
+                "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b",
+                "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96"
+            ],
+            "markers": "python_version < '3.8'",
+            "version": "==3.1.0"
+        }
+    }
 }
diff --git a/README.md b/README.md
index 03ad7fe..4d7db1a 100644
--- a/README.md
+++ b/README.md
@@ -110,3 +110,45 @@
 
 ## License
 Vela is licensed under [Apache License 2.0](LICENSE.txt)
+## Contributions and Pull Requests
+
+Contributions are accepted under Apache-2.0. Only submit contributions where you have authored all of the code.
+
+### Sanity checks
+
+The Python codebase is PEP8 compliant, with the exception of a 120 character line length.
+We run black and flake8 against the code base, excluding the "ethosu/vela/tflite/" and "ethosu/vela/ethos\_u55\_regs" directories because they are auto-generated by third-party tools.
+These tools are run using the [pre-commit framework](https://pre-commit.com/). The configuration file is `.pre-commit-config.yaml`.
+
+#### Install tools
+
+To install pre-commit, run the following:
+
+```
+pipenv install -e . --dev
+```
+
+After the installation, pre-commit is available in the virtual environment.
+
+#### Install the pre-commit hook
+
+To ease development, these sanity checks can be run automatically before committing the code.
+To install the git hook, run:
+
+```
+$ pre-commit install
+pre-commit installed at .git/hooks/pre-commit
+```
+
+The checks run before each commit: if any of them fails, you need to fix the code to make the checks pass.
+
+#### Run the sanity checks
+
+These checks can also be run manually. This can be achieved by running the following:
+```
+$ pre-commit run flake8 --all-files
+...
+$ pre-commit run black --all-files
+```
+
+If you don't specify a hook name after `run`, all the checks are executed.
diff --git a/ethosu/vela/_version.py b/ethosu/vela/_version.py
index f3888c3..b670819 100644
--- a/ethosu/vela/_version.py
+++ b/ethosu/vela/_version.py
@@ -16,4 +16,4 @@
 
 import pkg_resources
 
-__version__ = pkg_resources.get_distribution("ethos-u-vela").version
\ No newline at end of file
+__version__ = pkg_resources.get_distribution("ethos-u-vela").version
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index 51c632e..69f95fa 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -18,13 +18,17 @@
 # Description:
 # Holds a container for Ethos-U55/System architecture parameters.
 
-from .nn_graph import MemArea, TensorPurpose, NpuBlockType, TensorFormat
-from .numeric_util import round_up, round_up_divide
+import enum
 from collections import namedtuple
 from configparser import ConfigParser
-from .supported_operators import SupportedOperators
+
 import numpy as np
-import enum
+
+from .tensor import MemArea, TensorPurpose, TensorFormat
+from .operation import NpuBlockType
+from .numeric_util import round_up, round_up_divide
+from .supported_operators import SupportedOperators
+
 
 PointXY = namedtuple("PointXY", "x y")
 PointXYZ = namedtuple("PointXYZ", "x y z")
@@ -151,7 +155,7 @@
         accelerator_config = accelerator_config.lower()
         self.vela_config = vela_config
         self.accelerator_config = accelerator_config
-        if not self.accelerator_config in ArchitectureFeatures.accelerator_configs:
+        if self.accelerator_config not in ArchitectureFeatures.accelerator_configs:
             raise Exception("Unknown accelerator configuration " + self.accelerator_config)
         accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config]
         self.config = accel_config
@@ -450,7 +454,6 @@
         )
 
         # Calculate how many IFM blocks this OFM block requires (i.e how many jobs)
-        ifm_block = self.get_ifm_block_size(ifm_block_depth, ofm_block, kernel, self.ofm_block_max)
         ifm_depth_blocks = round_up_divide(ifm.size().depth, ifm_block_depth)
         ifm_depth_blocks = 1  # Overwrite with 1 to force OFM block dependency, not IFM
 
@@ -476,7 +479,6 @@
         # Iterate over the next BLOCKDEP inputs, checking to see if a sliding window
         # of IFM area overlaps with any previous OFM block generation.
         elapsed_jobs = 0
-        ifm_depth = ifm.size().depth
         for forward_offset in range(ArchitectureFeatures.MAX_BLOCKDEP):
             # This is the IFM block we want to sample from
             in_area = self.get_first_job_input_volume(
@@ -533,7 +535,7 @@
                 n_elements = op.inputs[0].elements()
                 cycles = intercept + n_elements * slope
                 return cycles
-            except:
+            except Exception:
                 print("Error: Reading CPU cycle estimate in vela configuration file, section {}".format(section))
                 raise
 
@@ -554,7 +556,7 @@
             print("Warning: Using default values for system configuration")
         else:
             section_key = "SysConfig." + self.system_config
-            if not section_key in self.vela_config:
+            if section_key not in self.vela_config:
                 raise Exception("Unknown system configuration " + self.system_config)
 
         try:
@@ -585,7 +587,7 @@
                     + " (must be 'OnChipFlash' or 'OffChipFlash'). To store the weights and other constant data in SRAM"
                     " select 'OnChipFlash'"
                 )
-        except:
+        except Exception:
             print("Error: Reading System Configuration in vela configuration file, section {}".format(section_key))
             raise
 
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index db669ac..6fc3b65 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -18,6 +18,8 @@
 # Description:
 # Contains the main sequencing of the compiler.
 
+import time
+
 from . import graph_optimiser
 from . import mark_tensors
 from . import insert_dma
@@ -25,9 +27,6 @@
 from . import scheduler
 from . import tensor_allocation
 from . import npu_performance
-import time
-
-from . import high_level_command_stream
 from . import high_level_command_stream_generator
 from . import register_command_stream_generator
 from . import extract_npu_subgraphs
@@ -36,7 +35,7 @@
 from . import live_range
 from .tensor import MemArea
 from .nn_graph import TensorAllocator, PassPlacement
-from .rewrite_graph import verify_graph_health, verify_subgraph_health
+from .rewrite_graph import verify_graph_health
 
 
 class CompilerOptions:
diff --git a/ethosu/vela/data_type.py b/ethosu/vela/data_type.py
index 1d3e94e..6dfe216 100644
--- a/ethosu/vela/data_type.py
+++ b/ethosu/vela/data_type.py
@@ -18,9 +18,10 @@
 # Description:
 # Defines the basic numeric type classes for tensors.
 
-from .numeric_util import round_up_divide
 import enum
 
+from .numeric_util import round_up_divide
+
 
 class BaseType(enum.Flag):
     Signed = 1
diff --git a/ethosu/vela/driver_actions.py b/ethosu/vela/driver_actions.py
index 86c4a36..bd15af2 100644
--- a/ethosu/vela/driver_actions.py
+++ b/ethosu/vela/driver_actions.py
@@ -18,9 +18,11 @@
 # Description:
 # Creates driver actions that are embedded in the custom operator payload.
 
-import numpy as np
 from typing import List
-from .ethos_u55_regs.ethos_u55_regs import *
+
+import numpy as np
+
+from .ethos_u55_regs.ethos_u55_regs import config_r, id_r, ARCH_VER
 
 
 class DACommands:
@@ -43,8 +45,8 @@
 
 
 def emit_fourcc(data: List[int], fourcc: str):
-    assert data != None
-    assert fourcc != None
+    assert data is not None
+    assert fourcc is not None
     assert len(fourcc) == 4
     value: int = 0
     value = fourcc[0].encode()[0]
@@ -75,14 +77,14 @@
 
 
 def emit_config(data: List[int], rel: int, patch: int, arch):
-    assert data != None
+    assert data is not None
     data.append(make_da_tag(DACommands.Config, 0, (patch << DACommands.Config_PatchShift) | rel))
     data.append(build_config_word(arch))
     data.append(build_id_word())
 
 
 def emit_cmd_stream_header(data: List[int], length: int):
-    assert data != None
+    assert data is not None
     # Insert NOPs to align start of command stream to 16 bytes
     num_nops = 4 - ((len(data) + 1) % 4)
     for _ in range(num_nops):
@@ -95,7 +97,7 @@
 
 
 def emit_reg_read(data: List[int], reg_index: int, reg_count: int = 1):
-    assert data != None
+    assert data is not None
     assert reg_index >= 0
     assert reg_count >= 1
     payload: int = (reg_index & DACommands.ReadAPB_IndexMask) | ((reg_count << DACommands.ReadAPB_CountShift) - 1)
@@ -103,5 +105,5 @@
 
 
 def emit_dump_shram(data: List[int]):
-    assert data != None
+    assert data is not None
     data.append(make_da_tag(DACommands.DumpSHRAM, 0, 0))
diff --git a/ethosu/vela/extract_npu_subgraphs.py b/ethosu/vela/extract_npu_subgraphs.py
index 5b9ba8b..ab3db21 100644
--- a/ethosu/vela/extract_npu_subgraphs.py
+++ b/ethosu/vela/extract_npu_subgraphs.py
@@ -23,10 +23,11 @@
 # by NpuOp operations. Later, Vela generates command streams and compressed weight streams for the NPU subgraphs and
 # attaches them to the NpuOp. This encapsulates everything the NPU subgraph is supposed to do.
 
-from .nn_graph import Pass, PassPlacement, NpuBlockType, Subgraph
-from .operation import Operation
 import numpy as np
 
+from .nn_graph import Pass, PassPlacement, Subgraph
+from .operation import Operation, NpuBlockType
+
 
 def make_npu_call_op_pass(npu_subgraph):
     op = Operation("NpuOp", "call_" + npu_subgraph.name)
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index a4ed39f..b29a382 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -19,12 +19,15 @@
 # Early optimisation of the network graph, using the rewrite_graph module to do the traversal of the graph. These are
 # split into two parts optimise_graph_a and optimise_graph_b.
 
-from .nn_graph import Operation, NpuBlockType, Tensor
-from . import rewrite_graph
-from .data_type import BaseType, DataType
-import numpy as np
 import math
-from .numeric_util import round_up_divide
+
+import numpy as np
+
+from . import rewrite_graph
+from .operation import Operation, NpuBlockType
+from .tensor import Tensor
+from .data_type import DataType
+
 
 passthrough_nodes = set(("Identity",))
 
@@ -83,7 +86,7 @@
 
         # For Split the offset cannot be extracted from the tensor so it has to
         # be calculated from the index of the output tensor
-        if axis != None:
+        if axis is not None:
             # Get the start and end of the split
             offset_start = [0] * len(tens.shape)
             offset_end = [0] * len(tens.shape)
@@ -316,6 +319,7 @@
 activation_ops = set(("Relu", "Relu6", "ReluN1To1", "Sigmoid", "Tanh"))
 memory_only_ops = set(("Reshape",))
 
+
 # Check if the op can be reordered
 def get_prepend_op(op):
     inp = op.inputs[0]
@@ -326,7 +330,7 @@
         prep_op = prev_op
         inp = prev_op.inputs[0]
         prev_op = inp.ops[-1]
-    if prev_op != None and len(prev_op.outputs) == 1 and len(prev_op.outputs[0].consumers()) == 1:
+    if prev_op is not None and len(prev_op.outputs) == 1 and len(prev_op.outputs[0].consumers()) == 1:
         return prep_op
 
     return None
@@ -384,7 +388,7 @@
 def fixup_act_reorder(op, arch):
     if op.type in activation_ops:
         prep_op = get_prepend_op(op)
-        if prep_op != None:
+        if prep_op is not None:
             act_op = op.clone("_reordered")
             act_op.inputs = [prep_op.inputs[0]]
             act_op_out = act_op.inputs[0].clone("_acted")
@@ -400,7 +404,7 @@
 
 
 def convert_mul_max_to_abs_or_lrelu(op, arch):
-    """Whenever there is a subgraph with this topology:
+    r"""Whenever there is a subgraph with this topology:
 
        Input    X   For X = -1 or X > 0
        |   \   /    This subgraph can be replaced with either
@@ -487,24 +491,25 @@
     for idx, sg in enumerate(nng.subgraphs):
         # rewrite graph pass
         nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
-            sg, arch, [fixup_unpack_output,], op_rewrite_list, rewrite_unsupported=False
+            sg, arch, [fixup_unpack_output], op_rewrite_list, rewrite_unsupported=False
         )
 
     for idx, sg in enumerate(nng.subgraphs):
         # remove passthrough tensors
-        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [remove_passthrough_tensor,], [])
+        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [remove_passthrough_tensor], [])
 
     if verbose_graph:
         nng.print_graph()
     return nng
 
+
 def optimise_graph_b(nng, arch, verbose_graph=False):
     if verbose_graph:
         nng.print_graph()
 
     for idx, sg in enumerate(nng.subgraphs):
         # combined rewrite graph pass
-        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [rewrite_concat, rewrite_split,], [])
+        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [rewrite_concat, rewrite_split], [])
 
     if verbose_graph:
         nng.print_graph()
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 952e203..bdb0490 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -18,8 +18,10 @@
 # Description:
 # Contains classes that hold commands for the high-level command stream (one command per DMA or NPU stripe).
 
-from enum import Enum, IntEnum
+from enum import IntEnum
+
 import numpy as np
+
 from .operation import NpuBlockType
 from .numeric_util import round_up_divide
 from .range_set import MemoryAccessSet, AccessDirection
@@ -42,12 +44,12 @@
         new_start_coord[concat_axis] -= concat_offset
         new_end_coord[concat_axis] -= concat_offset
 
-        if split_offset != None:
+        if split_offset is not None:
             for idx in range(len(split_offset)):
                 new_start_coord[idx] += split_offset[idx]
                 new_end_coord[idx] += split_offset[idx]
 
-        if split_offset == None and npu_block_type in set((NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct)):
+        if split_offset is None and npu_block_type in set((NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct)):
             # these types of operations do a "dot product" over the entire IFM
             new_start_coord[-1] = 0
             new_end_coord[-1] = ifm_shape[-1]
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 364df6f..47392c0 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -22,9 +22,8 @@
 # calc_allowed_ofm_ifm_overlap_for_cascaded_pass().
 
 from .nn_graph import SchedulingStrategy, PassPlacement
-import numpy as np
 from .operation import NpuBlockType
-from .high_level_command_stream import Box, CommandType, Command, NpuStripe, DMA
+from .high_level_command_stream import Box, NpuStripe, DMA
 
 
 def need_dma(tens):
diff --git a/ethosu/vela/insert_dma.py b/ethosu/vela/insert_dma.py
index b63c1ea..33f1a02 100644
--- a/ethosu/vela/insert_dma.py
+++ b/ethosu/vela/insert_dma.py
@@ -18,13 +18,14 @@
 # Description:
 # Insert DMA operations into the graph for transfering weights.
 
-from .nn_graph import Operation, MemArea, TensorPurpose, NpuBlockType
 from . import rewrite_graph
+from .tensor import MemArea, TensorPurpose
+from .operation import Operation, NpuBlockType
 
 
 def insert_dma_cmd(op, arch):
     if op.type == "DMA":
-        return op # Already rewritten
+        return op  # Already rewritten
     for idx, tens in enumerate(op.inputs):
 
         if tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash) and tens.mem_area != arch.fast_storage_mem_area:
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index 24f1f64..54c15ba 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -20,7 +20,7 @@
 # Can work with either a pass packed subgraph or a scheduled subgraph.
 
 from .tensor import Tensor, MemArea
-from .nn_graph import TensorPurpose, PassPlacement
+from .nn_graph import PassPlacement
 from .high_level_command_stream_generator import calc_allowed_ofm_ifm_overlap_for_cascaded_pass
 
 
@@ -90,9 +90,9 @@
             if tens.address == 0:
                 tens.address = address
                 # Also need to set the address to the tensor's cpu/npu clones
-                if tens.cpu_tensor != None:
+                if tens.cpu_tensor is not None:
                     tens.cpu_tensor.address = address
-                if tens.npu_tensor != None:
+                if tens.npu_tensor is not None:
                     tens.npu_tensor.address = address
 
     def get_alignment(self):
@@ -115,8 +115,8 @@
             output_tensor = ps.outputs[0]
             # If the input or output tensor is tied to a Cpu tensor, i.e. a subgraph input
             # or output, fuse the live-range with the Cpu tensors' live-range instead.
-            input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor != None else input_tensor
-            output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor != None else output_tensor
+            input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor is not None else input_tensor
+            output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor is not None else output_tensor
             if not tensor_should_be_ignored(input_tensor, target_mem_area) and not tensor_should_be_ignored(
                 output_tensor, target_mem_area
             ):
@@ -221,7 +221,7 @@
     ignore_subgraph_input_output_tensors=False,
     lr_graph=None,
 ):
-    if lr_graph == None:
+    if lr_graph is None:
         lr_graph = LiveRangeGraph()
 
     if sg in lr_graph.processed_subgraphs:
diff --git a/ethosu/vela/mark_tensors.py b/ethosu/vela/mark_tensors.py
index 9b1824b..c42a28d 100644
--- a/ethosu/vela/mark_tensors.py
+++ b/ethosu/vela/mark_tensors.py
@@ -21,7 +21,7 @@
 from . import rewrite_graph
 from . import weight_compressor
 from .architecture_features import Block
-from .nn_graph import TensorPurpose, TensorFormat, PassPlacement
+from .tensor import TensorPurpose, TensorFormat
 from .operation import NpuBlockType
 
 
@@ -55,6 +55,7 @@
         print("Warning: Propagating unknown tensor purpose", op)
     return res
 
+
 tensor_purposes = [  # ops, input_purpose
     (
         set(
@@ -327,7 +328,7 @@
             return NpuBlockType.Default
 
     def visit_tens(tens, ps):
-        if not tens in formats_for_tensor:
+        if tens not in formats_for_tensor:
             fmt = init_tens(tens)
         else:
             fmt = formats_for_tensor[tens]
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index 8d335bd..e7820fe 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -24,9 +24,6 @@
 # Graph - A full neural network graph with one or more Subgraphs.
 
 import enum
-from .data_type import BaseType, DataType
-from .tensor import MemArea, TensorPurpose, TensorSubPurpose, TensorFormat, Tensor
-from .operation import Operation, NpuBlockType
 
 
 class PassPlacement(enum.Enum):
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 84cc493..11f1e92 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -23,12 +23,13 @@
 # estimate.
 
 import enum
-from . import numeric_util
+
 import numpy as np
-from .tensor import TensorPurpose, MemArea, TensorFormat, shape_num_elements, Tensor, TensorBlockTraversal
-from .operation import Operation
-from .data_type import DataType, BaseType
-from .nn_graph import PassPlacement, NpuBlockType, SchedulerRewrite, Pass
+
+from . import numeric_util
+from .tensor import TensorPurpose, MemArea, shape_num_elements, TensorBlockTraversal
+from .nn_graph import PassPlacement, SchedulerRewrite
+from .operation import NpuBlockType
 from .architecture_features import Block, Kernel
 
 
@@ -357,9 +358,7 @@
                     n_kernel_xy, 4
                 )  # need at least 4, as this is the minimum duty cycle for secondary accumulator writes
                 if weight_tensor is not None:
-                    n_kernel_xy = numeric_util.round_up(
-                        n_kernel_xy, 4
-                    )  # weights need to be read in blocks of 4
+                    n_kernel_xy = numeric_util.round_up(n_kernel_xy, 4)  # weights need to be read in blocks of 4
 
             num_mac_ops = 0
             for n_blocks_for_size, block_size in block_setup:
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index 4542c25..29ede84 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -18,13 +18,15 @@
 # Description:
 # Serialises and packs an NPU subgraph into tensors.
 
+import struct
+
+import numpy as np
+
+from . import driver_actions
 from .nn_graph import PassPlacement
 from .tensor import MemArea, Tensor, TensorPurpose, TensorFormat
 from .operation import Operation
 from .data_type import DataType
-import numpy as np
-from . import driver_actions
-import struct
 
 
 def make_memory_tensor(name, mem_area, sz, want_values, arch):
@@ -75,7 +77,7 @@
     nng.total_size[scratch_area] = nng.total_size.get(scratch_area, 0) - scratch_size
     nng.total_elements[scratch_area] = nng.total_elements.get(scratch_area, 0) - scratch_size
 
-    if flash_tens == scratch_tens == None:
+    if flash_tens == scratch_tens is None:
         # First Npu subgraph, create scratch and flash tensors
         sg.scratch_tensor = make_memory_tensor(sg.name + "_scratch", scratch_area, scratch_size, False, arch)
         sg.scratch_tensor.purpose = TensorPurpose.Scratch
@@ -88,7 +90,7 @@
 
     for cps in sg.cascaded_passes:
         for ps in cps.passes:
-            if ps.placement == PassPlacement.Npu and ps.weight_tensor != None:
+            if ps.placement == PassPlacement.Npu and ps.weight_tensor is not None:
                 # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
                 # is pointing at the destination address of where the weights should be placed in SRAM.
                 # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
diff --git a/ethosu/vela/numeric_util.py b/ethosu/vela/numeric_util.py
index e5bc88b..4e61b4c 100644
--- a/ethosu/vela/numeric_util.py
+++ b/ethosu/vela/numeric_util.py
@@ -19,6 +19,7 @@
 # Numerical utilities for various types of rounding etc.
 
 import math
+
 import numpy as np
 
 
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 663520f..bae8151 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -18,10 +18,12 @@
 # Description:
 # Packs a subgraph with Neural Network Operations into Passes. Each Pass has one or more Operations.
 
-from .nn_graph import Operation, Pass, PassPlacement, TensorPurpose, NpuBlockType, Tensor
-import collections
 import enum
-from .data_type import BaseType, DataType
+import collections
+
+from .nn_graph import Pass, PassPlacement
+from .tensor import TensorPurpose
+from .operation import Operation, NpuBlockType
 
 
 class PassFlags(enum.Flag):
@@ -104,10 +106,7 @@
 
 
 quantization_ops = set(("Dequantize", "QuantizeV2", "Max", "Min"))
-cpu_ops = (
-    set(("Softmax", "QuantizedSoftmax", "LRN", "Shape", "QuantizedPad", "Pad", "AddN"))
-    | quantization_ops
-)
+cpu_ops = set(("Softmax", "QuantizedSoftmax", "LRN", "Shape", "QuantizedPad", "Pad", "AddN")) | quantization_ops
 
 npu_dma_ops = set(("DMA",))
 startup_init_ops = set(("Const", "VariableV2", "Placeholder", "SubgraphInput"))
@@ -183,7 +182,7 @@
         # flags_to_set
         PassFlags.Npu | PassFlags.Dma,
         # flags_to_clear
-        PassFlags.Empty
+        PassFlags.Empty,
     ),
     (
         # ops_set
@@ -203,7 +202,7 @@
         # flags_to_set
         PassFlags.MemoryOnly | PassFlags.Main,
         # flags_to_clear
-        PassFlags.Empty
+        PassFlags.Empty,
     ),
     (
         # ops_set
@@ -213,9 +212,9 @@
         # flags_to_set
         PassFlags.Cpu | PassFlags.Main,
         # flags_to_clear
-        PassFlags.Empty
+        PassFlags.Empty,
     ),
-    (   # This last one is a fallback for unrecognised operations
+    (  # This last one is a fallback for unrecognised operations
         # ops_set
         None,
         # incompatible_pack_flags
@@ -223,7 +222,7 @@
         # flags_to_set
         PassFlags.Cpu | PassFlags.Main,
         # flags_to_clear
-        PassFlags.Empty
+        PassFlags.Empty,
     ),
 ]
 
@@ -346,7 +345,7 @@
 
         is_element_wise = True
         for op in reverse_ops_list:
-            if not op.type in elem_wise_ops and not op.type in npu_dma_ops:
+            if op.type not in elem_wise_ops and op.type not in npu_dma_ops:
                 is_element_wise = False
                 break
 
@@ -368,9 +367,9 @@
         ops_list = list(reversed(reverse_ops_list))
         intermediates = list(reversed(reverse_intermediates))
 
-        if primary_op == None:
+        if primary_op is None:
             primary_op = create_primary_op(ops_list)
-            if primary_op != None:
+            if primary_op is not None:
                 visit_tensor_refcount[primary_op.inputs[0]] += 1
                 npu_block_type = primary_op.attrs["npu_block_type"]
                 for input_tens in primary_op.inputs:
diff --git a/ethosu/vela/range_set.py b/ethosu/vela/range_set.py
index 64de970..d7623c5 100644
--- a/ethosu/vela/range_set.py
+++ b/ethosu/vela/range_set.py
@@ -19,7 +19,6 @@
 # Helper classes to track memory accesses for calculating dependencies between Commands.
 
 from enum import IntEnum
-from collections import defaultdict
 from functools import lru_cache
 
 
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 120cf8b..460cf01 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -22,25 +22,19 @@
 
 from collections import defaultdict
 from enum import Enum, IntEnum
+
+import numpy as np
+
+from . import scaling
 from .high_level_command_stream import CommandType
-from .ethos_u55_regs.ethos_u55_regs import *
-from .tensor import MemArea, TensorBlockTraversal
+from .ethos_u55_regs.ethos_u55_regs import cmd0, cmd1, acc_format, elementwise_mode, rounding, activation, ifm_precision
+from .tensor import MemArea, TensorBlockTraversal, TensorFormat
 from .operation import NpuBlockType
 from .numeric_util import quantise_float32, round_up, round_away_zero, round_up_to_int, clamp_sigmoid, clamp_tanh
 from .data_type import BaseType, DataType
-import numpy as np
 from .shared_buffer_allocation import SharedBufferAllocation
 from .architecture_features import SharedBufferArea, SHRAMElements, ArchitectureFeatures
-from .nn_graph import TensorFormat, SchedulingStrategy
-from .range_set import (
-    MemoryAccessSet,
-    AccessDirection,
-)
-from .mark_tensors import (
-    reshape_operations,
-)
 from .architecture_features import Block, Kernel, Rect
-from . import scaling
 
 
 class RegisterMachine:
@@ -372,7 +366,6 @@
             param = relative_dep[CommandType.DMA][0]
             param = min(param, 0xF)  # Clamp to allowable wait amount
             emit.cmd_wait(cmd0.NPU_OP_DMA_WAIT, param, absolute_dep[CommandType.DMA][0])
-            prev_cmd = None  # Clear any dependency
 
     for cmd in cmd_stream:
         if cmd.cmdtype == CommandType.DMA:
@@ -684,7 +677,7 @@
             ifm_max = cmd.ifm_tensor.quantization.max
 
             # Emit commands for any fused activation function
-            if faf == None:
+            if faf is None:
                 emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION, activation.NONE)
                 # Even if no activation function, values need to be set to override previous values
                 faf_min = ofm_quant_qmin
@@ -765,13 +758,13 @@
                 ),
             ):
 
-                if tens == None:
+                if tens is None:
                     continue
 
-                need_zero_point = (faf != None) or (fmf == "ConcatSliceWrite")
+                need_zero_point = (faf is not None) or (fmf == "ConcatSliceWrite")
                 if (
                     primary_op.type in set(("AvgPool", "AvgPoolAct")) and not need_zero_point
-                ) or tens.quantization == None:
+                ) or tens.quantization is None:
                     # Actual integer operation, just set scale to 1 and zero point to 0
                     emit.cmd0_with_param(zero_point_op, 0)
                 else:
diff --git a/ethosu/vela/scaling.py b/ethosu/vela/scaling.py
index ce0259a..3b749dd 100644
--- a/ethosu/vela/scaling.py
+++ b/ethosu/vela/scaling.py
@@ -19,9 +19,10 @@
 # Contains various scaling calculations for weights, elementwise operations, pooling etc.
 
 import math
-from .numeric_util import round_away_zero
 from enum import IntEnum
 
+from .numeric_util import round_away_zero
+
 
 class OperandToScale(IntEnum):
     OPa = 1
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index d51b5ac..fe31a46 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -19,24 +19,17 @@
 # The scheduler costs various strategies for scheduling the network in order to select the block configuration.
 
 import enum
-from .nn_graph import (
-    TensorPurpose,
-    TensorSubPurpose,
-    TensorFormat,
-    MemArea,
-    SchedulingStrategy,
-    CascadedPass,
-    PassPlacement,
-    SchedulerRewrite,
-    Operation,
-    NpuBlockType,
-)
-from . import live_range
+import copy
+
 import numpy as np
+
+from . import live_range
 from . import npu_performance
 from . import stats_writer
+from .tensor import TensorPurpose, TensorSubPurpose, TensorFormat, MemArea
+from .operation import NpuBlockType
+from .nn_graph import SchedulingStrategy, CascadedPass, PassPlacement, SchedulerRewrite
 from .npu_performance import make_bandwidth_array, make_macs_array, make_cycles_array, make_metrics_arrays, PassCycles
-import time, copy
 from .high_level_command_stream_generator import calc_allowed_ofm_ifm_overlap_for_pass_list
 from .shared_buffer_allocation import (
     find_block_configs_suitable_for_pass_and_shared_buffer,
@@ -279,7 +272,6 @@
         if len(candidates) <= 1:
             return candidates
         assert remove_equally_good_candidates
-        start = time.time()
         pareto_vals = np.zeros((len(candidates), DynamicProgrammingScheduler.num_pareto_metrics))
         ids = np.arange(len(candidates), dtype=np.int32)
         for idx, cand in enumerate(candidates):
@@ -713,7 +705,7 @@
 
     def get_block_configs(self, ps):
         if ps.placement != PassPlacement.Npu:
-            return [(1, 1, 1, 1)] # default
+            return [(1, 1, 1, 1)]  # default
 
         block_configs = find_block_configs_suitable_for_pass_and_shared_buffer(self.arch, ps)
 
@@ -764,9 +756,7 @@
             for tens in ps.intermediates:
                 if tens.mem_area == self.mem_area:
                     if tens.purpose == TensorPurpose.Weights:
-                        sram_used += tens.storage_size_for_sub_purpose(
-                            TensorSubPurpose.DoubleBuffer, block_config[3]
-                        )
+                        sram_used += tens.storage_size_for_sub_purpose(TensorSubPurpose.DoubleBuffer, block_config[3])
                         rewrite_list.append(
                             (
                                 SchedulerRewrite.ChangeTensorSubPurpose,
@@ -884,7 +874,7 @@
                 % (len(self.sg.passes), len(pass_to_cascaded_pass))
             )
             for ps in self.sg.passes:
-                if not ps in pass_to_cascaded_pass:
+                if ps not in pass_to_cascaded_pass:
                     print("%3d pass missing cascaded pass %s" % (ps.time, ps))
 
             assert len(pass_to_cascaded_pass) == len(self.sg.passes)
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index b5408d1..29be6d8 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -19,10 +19,9 @@
 # Shared buffer allocation works out how to allocate the Ethos-U55 shared buffer for a given pass.
 
 import numpy as np
-from .nn_graph import NpuBlockType
-from .numeric_util import round_up_divide, round_up
+
+from .operation import NpuBlockType
 from .architecture_features import Block, Kernel, SHRAMElements, SharedBufferArea, ArchitectureFeatures
-from . import pass_packing
 
 
 class SharedBufferAllocation:
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index c4b4cd9..3fd29d1 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -18,13 +18,16 @@
 # Description:
 # Writes out per-pass and summary performance statistics to CSV files.
 
-import numpy as np
-from .nn_graph import MemArea, TensorPurpose, PassPlacement
-from .npu_performance import PassCycles, MacCount, BandwidthDirection
 import csv
-from .numeric_util import round_up_to_int
 import sys
 
+import numpy as np
+
+from .tensor import MemArea, TensorPurpose
+from .nn_graph import PassPlacement
+from .npu_performance import PassCycles, MacCount, BandwidthDirection
+from .numeric_util import round_up_to_int
+
 
 def write_summary_metrics_csv(nng, summary_filename, arch):
     with open(summary_filename, "w") as f:
@@ -246,7 +249,7 @@
 
     print(file=f)
     for mem_area, label in mem_area_labels:
-        if not mem_area in memory_used:
+        if mem_area not in memory_used:
             continue
 
         aug_label = label + " used"
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 5d0206c..5cebf4d 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -19,10 +19,11 @@
 # Internal representation of a Neural Network Tensor.
 
 import enum
-from . import numeric_util
-import numpy as np
-from . import data_type
 import uuid
+
+import numpy as np
+
+from . import numeric_util
 from .range_set import MemoryRangeSet
 from .numeric_util import round_up_divide
 
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index 94aa608..255156e 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -19,13 +19,14 @@
 # Wrapping function to do tensor address allocation. That is, assigning addresses to tensors based on what has been
 # worked out from the allowable overlaps that are calculated by the live range analysis.
 
-from . import live_range
-from .tensor import MemArea
 import math
-from . import numeric_util
-import numpy as np
-from .nn_graph import TensorAllocator, PassPlacement
 
+import numpy as np
+
+from . import live_range
+from . import numeric_util
+from .tensor import MemArea
+from .nn_graph import TensorAllocator
 from .greedy_allocation import allocate_live_ranges as greedy_allocate_live_ranges
 
 
diff --git a/ethosu/vela/tflite_mapping.py b/ethosu/vela/tflite_mapping.py
index e2b9076..e8b40bd 100644
--- a/ethosu/vela/tflite_mapping.py
+++ b/ethosu/vela/tflite_mapping.py
@@ -20,19 +20,11 @@
 # Contains a mapping from the various TensorFlow Lite enums and options structs, generated by the FlatBuffer code
 # generator, to Vela's internal format.
 
-import numpy as np
 import struct
 
+import numpy as np
+
 from .data_type import DataType
-
-from .tflite.TensorType import TensorType
-from .tflite.BuiltinOperator import BuiltinOperator
-from .tflite.BuiltinOptions import BuiltinOptions
-
-
-from .tflite.Padding import Padding
-from .tflite.ActivationFunctionType import ActivationFunctionType
-
 from .tflite import Conv2DOptions
 from .tflite import DepthwiseConv2DOptions
 from .tflite import ConcatEmbeddingsOptions
@@ -132,6 +124,11 @@
 from .tflite import SegmentSumOptions
 from .tflite import SelectV2Options
 from .tflite import WhileOptions
+from .tflite.TensorType import TensorType
+from .tflite.BuiltinOperator import BuiltinOperator
+from .tflite.BuiltinOptions import BuiltinOptions
+from .tflite.Padding import Padding
+from .tflite.ActivationFunctionType import ActivationFunctionType
 
 
 def inverse_map(map):
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index 535847d..4456d5a 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -18,14 +18,15 @@
 # Description:
 # Functions used to read from a TensorFlow Lite format file.
 
-from .tflite.Model import Model
-from .tflite.BuiltinOperator import BuiltinOperator
+import os.path
 
 import numpy as np
-import os.path
-from .nn_graph import Graph, Operation, Subgraph
-from .tensor import Tensor, QuantizationParameters
 
+from .tflite.Model import Model
+from .tflite.BuiltinOperator import BuiltinOperator
+from .nn_graph import Graph, Subgraph
+from .operation import Operation
+from .tensor import Tensor, QuantizationParameters
 from .tflite_mapping import builtin_operator_map, datatype_map, datatype_map_numpy, DataType
 
 
@@ -184,12 +185,7 @@
 
 class TFLiteGraph:
     def __init__(
-        self,
-        filename,
-        batch_size=1,
-        feed_dict={},
-        output_node_names=[],
-        initialisation_nodes=[],
+        self, filename, batch_size=1, feed_dict={}, output_node_names=[], initialisation_nodes=[],
     ):
 
         self.op_times = {}
@@ -238,15 +234,9 @@
 
 
 def read_tflite(
-    filename,
-    batch_size=1,
-    feed_dict={},
-    output_node_names=[],
-    initialisation_nodes=[],
+    filename, batch_size=1, feed_dict={}, output_node_names=[], initialisation_nodes=[],
 ):
-    tflite_graph = TFLiteGraph(
-        filename, batch_size, feed_dict, output_node_names, initialisation_nodes
-    )
+    tflite_graph = TFLiteGraph(filename, batch_size, feed_dict, output_node_names, initialisation_nodes)
     nng = tflite_graph.nng
     nng.refresh_after_modification()
     return nng
diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py
index f55d1ce..1f07242 100644
--- a/ethosu/vela/tflite_writer.py
+++ b/ethosu/vela/tflite_writer.py
@@ -18,7 +18,13 @@
 # Description:
 # Functions used to write to a TensorFlow Lite format file. Supports adding in file identifiers.
 
+import numpy as np
 import flatbuffers
+from flatbuffers.builder import UOffsetTFlags
+
+# ugh, the python flatbuffer interface is missing a method to add in file identifier. patching it in here:
+import flatbuffers.number_types as N
+from flatbuffers import encode
 
 from .tflite import Tensor
 from .tflite import QuantizationParameters
@@ -28,22 +34,14 @@
 from .tflite import Operator
 from .tflite import Buffer
 from .tflite import Metadata
-
-import numpy as np
-
 from .tflite_mapping import datatype_inv_map, builtin_operator_inv_map, custom_prefix, BuiltinOperator
 from .nn_graph import PassPlacement
 from .tensor import TensorPurpose, MemArea
-from flatbuffers.builder import UOffsetTFlags
 
 tflite_version = 3
 tflite_file_identifier = "TFL" + str(tflite_version)
 
 
-import flatbuffers.number_types as N
-from flatbuffers import encode
-
-
 def FinishWithFileIdentifier(self, rootTable, fid):
     if fid is None or len(fid) != 4:
         raise Exception("fid must be 4 chars")
@@ -163,8 +161,8 @@
                 tf_code, opt_serializer = builtin_operator_inv_map[code]
             except KeyError:
                 print(
-                    "Warning: Writing operation %s, which does not have a direct TensorFlow Lite mapping, as a custom operation"
-                    % (code,)
+                    "Warning: Writing operation %s, which does not have a direct TensorFlow Lite mapping,"
+                    "as a custom operation" % (code,)
                 )
                 tf_code, opt_serializer = builtin_operator_inv_map[custom_prefix]
 
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index f07aec8..07772e6 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -20,11 +20,10 @@
 #
 # Provides command line interface, options parsing, and network loading. Before calling the compiler driver.
 
-import sys
-import os.path
 import os
+import os.path
+import sys
 import time
-import subprocess
 import configparser
 import argparse
 import ast
@@ -37,7 +36,8 @@
 from . import scheduler
 from ._version import __version__
 from .scheduler import ParetoMetric
-from .nn_graph import MemArea, TensorFormat, TensorAllocator, PassPlacement
+from .nn_graph import TensorAllocator, PassPlacement
+from .tensor import MemArea
 
 
 def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 9219724..ee554b5 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -18,12 +18,11 @@
 # Description:
 # Compresses and pads the weigths. It also calculates the scales and packs with the biases.
 
-import os
-import sys
-import enum
 import math
-import numpy as np
 from collections import namedtuple
+
+import numpy as np
+
 from .numeric_util import round_up
 from .scaling import quantise_scale, reduced_quantise_scale
 from .tensor import TensorPurpose, TensorSubPurpose, TensorFormat, TensorBlockTraversal
@@ -44,7 +43,7 @@
 
     # pad with 0xFF as needed so the length of the weight stream
     # is a multiple of 16
-  
+
     while (len(compressed) % 16) != 0:
         compressed.append(0xFF)
 
@@ -348,7 +347,7 @@
 
     for sg in nng.subgraphs:
         for ps in sg.passes:
-            if ps.weight_tensor != None:
+            if ps.weight_tensor is not None:
                 npu_usage_of_tensor = find_npu_usage_of_tensor(ps.weight_tensor)
                 if npu_usage_of_tensor == NpuBlockType.ConvolutionDepthWise:
                     ps.weight_tensor.quant_values = np.transpose(ps.weight_tensor.quant_values, (0, 1, 3, 2))
@@ -382,7 +381,7 @@
                     src_tens.weight_compression_scales = ps.weight_tensor.weight_compression_scales
                     src_tens.weight_compressed_offsets = ps.weight_tensor.weight_compressed_offsets
 
-            if ps.scale_tensor != None:
+            if ps.scale_tensor is not None:
                 rescale_for_faf = False
                 activation_ops = set(("Sigmoid", "Tanh"))
                 if (ps.ops[-1].type in activation_ops) and (ps.npu_block_type != NpuBlockType.ElementWise):