fix: Avoid recursive os.walk calls

`os.walk` is a generator that iteratively walks the directory tree from
the specified path, top-down. However, most of the calls to `os.walk` in
the project wrap the call in `list()`, which forces it to traverse the
whole tree and recursively find all nested directories.

This is commonly not needed, as we end up just using a `[0]` index to
only access the root path.

This change adds a few utils that simplify listing files/directories,
and by default do so non-recursively. Performance gains shouldn't be
noticeable on systems with high-speed storage, but we can avoid the edge
cases of users having too many nested directories, by avoiding unneeded
I/O.
This commit is contained in:
Michael Manganiello
2024-07-13 14:10:20 -03:00
parent 1329dbbbc3
commit f20a9ffe34
4 changed files with 43 additions and 9 deletions

View File

@@ -12,6 +12,7 @@ from exceptions.fs_exceptions import (
from fastapi import UploadFile
from logger.logger import log
from models.platform import Platform
from utils.filesystem import iter_files
from .base_handler import FSHandler
@@ -38,7 +39,7 @@ class FSFirmwareHandler(FSHandler):
firmware_file_path = f"{LIBRARY_BASE_PATH}/{firmware_path}"
try:
fs_firmware_files: list[str] = list(os.walk(firmware_file_path))[0][2]
fs_firmware_files = [f for _, f in iter_files(firmware_file_path)]
except IndexError as exc:
raise FirmwareNotFoundException(platform.fs_slug) from exc

View File

@@ -8,6 +8,7 @@ from exceptions.fs_exceptions import (
FolderStructureNotMatchException,
PlatformAlreadyExistsException,
)
from utils.filesystem import iter_directories
from .base_handler import FSHandler
@@ -44,12 +45,14 @@ class FSPlatformsHandler(FSHandler):
"""
cnfg = cm.get_config()
platforms_dir = (
cnfg.HIGH_PRIO_STRUCTURE_PATH
if os.path.exists(cnfg.HIGH_PRIO_STRUCTURE_PATH)
else LIBRARY_BASE_PATH
)
try:
platforms: list[str] = (
list(os.walk(cnfg.HIGH_PRIO_STRUCTURE_PATH))[0][1]
if os.path.exists(cnfg.HIGH_PRIO_STRUCTURE_PATH)
else list(os.walk(LIBRARY_BASE_PATH))[0][1]
)
platforms = [d for _, d in iter_directories(platforms_dir)]
return self._exclude_platforms(cnfg, platforms)
except IndexError as exc:
raise FolderStructureNotMatchException from exc

View File

@@ -7,6 +7,7 @@ from config import LIBRARY_BASE_PATH
from config.config_manager import config_manager as cm
from exceptions.fs_exceptions import RomAlreadyExistsException, RomsNotFoundException
from models.platform import Platform
from utils.filesystem import iter_directories, iter_files
from .base_handler import (
LANGUAGES_BY_SHORTCODE,
@@ -84,7 +85,7 @@ class FSRomsHandler(FSHandler):
return [f for f in roms if f not in filtered_files]
def get_rom_files(self, rom: str, roms_path: str) -> list[str]:
rom_files: list = []
rom_files: list[str] = []
for path, _, files in os.walk(f"{roms_path}/{rom}"):
for f in self._exclude_files(files, "multi_parts"):
@@ -104,12 +105,12 @@ class FSRomsHandler(FSHandler):
roms_file_path = f"{LIBRARY_BASE_PATH}/{roms_path}"
try:
fs_single_roms: list[str] = list(os.walk(roms_file_path))[0][2]
fs_single_roms = [f for _, f in iter_files(roms_file_path)]
except IndexError as exc:
raise RomsNotFoundException(platform.fs_slug) from exc
try:
fs_multi_roms: list[str] = list(os.walk(roms_file_path))[0][1]
fs_multi_roms = [d for _, d in iter_directories(roms_file_path)]
except IndexError as exc:
raise RomsNotFoundException(platform.fs_slug) from exc

View File

@@ -0,0 +1,29 @@
import os
from collections.abc import Iterator
from pathlib import Path
def iter_files(path: str, recursive: bool = False) -> Iterator[tuple[Path, str]]:
    """Lazily yield the files found under ``path``.

    Each yielded item is a ``(parent, name)`` tuple: ``parent`` is the
    directory that contains the file, as a ``Path``, and ``name`` is the bare
    file name.

    With ``recursive`` left at ``False`` only the entries directly inside
    ``path`` are reported; with ``True`` every nested directory is visited
    top-down as well.

    Nothing is yielded when ``os.walk`` produces no entries for ``path``.
    """
    for current_dir, _subdirs, filenames in os.walk(path, topdown=True):
        parent = Path(current_dir)
        yield from ((parent, name) for name in filenames)
        if not recursive:
            # The first entry produced by a top-down walk is ``path`` itself;
            # stopping here keeps the walk from descending any further.
            return
def iter_directories(path: str, recursive: bool = False) -> Iterator[tuple[Path, str]]:
    """Lazily yield the sub-directories found under ``path``.

    Each yielded item is a ``(parent, name)`` tuple: ``parent`` is the
    directory that contains the sub-directory, as a ``Path``, and ``name`` is
    the bare directory name.

    With ``recursive`` left at ``False`` only the directories directly inside
    ``path`` are reported; with ``True`` every nested level is visited
    top-down as well.

    Nothing is yielded when ``os.walk`` produces no entries for ``path``.
    """
    for current_dir, subdirs, _filenames in os.walk(path, topdown=True):
        parent = Path(current_dir)
        yield from ((parent, name) for name in subdirs)
        if not recursive:
            # The first entry produced by a top-down walk is ``path`` itself;
            # stopping here keeps the walk from descending any further.
            return