Merge pull request #3029 from rommapp/aikido-13126604

[AIKIDO-13126604] Stream file when building file hash
This commit is contained in:
Georges-Antoine Assi
2026-02-16 18:34:33 -05:00
committed by GitHub
3 changed files with 55 additions and 47 deletions

View File

@@ -9,36 +9,6 @@ from models.user import User
from .base_handler import FSHandler
def compute_file_hash(file_path: str) -> str:
    """Return the MD5 hex digest of the file at *file_path*.

    The file is read in 8 KiB chunks so arbitrarily large files are never
    loaded into memory at once. MD5 is used purely as a content
    fingerprint, not for security (``usedforsecurity=False``).
    """
    digest = hashlib.md5(usedforsecurity=False)
    with open(file_path, "rb") as stream:
        while block := stream.read(8192):
            digest.update(block)
    return digest.hexdigest()
def compute_zip_hash(zip_path: str) -> str:
    """Return a deterministic MD5 hex digest for a zip archive's contents.

    Every member file is hashed individually (directory entries are
    skipped), the per-member digests are combined as ``name:hash`` lines
    sorted by member name — so the result is independent of archive
    ordering — and the combined text is hashed once more for the final
    digest. MD5 is a content fingerprint only (``usedforsecurity=False``).

    Unlike ``ZipFile.read``, which decompresses a whole member into
    memory, each member is streamed through ``ZipFile.open`` in 8 KiB
    chunks, keeping peak memory flat for large archive entries. The
    chunked digest is byte-identical to hashing the whole content.
    """
    with zipfile.ZipFile(zip_path, "r") as zf:
        file_hashes = []
        for name in sorted(zf.namelist()):
            if name.endswith("/"):  # directory entry — nothing to hash
                continue
            member_hash = hashlib.md5(usedforsecurity=False)
            with zf.open(name, "r") as member:
                while chunk := member.read(8192):
                    member_hash.update(chunk)
            file_hashes.append(f"{name}:{member_hash.hexdigest()}")
        combined = "\n".join(file_hashes)
        return hashlib.md5(combined.encode(), usedforsecurity=False).hexdigest()
def compute_content_hash(file_path: str) -> str | None:
    """Best-effort content hash for *file_path*.

    Zip archives get an order-independent per-member hash; any other file
    is hashed as a raw byte stream. Returns ``None`` (and logs at debug
    level) when hashing fails for any reason — callers treat the hash as
    optional metadata.
    """
    try:
        if not zipfile.is_zipfile(file_path):
            return compute_file_hash(file_path)
        return compute_zip_hash(file_path)
    except Exception as e:
        log.debug(f"Failed to compute content hash for {file_path}: {e}")
        return None
# Filesystem handler for user asset files, rooted at ASSETS_BASE_PATH.
# NOTE(review): the rest of this class continues beyond this diff hunk.
class FSAssetsHandler(FSHandler):
    def __init__(self) -> None:
        # Anchor all path operations at the assets base directory.
        super().__init__(base_path=ASSETS_BASE_PATH)
@@ -97,3 +67,30 @@ class FSAssetsHandler(FSHandler):
return self._build_asset_file_path(
user, "screenshots", platform_fs_slug, rom_id
)
async def _compute_file_hash(self, file_path: str) -> str:
    """Return the MD5 hex digest of *file_path*, read asynchronously.

    The file is streamed through ``self.stream_file`` in 8 KiB chunks so
    large assets are never held in memory whole. MD5 is a content
    fingerprint only, not a security measure (``usedforsecurity=False``).
    """
    digest = hashlib.md5(usedforsecurity=False)
    stream = await self.stream_file(file_path=file_path)
    async with stream as reader:
        chunk = await reader.read(8192)
        while chunk:
            digest.update(chunk)
            chunk = await reader.read(8192)
    return digest.hexdigest()
async def _compute_zip_hash(self, zip_path: str) -> str:
    """Return a deterministic MD5 hex digest for a zip archive's contents.

    Members are hashed individually (directory entries skipped), combined
    as ``name:hash`` lines sorted by member name so the digest does not
    depend on archive ordering, and the combined text is hashed once more.

    Each member is streamed through ``ZipFile.open`` in 8 KiB chunks
    instead of ``ZipFile.read``, which would decompress the whole entry
    into memory; the chunked digest is identical to whole-content hashing.

    NOTE(review): ``zipfile`` is synchronous, so this coroutine still
    blocks the event loop while reading the archive — consider offloading
    to a thread executor if archives can be large.
    """
    with zipfile.ZipFile(zip_path, "r") as zf:
        file_hashes = []
        for name in sorted(zf.namelist()):
            if name.endswith("/"):  # directory entry — nothing to hash
                continue
            member_hash = hashlib.md5(usedforsecurity=False)
            with zf.open(name, "r") as member:
                while chunk := member.read(8192):
                    member_hash.update(chunk)
            file_hashes.append(f"{name}:{member_hash.hexdigest()}")
        combined = "\n".join(file_hashes)
        return hashlib.md5(combined.encode(), usedforsecurity=False).hexdigest()
async def compute_content_hash(self, file_path: str) -> str | None:
    """Best-effort content hash for *file_path*.

    Zip archives are hashed per-member (order-independent); any other
    file is hashed as a raw byte stream. Returns ``None`` (and logs at
    debug level) when hashing fails for any reason, so callers can treat
    the hash as optional metadata.
    """
    try:
        if not zipfile.is_zipfile(file_path):
            return await self._compute_file_hash(file_path)
        return await self._compute_zip_hash(file_path)
    except Exception as e:
        log.debug(f"Failed to compute content hash for {file_path}: {e}")
        return None

View File

@@ -9,7 +9,6 @@ from config.config_manager import config_manager as cm
from endpoints.responses.rom import SimpleRomSchema
from handler.database import db_platform_handler, db_rom_handler
from handler.filesystem import fs_asset_handler, fs_firmware_handler
from handler.filesystem.assets_handler import compute_content_hash
from handler.filesystem.roms_handler import FSRom
from handler.metadata import (
meta_flashpoint_handler,
@@ -833,8 +832,9 @@ async def _scan_asset(file_name: str, asset_path: str, should_hash: bool = False
}
if should_hash:
absolute_path = f"{ASSETS_BASE_PATH}/{file_path}"
result["content_hash"] = compute_content_hash(absolute_path)
result["content_hash"] = await fs_asset_handler.compute_content_hash(
f"{ASSETS_BASE_PATH}/{file_path}"
)
return result

View File

@@ -1826,6 +1826,8 @@ class TestConfirmDownload:
sync = db_device_save_sync_handler.get_sync(
device_id=device.id, save_id=save.id
)
assert sync is not None
assert sync.last_synced_at.replace(
microsecond=0, tzinfo=None
) == save.updated_at.replace(microsecond=0, tzinfo=None)
@@ -1853,10 +1855,11 @@ class TestConfirmDownload:
updated_device = db_device_handler.get_device(
device_id=device.id, user_id=device.user_id
)
assert updated_device is not None
assert updated_device.last_seen is not None
if original_last_seen:
assert updated_device.last_seen > original_last_seen
else:
assert updated_device.last_seen is not None
def test_confirm_download_save_not_found(
self,
@@ -2096,42 +2099,50 @@ class TestContentHashDeduplication:
class TestContentHashComputation:
@mock.patch("handler.filesystem.fs_asset_handler.validate_path")
async def test_compute_file_hash(self, mock_validate_path, tmp_path):
    """Hashing the same file twice yields the same 32-char hex digest."""
    from handler.filesystem import fs_asset_handler

    test_file = tmp_path / "test.sav"
    test_file.write_bytes(b"test content for hashing")
    # Bypass path validation so the handler accepts the tmp_path file.
    mock_validate_path.return_value = test_file

    first_digest = await fs_asset_handler._compute_file_hash(str(test_file))
    assert first_digest is not None
    assert len(first_digest) == 32  # MD5 hex digest length

    second_digest = await fs_asset_handler._compute_file_hash(str(test_file))
    assert first_digest == second_digest
@mock.patch("handler.filesystem.fs_asset_handler.validate_path")
async def test_same_content_produces_same_hash(self, mock_validate_path, tmp_path):
    """Two distinct files with identical bytes hash to the same digest."""
    from handler.filesystem import fs_asset_handler

    file1 = tmp_path / "save1.sav"
    file2 = tmp_path / "save2.sav"
    file1.write_bytes(b"identical content")
    file2.write_bytes(b"identical content")
    # validate_path is consumed once per hash call, in call order.
    mock_validate_path.side_effect = [file1, file2]

    hash1 = await fs_asset_handler._compute_file_hash(str(file1))
    hash2 = await fs_asset_handler._compute_file_hash(str(file2))
    assert hash1 == hash2
@mock.patch("handler.filesystem.fs_asset_handler.validate_path")
async def test_different_content_produces_different_hash(
    self, mock_validate_path, tmp_path
):
    """Files with different bytes must hash to different digests."""
    from handler.filesystem import fs_asset_handler

    file1 = tmp_path / "save1.sav"
    file2 = tmp_path / "save2.sav"
    file1.write_bytes(b"content A")
    file2.write_bytes(b"content B")
    # validate_path is consumed once per hash call, in call order.
    mock_validate_path.side_effect = [file1, file2]

    hash1 = await fs_asset_handler._compute_file_hash(str(file1))
    hash2 = await fs_asset_handler._compute_file_hash(str(file2))
    assert hash1 != hash2