Mirror of https://github.com/rommapp/romm.git, synced 2026-03-03 02:27:00 +00:00.
Merge pull request #3029 from rommapp/aikido-13126604
[AIKIDO-13126604] Stream file when building file hash
This commit is contained in:
@@ -9,36 +9,6 @@ from models.user import User
|
||||
from .base_handler import FSHandler
|
||||
|
||||
|
||||
def compute_file_hash(file_path: str) -> str:
    """Return the MD5 hex digest of the file at *file_path*.

    The file is consumed in 8 KiB chunks, so arbitrarily large files can
    be hashed without ever holding their full contents in memory.  MD5 is
    used purely as a content fingerprint, not for security
    (``usedforsecurity=False``).
    """
    digest = hashlib.md5(usedforsecurity=False)
    with open(file_path, "rb") as fh:
        while block := fh.read(8192):
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def compute_zip_hash(zip_path: str) -> str:
    """Return a deterministic MD5 hex digest for the contents of a ZIP archive.

    Each non-directory member is hashed individually; the per-member digests
    are combined as ``name:hexdigest`` lines (sorted by member name so the
    result is independent of archive ordering), and the MD5 of that combined
    text is returned.

    Fix vs. original: members are now streamed in 8 KiB chunks via
    ``zf.open(name)`` instead of ``zf.read(name)``, which loaded each whole
    member into memory.  The resulting digest is byte-for-byte identical.
    """
    with zipfile.ZipFile(zip_path, "r") as zf:
        file_hashes = []
        for name in sorted(zf.namelist()):
            # Directory entries end with "/" and carry no content.
            if name.endswith("/"):
                continue
            member_hash = hashlib.md5(usedforsecurity=False)
            with zf.open(name) as member:
                while chunk := member.read(8192):
                    member_hash.update(chunk)
            file_hashes.append(f"{name}:{member_hash.hexdigest()}")
        combined = "\n".join(file_hashes)
        return hashlib.md5(combined.encode(), usedforsecurity=False).hexdigest()
|
||||
|
||||
|
||||
def compute_content_hash(file_path: str) -> str | None:
    """Return a best-effort content hash for *file_path*, or ``None`` on failure.

    ZIP archives are fingerprinted member-by-member via compute_zip_hash();
    any other file is hashed as a plain byte stream via compute_file_hash().
    Errors are logged at debug level and swallowed so callers can treat the
    hash as optional metadata.
    """
    try:
        hasher = compute_zip_hash if zipfile.is_zipfile(file_path) else compute_file_hash
        return hasher(file_path)
    except Exception as e:
        log.debug(f"Failed to compute content hash for {file_path}: {e}")
        return None
|
||||
|
||||
|
||||
class FSAssetsHandler(FSHandler):
|
||||
def __init__(self) -> None:
|
||||
super().__init__(base_path=ASSETS_BASE_PATH)
|
||||
@@ -97,3 +67,30 @@ class FSAssetsHandler(FSHandler):
|
||||
return self._build_asset_file_path(
|
||||
user, "screenshots", platform_fs_slug, rom_id
|
||||
)
|
||||
|
||||
async def _compute_file_hash(self, file_path: str) -> str:
    """Return the MD5 hex digest of *file_path*, read asynchronously.

    The file is consumed through ``self.stream_file()`` in 8 KiB chunks,
    so large assets never need to fit in memory at once.  MD5 is used as
    a content fingerprint only (``usedforsecurity=False``).
    """
    digest = hashlib.md5(usedforsecurity=False)
    async with await self.stream_file(file_path=file_path) as stream:
        while True:
            block = await stream.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
async def _compute_zip_hash(self, zip_path: str) -> str:
|
||||
with zipfile.ZipFile(zip_path, "r") as zf:
|
||||
file_hashes = []
|
||||
for name in sorted(zf.namelist()):
|
||||
if not name.endswith("/"):
|
||||
content = zf.read(name)
|
||||
file_hash = hashlib.md5(content, usedforsecurity=False).hexdigest()
|
||||
file_hashes.append(f"{name}:{file_hash}")
|
||||
combined = "\n".join(file_hashes)
|
||||
return hashlib.md5(combined.encode(), usedforsecurity=False).hexdigest()
|
||||
|
||||
async def compute_content_hash(self, file_path: str) -> str | None:
    """Return a best-effort content hash for *file_path*, or ``None`` on failure.

    ZIP archives are fingerprinted member-by-member via _compute_zip_hash();
    any other file is hashed as a streamed byte sequence via
    _compute_file_hash().  Errors are logged at debug level and swallowed so
    callers can treat the hash as optional metadata.
    """
    try:
        if zipfile.is_zipfile(file_path):
            digest = await self._compute_zip_hash(file_path)
        else:
            digest = await self._compute_file_hash(file_path)
        return digest
    except Exception as e:
        log.debug(f"Failed to compute content hash for {file_path}: {e}")
        return None
|
||||
|
||||
@@ -9,7 +9,6 @@ from config.config_manager import config_manager as cm
|
||||
from endpoints.responses.rom import SimpleRomSchema
|
||||
from handler.database import db_platform_handler, db_rom_handler
|
||||
from handler.filesystem import fs_asset_handler, fs_firmware_handler
|
||||
from handler.filesystem.assets_handler import compute_content_hash
|
||||
from handler.filesystem.roms_handler import FSRom
|
||||
from handler.metadata import (
|
||||
meta_flashpoint_handler,
|
||||
@@ -833,8 +832,9 @@ async def _scan_asset(file_name: str, asset_path: str, should_hash: bool = False
|
||||
}
|
||||
|
||||
if should_hash:
|
||||
absolute_path = f"{ASSETS_BASE_PATH}/{file_path}"
|
||||
result["content_hash"] = compute_content_hash(absolute_path)
|
||||
result["content_hash"] = await fs_asset_handler.compute_content_hash(
|
||||
f"{ASSETS_BASE_PATH}/{file_path}"
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -1826,6 +1826,8 @@ class TestConfirmDownload:
|
||||
sync = db_device_save_sync_handler.get_sync(
|
||||
device_id=device.id, save_id=save.id
|
||||
)
|
||||
assert sync is not None
|
||||
|
||||
assert sync.last_synced_at.replace(
|
||||
microsecond=0, tzinfo=None
|
||||
) == save.updated_at.replace(microsecond=0, tzinfo=None)
|
||||
@@ -1853,10 +1855,11 @@ class TestConfirmDownload:
|
||||
updated_device = db_device_handler.get_device(
|
||||
device_id=device.id, user_id=device.user_id
|
||||
)
|
||||
|
||||
assert updated_device is not None
|
||||
assert updated_device.last_seen is not None
|
||||
if original_last_seen:
|
||||
assert updated_device.last_seen > original_last_seen
|
||||
else:
|
||||
assert updated_device.last_seen is not None
|
||||
|
||||
def test_confirm_download_save_not_found(
|
||||
self,
|
||||
@@ -2096,42 +2099,50 @@ class TestContentHashDeduplication:
|
||||
|
||||
|
||||
class TestContentHashComputation:
    """Tests for FSAssetsHandler's streamed content-hash helpers.

    Fix vs. SOURCE: the text contained diff residue — both the pre-change
    (sync ``compute_file_hash``) and post-change (async
    ``fs_asset_handler._compute_file_hash``) lines of each test were present
    without +/- markers, which is not valid Python.  This reconstructs the
    post-change version: async tests that mock ``validate_path`` and call the
    handler instance's streamed hash method.
    """

    @mock.patch("handler.filesystem.fs_asset_handler.validate_path")
    async def test_compute_file_hash(self, mock_validate_path, tmp_path):
        from handler.filesystem import fs_asset_handler

        test_file = tmp_path / "test.sav"
        test_file.write_bytes(b"test content for hashing")
        mock_validate_path.return_value = test_file

        hash_result = await fs_asset_handler._compute_file_hash(str(test_file))

        assert hash_result is not None
        # MD5 hex digests are always 32 characters.
        assert len(hash_result) == 32

        # Hashing the same file again must be deterministic.
        hash_result2 = await fs_asset_handler._compute_file_hash(str(test_file))
        assert hash_result == hash_result2

    @mock.patch("handler.filesystem.fs_asset_handler.validate_path")
    async def test_same_content_produces_same_hash(self, mock_validate_path, tmp_path):
        from handler.filesystem import fs_asset_handler

        file1 = tmp_path / "save1.sav"
        file2 = tmp_path / "save2.sav"
        file1.write_bytes(b"identical content")
        file2.write_bytes(b"identical content")
        mock_validate_path.side_effect = [file1, file2]

        hash1 = await fs_asset_handler._compute_file_hash(str(file1))
        hash2 = await fs_asset_handler._compute_file_hash(str(file2))

        assert hash1 == hash2

    @mock.patch("handler.filesystem.fs_asset_handler.validate_path")
    async def test_different_content_produces_different_hash(
        self, mock_validate_path, tmp_path
    ):
        from handler.filesystem import fs_asset_handler

        file1 = tmp_path / "save1.sav"
        file2 = tmp_path / "save2.sav"
        file1.write_bytes(b"content A")
        file2.write_bytes(b"content B")
        mock_validate_path.side_effect = [file1, file2]

        hash1 = await fs_asset_handler._compute_file_hash(str(file1))
        hash2 = await fs_asset_handler._compute_file_hash(str(file2))

        assert hash1 != hash2
|
||||
|
||||
Reference in New Issue
Block a user