Coverage for src/ensembl/utils/checksums.py: 100%

12 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-06 14:10 +0000

1# See the NOTICE file distributed with this work for additional information 

2# regarding copyright ownership. 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); 

5# you may not use this file except in compliance with the License. 

6# You may obtain a copy of the License at 

7# 

8# http://www.apache.org/licenses/LICENSE-2.0 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15"""Utils for common hash operations (often referred to as checksums) over files, e.g. MD5 or SHA-256."""

16 

17import hashlib 

18from pathlib import Path 

19 

20from ensembl.utils import StrPath 

21 

22 

def get_file_hash(file_path: StrPath, algorithm: str = "md5") -> str:
    """Returns the hash value for a given file and hash algorithm.

    The file is read in fixed-size chunks, so large files are hashed without loading their
    whole content into memory at once.

    Args:
        file_path: File path to get the hash for.
        algorithm: Secure hash or message digest algorithm name, as accepted by `hashlib.new()`.

    Returns:
        Hexadecimal digest of the file content.

    Raises:
        ValueError: If `algorithm` is not a hash type supported by `hashlib`.
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024  # 1 MiB per read keeps memory usage bounded
    hash_func = hashlib.new(algorithm)
    with Path(file_path).open("rb") as f:
        # read() returns b"" at end of file, which is the sentinel that stops the iteration
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_func.update(chunk)
    return hash_func.hexdigest()

35 

36 

def validate_file_hash(file_path: StrPath, hash_value: str, algorithm: str = "md5") -> bool:
    """Returns true if the file's hash value is the same as the one provided for that hash
    algorithm, false otherwise.

    The comparison ignores letter case, since hexadecimal digests denote the same value
    whether written in upper or lower case.

    Args:
        file_path: Path to the file to validate.
        hash_value: Expected hash value, as a hexadecimal string.
        algorithm: Secure hash or message digest algorithm name.

    Returns:
        True if the computed hash matches `hash_value`, False otherwise.
    """
    file_hash = get_file_hash(file_path, algorithm)
    # Normalise both sides so an uppercase expected checksum does not cause a spurious mismatch
    return file_hash.lower() == hash_value.lower()