Coverage for src/ensembl/utils/checksums.py: 100%
12 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-06 14:10 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-06 14:10 +0000
1# See the NOTICE file distributed with this work for additional information
2# regarding copyright ownership.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Utils for common hash operations (often referred to as checksums) over files, e.g. MD5 or SHA256."""
17import hashlib
18from pathlib import Path
20from ensembl.utils import StrPath
def get_file_hash(file_path: StrPath, algorithm: str = "md5") -> str:
    """Returns the hash value for a given file and hash algorithm.

    The file is read in fixed-size chunks, so large files are hashed without loading their whole
    content into memory at once.

    Args:
        file_path: File path to get the hash for.
        algorithm: Secure hash or message digest algorithm name, as accepted by `hashlib.new()`.

    Returns:
        Hexadecimal digest of the file content.

    Raises:
        ValueError: If `algorithm` is not a hash type supported by `hashlib`.
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024  # 1 MiB per read keeps memory bounded regardless of file size
    hash_func = hashlib.new(algorithm)
    with Path(file_path).open("rb") as in_file:
        # iter() with a sentinel stops as soon as read() returns b"" (end of file)
        for chunk in iter(lambda: in_file.read(chunk_size), b""):
            hash_func.update(chunk)
    return hash_func.hexdigest()
def validate_file_hash(file_path: StrPath, hash_value: str, algorithm: str = "md5") -> bool:
    """Returns true if the file's hash value is the same as the one provided for that hash
    algorithm, false otherwise.

    Hexadecimal digests are compared case-insensitively, so an expected value written in
    uppercase still matches the computed digest.

    Args:
        file_path: Path to the file to validate.
        hash_value: Expected hash value, as a hexadecimal string.
        algorithm: Secure hash or message digest algorithm name.
    """
    file_hash = get_file_hash(file_path, algorithm)
    # Normalise both sides: the letter case of hex digits carries no information.
    return file_hash.lower() == hash_value.lower()