Skip to content

archive

ensembl.utils.archive

Utils for common IO operations over archive files, e.g. tar or gzip.

SUPPORTED_ARCHIVE_FORMATS = [ext for elem in shutil.get_unpack_formats() for ext in elem[1]] module-attribute

extract_file(src_file, dst_dir)

Extracts the src_file into dst_dir.

If the file is not an archive, it will be copied to dst_dir. dst_dir will be created if it does not exist.

Parameters:

Name Type Description Default
src_file StrPath

Path to the file to unpack.

required
dst_dir StrPath

Path to the folder into which the file will be extracted.

required
Source code in src/ensembl/utils/archive.py
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def extract_file(src_file: StrPath, dst_dir: StrPath) -> None:
    """Unpacks `src_file` into `dst_dir`, or copies it there if it is not an archive.

    A file is treated as an archive when any trailing combination of its suffixes (for
    instance both `.tar.gz` and `.gz` for `data.tar.gz`) is listed in
    `SUPPORTED_ARCHIVE_FORMATS`. In the non-archive case `dst_dir` is created, including any
    missing parents, before the file is copied into it.

    Args:
        src_file: Path to the file to unpack.
        dst_dir: Path to the folder where to extract the file.

    """
    src_file = Path(src_file)
    suffixes = src_file.suffixes
    # Every trailing run of suffixes is a candidate extension, so compound extensions such as
    # ".tar.gz" are recognised as well as simple ones such as ".zip"
    candidates = {"".join(suffixes[start:]) for start, _ in enumerate(suffixes)}

    if candidates.isdisjoint(SUPPORTED_ARCHIVE_FORMATS):
        # Not an archive: mimic shutil.unpack_archive() by creating `dst_dir` before copying
        Path(dst_dir).mkdir(parents=True, exist_ok=True)
        shutil.copy(src_file, dst_dir)
        return

    shutil.unpack_archive(src_file, dst_dir)

extract_file_cli()

Command-line entry point for the extract_file function.

Source code in src/ensembl/utils/archive.py
 97
 98
 99
100
101
102
103
104
105
def extract_file_cli() -> None:
    """Command-line entry point that parses the arguments and calls `extract_file`.

    Two options are defined: `--src_file` (required) and `--dst_dir`, which defaults to the
    current working directory at the time of the call.

    """
    default_dst_dir = Path.cwd()
    cli_parser = ArgumentParser(description="Extracts file to the given location.")
    cli_parser.add_argument_src_path("--src_file", required=True, help="Path to the file to unpack")
    cli_parser.add_argument_dst_path(
        "--dst_dir",
        default=default_dst_dir,
        help="Path to the folder where to extract the file",
    )
    options = cli_parser.parse_args()
    extract_file(options.src_file, options.dst_dir)

open_gz_file(file_path)

Yields an open file object, even if the file is compressed with gzip.

The file is expected to contain text, and the function is meant to be used as a context manager in a "with" statement.

Parameters:

Name Type Description Default
file_path StrPath

A (single) file path to open.

required
Source code in src/ensembl/utils/archive.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
@contextmanager
def open_gz_file(file_path: StrPath) -> Generator[TextIO, None, None]:
    """Context manager that yields a text-mode file handle for a plain or gzip-compressed file.

    Only the final suffix of the path is inspected: if it is `.gz` the file is opened through
    `gzip.open()`, otherwise it is opened as a regular file. In both cases the handle is opened
    in "rt" mode and is closed when the `with` block exits.

    Args:
        file_path: A (single) file path to open.

    """
    path = Path(file_path)
    is_gzipped = path.suffix == ".gz"
    # Choose the opener first so that a single `with` block manages the handle in either case
    stream = gzip.open(path, "rt") if is_gzipped else path.open("rt")
    with stream as text_handle:
        yield text_handle