Skip to content

archive

ensembl.utils.archive

Utils for common IO operations over archive files, e.g. tar or gzip.

SUPPORTED_ARCHIVE_FORMATS = [ext for elem in (shutil.get_unpack_formats()) for ext in (elem[1])] module-attribute

extract_file(src_file, dst_dir)

Extracts the src_file into dst_dir.

If the file is not an archive, it will be copied to dst_dir. dst_dir will be created if it does not exist.

Parameters:

Name Type Description Default
src_file StrPath

Path to the file to unpack.

required
dst_dir StrPath

Path to the folder where to extract the file.

required
Source code in src/ensembl/utils/archive.py
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def extract_file(src_file: StrPath, dst_dir: StrPath) -> None:
    """Unpack `src_file` into `dst_dir`, or copy it there if it is not a supported archive.

    A file is treated as an archive when any trailing combination of its suffixes (e.g. both
    `.tar.gz` and `.gz` for `data.tar.gz`) is listed in `SUPPORTED_ARCHIVE_FORMATS`. In either
    case `dst_dir` is created if it does not exist yet.

    Args:
        src_file: Path to the file to unpack.
        dst_dir: Path to the folder where to extract the file.

    """
    source = Path(src_file)
    suffixes = source.suffixes
    # Build every trailing suffix combination, from the full chain down to the last suffix alone
    candidates = {"".join(suffixes[start:]) for start, _ in enumerate(suffixes)}

    if candidates.isdisjoint(SUPPORTED_ARCHIVE_FORMATS):
        # Not an archive: create `dst_dir` ourselves (shutil.unpack_archive() would have done
        # it for an archive) and copy the file over as-is
        Path(dst_dir).mkdir(parents=True, exist_ok=True)
        shutil.copy(source, dst_dir)
        return

    shutil.unpack_archive(source, dst_dir)

extract_file_cli()

Entry point for the extract_file function.

Source code in src/ensembl/utils/archive.py
101
102
103
104
105
106
107
108
109
def extract_file_cli() -> None:
    """Command-line entry point for `extract_file`.

    Parses `--src_file` (required) and `--dst_dir` (defaults to the current working directory)
    from the command line and passes them on to `extract_file`.

    """
    cli = ArgumentParser(description="Extracts file to the given location.")
    cli.add_argument_src_path("--src_file", help="Path to the file to unpack", required=True)
    cli.add_argument_dst_path(
        "--dst_dir", help="Path to the folder where to extract the file", default=Path.cwd()
    )
    options = cli.parse_args()
    extract_file(src_file=options.src_file, dst_dir=options.dst_dir)

open_gz_file(file_path, mode='rt', encoding='utf-8')

Yields an open file object, even if the file is compressed with gzip.

The file is expected to contain text, and the function can be used as a context manager in a "with" statement.

Parameters:

Name Type Description Default
file_path StrPath

A (single) file path to open.

required
mode str

The mode in which the file is opened.

'rt'
encoding str

The name of the encoding used to decode or encode the file.

'utf-8'
Source code in src/ensembl/utils/archive.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
@contextmanager
def open_gz_file(
    file_path: StrPath, mode: str = "rt", encoding: str = "utf-8"
) -> Generator[gzip.GzipFile | IO, None, None]:
    """Yields an open file object, even if the file is compressed with gzip.

    The file is expected to contain a text, and this can be used with the usual "with".

    Args:
        file_path: A (single) file path to open.
        mode: The mode in which the file is opened.
        encoding: The name of the encoding used to decode or encode the file.

    """
    src_file = Path(file_path)
    if src_file.suffix == ".gz":
        with gzip.open(src_file, mode, encoding=encoding) as fh:
            yield fh
    else:
        with src_file.open(mode, encoding=encoding) as fh:
            yield fh