Skip to content

download

ensembl.io.genomio.genbank.download

Download a GenBank file from NCBI given its accession.

DownloadError

Bases: Exception

In case a download failed.

Source code in src/python/ensembl/io/genomio/genbank/download.py
30
31
32
33
34
class DownloadError(Exception):
    """Raised when a download fails.

    Attributes:
        msg: Human-readable description of the failure.
    """

    def __init__(self, msg: str) -> None:
        # Forward the message to Exception so that `args` and `str()` carry it
        # whether `msg` was passed positionally or by keyword.
        super().__init__(msg)
        self.msg = msg

msg = msg instance-attribute

download_genbank(accession, output_file)

Given a GenBank accession, download the corresponding file in GenBank format.

Uses NCBI Entrez service to fetch the data.

Parameters:

Name Type Description Default
accession str

INSDC Genbank record accession.

required
output_file PathLike

Path to the downloaded record in Genbank format.

required

Raises:

Type Description
DownloadError

If the download fails.

Source code in src/python/ensembl/io/genomio/genbank/download.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def download_genbank(accession: str, output_file: PathLike) -> None:
    """Given a GenBank accession, download the corresponding file in GenBank format.

    Uses NCBI Entrez service to fetch the data.

    Args:
        accession: INSDC Genbank record accession.
        output_file: Path to the downloaded record in Genbank format.

    Raises:
        DownloadError: If the download fails, either because the request itself
            could not be completed (e.g. timeout or connection error) or because
            the server did not answer with HTTP status 200.

    """

    # Fetch the record for this accession from the Entrez "nuccore" database
    # as plain text.
    entrez_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    entrez_params = {
        "db": "nuccore",
        "rettype": "gbwithparts",
        "retmode": "text",
        "id": accession,
    }
    logging.debug(f"Getting file from {entrez_url} with params {entrez_params}")

    # Transport-level failures are reported through the documented DownloadError
    # rather than leaking the underlying requests exception to the caller.
    try:
        result = requests.get(entrez_url, params=entrez_params, timeout=60)
    except requests.RequestException as err:
        raise DownloadError(f"Could not download the genbank ({accession}) file: {err}") from err

    if not (result and result.status_code == 200):
        raise DownloadError(f"Could not download the genbank ({accession}) file: {result}")

    # Write the raw bytes so the payload is stored exactly as received.
    with Path(output_file).open("wb") as gbff:
        gbff.write(result.content)
    logging.info(f"GenBank file written to {output_file}")

main()

Main script entry-point.

Source code in src/python/ensembl/io/genomio/genbank/download.py
69
70
71
72
73
74
75
76
77
78
79
def main() -> None:
    """Parse the command-line arguments and download the requested GenBank record."""
    # Declare the command-line interface.
    cli = ArgumentParser(description="Download a sequence from GenBank.")
    cli.add_argument("--accession", required=True, help="Sequence accession")
    cli.add_argument_dst_path("--output_file", required=True, help="Output GenBank file")
    cli.add_argument("--version", action="version", version=ensembl.io.genomio.__version__)
    cli.add_log_arguments()

    # Read the arguments and configure logging from them before doing any work.
    options = cli.parse_args()
    init_logging_with_args(options)

    download_genbank(
        accession=options.accession,
        output_file=options.output_file,
    )