Source code for ensembl.utils.rloader
# See the NOTICE file distributed with this work for additional information
# regarding copyright ownership.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Allow to seamlessly load / read the content of a remote file as if it was located locally."""
from __future__ import annotations
__all__ = ["RemoteFileLoader"]
import configparser
import json
import logging
from io import StringIO
from typing import Any
import dotenv
import requests
import requests.exceptions
import yaml
logger = logging.getLogger(__name__)
[docs]
class RemoteFileLoader:
"""Loads remote files, allowing specific format parsing options.
Args:
parser: Parser to use for this object. Default: `None` (no format-specific parsing done).
Attributes:
available_formats: File formats with ad-hoc parser available.
parser: Parser selected for this object.
"""
available_formats: set[str] = {"yaml", "ini", "env", "json"}
parser: str | None = None
def __init__(self, parser: str | None = None) -> None:
if parser in self.available_formats:
self.parser = parser
def __parse(self, content: str) -> Any:
if self.parser == "yaml":
return yaml.load(content, yaml.SafeLoader)
if self.parser == "ini":
config = configparser.ConfigParser()
try:
config.read_string(content)
except configparser.MissingSectionHeaderError:
content = "[DEFAULT]\n" + content
config.read_string(content)
return config
if self.parser == "env":
return dotenv.dotenv_values(stream=StringIO(content))
if self.parser == "json":
return json.loads(content)
# Only return content, no parsing
return content
[docs]
def r_open(self, url: str) -> Any:
"""Returns the parsed remote file from the given URL.
Args:
url: URL of the remote file to fetch.
Raises:
requests.exception.HTTPError: If loading or requesting the given URL returned an error.
requests.exception.Timeout: If a timeout was raised whilst requesting the given URL.
"""
try:
r = requests.get(url, timeout=120)
if r.status_code == 200:
return self.__parse(r.text)
raise requests.exceptions.HTTPError(response=r)
except requests.exceptions.HTTPError as ex:
logger.exception(f"Error with request to {url}: {ex}")
raise ex
except requests.exceptions.Timeout as ex:
logger.exception(f"Request timed out {url}: {ex}")
raise ex