Skip to content

report

ensembl.io.genomio.seq_region.report

Object for an INSDC assembly report to expose its data and metadata easily.

ReportRecord

Represent an assembly report file. It exposes two things: the metadata, as a dict built from the comment lines; and a DictReader that yields all the seq_region lines of the report as dicts.

Source code in src/python/ensembl/io/genomio/seq_region/report.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
class ReportRecord:
    """Represent an assembly report file.

    Exposes 2 things:
    - `metadata`: a dict built from the "# key: value" comment lines.
    - `reader`: a `csv.DictReader` that yields all the seq_region lines of the report as dicts.
    """

    def __init__(self, report_path: Path) -> None:
        """Load the report at `report_path` and prepare the metadata and the row reader.

        Args:
            report_path: Path to the assembly report file.

        Raises:
            ValueError: If the report has no line starting with "#".

        """
        report_csv, metadata = self.report_to_csv(report_path)
        self.metadata = metadata
        # Columns are tab-separated; QUOTE_NONE keeps quote characters as literal text.
        self.reader = csv.DictReader(report_csv.splitlines(), delimiter="\t", quoting=csv.QUOTE_NONE)

    @staticmethod
    def report_to_csv(report_path: PathLike) -> Tuple[str, dict]:
        """Returns an assembly report as a CSV string.

        Lines starting with "#" are treated as comments: those shaped like "# key: value" are
        stored in the metadata, and the last comment line found is used as the column header.
        Every other line is copied unchanged as a data row.

        Args:
            report_path: Path to a seq_region file from INSDC/RefSeq.

        Returns:
            The data as a string in CSV format (header row first), and the header metadata as a
            dictionary.

        Raises:
            ValueError: If the report has no line starting with "#".

        """
        metadata = {}
        header_line = ""
        # Collect the rows in a list and join once: repeated `str +=` can be quadratic.
        data_lines = []
        with open_gz_file(report_path) as report:
            for line in report:
                if not line.startswith("#"):
                    data_lines.append(line)
                    continue
                # Get metadata values if possible
                match = re.search(r"# (.+?): (.+?)$", line)
                if match:
                    metadata[match.group(1)] = match.group(2)
                # Keep overwriting so that the last comment line ends up as the header
                header_line = line

        if not header_line:
            raise ValueError("Missing header in report")
        # Drop the first two characters of the header (the leading "# ") and its line ending
        data = header_line[2:].strip() + "\n" + "".join(data_lines)

        return data, metadata

metadata = metadata instance-attribute

reader = csv.DictReader(report_csv.splitlines(), delimiter='\t', quoting=csv.QUOTE_NONE) instance-attribute

report_to_csv(report_path) staticmethod

Returns an assembly report as a CSV string.

Parameters:

Name Type Description Default
report_path PathLike

Path to a seq_region file from INSDC/RefSeq.

required

Returns:

Type Description
Tuple[str, dict]

The data as a string in CSV format, and the header metadata as a dictionary.

Source code in src/python/ensembl/io/genomio/seq_region/report.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
@staticmethod
def report_to_csv(report_path: PathLike) -> Tuple[str, dict]:
    """Returns an assembly report as a CSV string.

    Lines starting with "#" are treated as comments: those shaped like "# key: value" are
    stored in the metadata, and the last comment line found is used as the column header.
    Every other line is copied unchanged as a data row.

    Args:
        report_path: Path to a seq_region file from INSDC/RefSeq.

    Returns:
        The data as a string in CSV format (header row first), and the header metadata as a
        dictionary.

    Raises:
        ValueError: If the report has no line starting with "#".

    """
    metadata = {}
    header_line = ""
    # Collect the rows in a list and join once: repeated `str +=` can be quadratic.
    data_lines = []
    with open_gz_file(report_path) as report:
        for line in report:
            if not line.startswith("#"):
                data_lines.append(line)
                continue
            # Get metadata values if possible
            match = re.search(r"# (.+?): (.+?)$", line)
            if match:
                metadata[match.group(1)] = match.group(2)
            # Keep overwriting so that the last comment line ends up as the header
            header_line = line

    if not header_line:
        raise ValueError("Missing header in report")
    # Drop the first two characters of the header (the leading "# ") and its line ending
    data = header_line[2:].strip() + "\n" + "".join(data_lines)

    return data, metadata