Skip to content

gbff

ensembl.io.genomio.seq_region.gbff

A SeqRecord wrapper.

GBFFRecord dataclass

Wrapper around a SeqRecord object to extract specific data.

Source code in src/python/ensembl/io/genomio/seq_region/gbff.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
@dataclass
class GBFFRecord:
    """Thin wrapper over a `SeqRecord` that exposes a handful of metadata lookups.

    Attributes:
        record: The wrapped sequence record.

    """

    record: SeqRecord

    def get_genbank_id(self) -> str | None:
        """Extracts the GenBank accession quoted in the record's comment, if any.

        Only useful for RefSeq sequence records, where the GenBank accession is stored in a comment.

        Returns:
            The text captured after "The reference sequence was derived from" up to (excluding) the
            next period, or `None` if the comment is empty or does not contain that sentence.

        """
        raw_comment = str(self.record.annotations.get("comment", ""))
        if raw_comment:
            # The comment may be wrapped over several lines: collapse whitespace runs before matching
            flattened = re.sub(r"[ \n\r]+", " ", raw_comment)
            found = re.search(r"The reference sequence was derived from ([^\.]+)\.", flattened)
            if found:
                return found.group(1)
        return None

    def get_codon_table(self) -> int | None:
        """Looks up the codon table number declared in the record's features.

        Returns:
            The first value of the `transl_table` qualifier of the first feature that has one,
            converted to an integer, or `None` if no feature carries that qualifier.

        """
        carriers = (feature for feature in self.record.features if "transl_table" in feature.qualifiers)
        carrier = next(carriers, None)
        if carrier is None:
            return None
        return int(carrier.qualifiers["transl_table"][0])

    def get_organelle(self, molecule_location: Mapping[str, str] = MOLECULE_LOCATION) -> str | None:
        """Looks up the organelle location declared in the record's features.

        Only the first feature that has an `organelle` qualifier is considered.

        Args:
            molecule_location: Map of sequence type to SO location.

        Returns:
            The entry of `molecule_location` for the organelle name, or `None` if no feature has an
            `organelle` qualifier.

        Raises:
            UnknownMetadata: If the location is not part of the controlled vocabulary.

        """
        tagged = next((feature for feature in self.record.features if "organelle" in feature.qualifiers), None)
        if tagged is None:
            return None
        name = str(tagged.qualifiers["organelle"][0])
        # Drop a leading "plastid:" or "mitochondrion:" so only the specific name is looked up
        prefixed = re.match(r"^(plastid|mitochondrion):(.+)$", name)
        if prefixed:
            name = prefixed.group(2)
        # Translate to the controlled name
        try:
            return molecule_location[name]
        except KeyError as exc:
            raise UnknownMetadata(f"Unrecognized sequence location: {name}") from exc

    def is_circular(self) -> bool:
        """Tells whether the record's `topology` annotation is exactly "circular"."""
        topology = self.record.annotations.get("topology", "")
        return topology == "circular"

record instance-attribute

get_codon_table()

Returns the codon table number from a given GenBank sequence record (if present).

Source code in src/python/ensembl/io/genomio/seq_region/gbff.py
55
56
57
58
59
60
def get_codon_table(self) -> int | None:
    """Returns the codon table number from a given a GenBank sequence record (if present)."""
    for feat in self.record.features:
        if "transl_table" in feat.qualifiers:
            return int(feat.qualifiers["transl_table"][0])
    return None

get_genbank_id()

Returns the GenBank accession from a given sequence record (if present).

Only useful for RefSeq sequence records, where the GenBank accession is stored in a comment.

Parameters:

Name Type Description Default
record

Sequence record.

required
Source code in src/python/ensembl/io/genomio/seq_region/gbff.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def get_genbank_id(self) -> str | None:
    """Returns the GenBank accession from a given sequence record (if present).

    Only useful for RefSeq sequence records, where the GenBank accession is stored in a comment.

    Args:
        record: Sequence record.

    """
    comment = str(self.record.annotations.get("comment", ""))
    if not comment:
        return None
    comment = re.sub(r"[ \n\r]+", " ", comment)
    match = re.search(r"The reference sequence was derived from ([^\.]+)\.", comment)
    if not match:
        return None
    return match.group(1)

get_organelle(molecule_location=MOLECULE_LOCATION)

Returns the organelle location from the given GenBank record (if present).

Parameters:

Name Type Description Default
record

GenBank sequence record.

required
molecule_location Mapping[str, str]

Map of sequence type to SO location.

MOLECULE_LOCATION

Raises:

Type Description
UnknownMetadata

If the location is not part of the controlled vocabulary.

Source code in src/python/ensembl/io/genomio/seq_region/gbff.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def get_organelle(self, molecule_location: Mapping[str, str] = MOLECULE_LOCATION) -> str | None:
    """Looks up the organelle location declared in the record's features.

    Only the first feature that has an `organelle` qualifier is considered.

    Args:
        molecule_location: Map of sequence type to SO location.

    Returns:
        The entry of `molecule_location` for the organelle name, or `None` if no feature has an
        `organelle` qualifier.

    Raises:
        UnknownMetadata: If the location is not part of the controlled vocabulary.

    """
    tagged = next((feature for feature in self.record.features if "organelle" in feature.qualifiers), None)
    if tagged is None:
        return None
    name = str(tagged.qualifiers["organelle"][0])
    # Drop a leading "plastid:" or "mitochondrion:" so only the specific name is looked up
    prefixed = re.match(r"^(plastid|mitochondrion):(.+)$", name)
    if prefixed:
        name = prefixed.group(2)
    # Translate to the controlled name
    try:
        return molecule_location[name]
    except KeyError as exc:
        raise UnknownMetadata(f"Unrecognized sequence location: {name}") from exc

is_circular()

Returns True if the record says that the sequence is circular, False otherwise.

Source code in src/python/ensembl/io/genomio/seq_region/gbff.py
90
91
92
def is_circular(self) -> bool:
    """Tells whether the record's `topology` annotation is exactly "circular"."""
    topology = self.record.annotations.get("topology", "")
    return topology == "circular"