Skip to content

api

python.ensembl.ncbi_taxonomy.api

Taxonomy API module

Taxonomy

Contains all the taxonomy related functions over NCBITaxonomy ORM

Attributes:

Name Type Description
session

db Session()

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
@as_declarative()
class Taxonomy:
    """Contains all the taxonomy related functions over NCBITaxonomy ORM

    Every method is a classmethod that takes the database ``session`` to query
    as its first argument instead of reading it from the class.
    """

    @classmethod
    def fetch_node_by_id(cls, session: Session, taxon_id: int) -> NCBITaxonomy:
        """Returns the first taxonomy node object matching ``taxon_id``

        Args:
            session: Database session used to run the query
            taxon_id: Unique taxonomy identifier in database

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
        """
        node = session.query(NCBITaxonomy).filter(NCBITaxonomy.taxon_id == taxon_id).first()
        if not node:
            raise NoResultFound()
        return node

    @classmethod
    def fetch_taxon_by_species_name(cls, session: Session, name: str) -> NCBITaxonomy:
        """Returns first taxonomy object matching ``name``

        Underscores in ``name`` are replaced by spaces before matching, and only
        rows whose ``name_class`` is ``"scientific name"`` are considered.

        Args:
            session: Database session used to run the query
            name: Scientific ncbi_taxa_name.name in database

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if ``name`` does not exist
        """
        taxon = (
            session.query(NCBITaxonomy)
            .filter(NCBITaxonomy.name == (name.replace("_", " ")))
            .filter(NCBITaxonomy.name_class == "scientific name")
            .first()
        )
        if not taxon:
            raise NoResultFound()
        return taxon

    @classmethod
    def parent(cls, session: Session, taxon_id: int) -> NCBITaxonomy:
        """Returns taxonomy node object for parent node

        Args:
            session: Database session used to run the query
            taxon_id: Unique taxonomy identifier in database

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if the query for ``taxon_id`` and
            its scientific-name parent returns no row
        """
        ParentTaxonomy = aliased(NCBITaxonomy, name="parent_ncbi_taxonomy")
        row = (
            session.query(NCBITaxonomy, ParentTaxonomy)
            .outerjoin(ParentTaxonomy, NCBITaxonomy.parent_id == ParentTaxonomy.taxon_id)
            .filter(NCBITaxonomy.taxon_id == taxon_id)
            .filter(ParentTaxonomy.name_class == "scientific name")
            .first()
        )
        # ``first()`` yields None when nothing matched; test for that explicitly
        # instead of relying on the TypeError raised by subscripting None.
        if row is None:
            raise NoResultFound()
        # The row is (node, parent); only the parent is returned.
        return row[1]

    @classmethod
    def children(cls, session: Session, taxon_id: int) -> tuple:
        """Returns the children nodes of ``taxon_id``

        Args:
            session: Database session used to run the query
            taxon_id: Unique taxonomy identifier in database

        Returns:
            Tuple with the ``__dict__`` of every scientific-name row whose
            ``parent_id`` equals ``taxon_id``.

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
            or has no children
        """
        child_nodes = (
            session.query(NCBITaxonomy)
            .filter(NCBITaxonomy.parent_id == taxon_id)
            .filter(NCBITaxonomy.name_class == "scientific name")
            .all()
        )
        rows = tuple(node.__dict__ for node in child_nodes)
        if not rows:
            raise NoResultFound()
        return rows

    @classmethod
    def is_root(cls, session: Session, taxon_id: int) -> bool:
        """Returns True if ``taxon_id`` is a root and False if not

        A node counts as a root when a row exists whose ``root_id`` and
        ``taxon_id`` both equal ``taxon_id``.

        Args:
            session: Database session used to run the query
            taxon_id: Unique taxonomy identifier in database
        """
        try:
            node = (
                session.query(NCBITaxaNode)
                .filter(NCBITaxaNode.root_id == taxon_id, NCBITaxaNode.taxon_id == taxon_id)
                .one()
            )
        except NoResultFound:
            return False
        return bool(node)

    @classmethod
    def num_descendants(cls, session: Session, taxon_id: int) -> int:
        """Returns number of descendants from ``taxon_id``

        The count is derived from the node's ``left_index`` and ``right_index``
        as ``(right_index - left_index - 1) // 2``.

        Args:
            session: Database session used to run the query
            taxon_id: Unique taxonomy identifier in database

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
        """
        # One query fetches both indexes; ``one()`` raises NoResultFound when
        # the node is missing.
        left_index, right_index = (
            session.query(NCBITaxaNode.left_index, NCBITaxaNode.right_index)
            .filter(NCBITaxaNode.taxon_id == taxon_id)
            .one()
        )
        # Floor division keeps the result an ``int`` as the signature promises.
        return (right_index - left_index - 1) // 2

    @classmethod
    def is_leaf(cls, session: Session, taxon_id: int) -> bool:
        """Returns True if ``taxon_id`` is a leaf and False if not

        Args:
            session: Database session used to run the query
            taxon_id: Unique taxonomy identifier in database

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
        """
        return cls.num_descendants(session, taxon_id) == 0

    @classmethod
    def fetch_ancestors(cls, session: Session, taxon_id: int) -> Tuple:
        """Returns a tuple of ancestor nodes from ``taxon_id``

        Args:
            session: Database session used to run the query
            taxon_id: Unique taxonomy identifier in database

        Returns:
            Tuple with the ``__dict__`` of every ancestor node row, sorted by
            ascending ``taxon_id``.

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
            or has no ancestors
        """
        ParentTaxaNode = aliased(NCBITaxaNode)
        rows = (
            session.query(ParentTaxaNode, NCBITaxaNode)
            .outerjoin(
                NCBITaxaNode,
                and_(
                    # Keep nodes whose index interval contains the target's
                    # left index, leaving out the target node itself.
                    NCBITaxaNode.left_index.between(ParentTaxaNode.left_index, ParentTaxaNode.right_index),
                    ParentTaxaNode.taxon_id != NCBITaxaNode.taxon_id,
                ),
            )
            .filter(NCBITaxaNode.taxon_id == taxon_id)
            .all()
        )
        if not rows:
            raise NoResultFound()
        # Each row is (ancestor, target); only the ancestor side is returned.
        ancestors = sorted((row[0].__dict__ for row in rows), key=lambda x: x["taxon_id"])
        return tuple(ancestors)

    @classmethod
    def all_common_ancestors(cls, session: Session, taxon_id_1: int, taxon_id_2: int) -> tuple:
        """Returns a tuple of common ancestor node objects shared between taxa

        The result is ordered by descending number of descendants, with ties
        broken by ascending ``taxon_id``.

        Args:
            session: Database session used to run the queries
            taxon_id_1: Unique taxonomy identifier in database
            taxon_id_2: Unique taxonomy identifier in database

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if ``taxon_id_1`` or
            ``taxon_id_2`` do not exist or have no common ancestors
        """
        # ``fetch_ancestors`` raises NoResultFound itself and never returns
        # None, so no None check is needed here.
        ancestor_ids_1 = {taxon["taxon_id"] for taxon in cls.fetch_ancestors(session, taxon_id_1)}
        ancestor_ids_2 = {taxon["taxon_id"] for taxon in cls.fetch_ancestors(session, taxon_id_2)}
        common_ancestors = ancestor_ids_1 & ancestor_ids_2
        # Honour the documented contract instead of returning an empty tuple.
        if not common_ancestors:
            raise NoResultFound()
        ordered_ids = sorted(
            common_ancestors,
            key=lambda taxon_id: (-cls.num_descendants(session, taxon_id), taxon_id),
        )
        return tuple(cls.fetch_node_by_id(session, taxon_id) for taxon_id in ordered_ids)

    @classmethod
    def last_common_ancestor(cls, session: Session, taxon_id_1: int, taxon_id_2: int) -> NCBITaxonomy:
        """Returns most recent common ancestor node object shared between taxa

        Args:
            session: Database session used to run the queries
            taxon_id_1: Unique taxonomy identifier in database
            taxon_id_2: Unique taxonomy identifier in database

        Raises:
            sqlalchemy.orm.exc.NoResultFound: if ``taxon_id_1`` or
            ``taxon_id_2`` do not exist or have no common ancestors
        """
        # ``all_common_ancestors`` raises NoResultFound rather than returning
        # an empty tuple, so index 0 is always available.
        # NOTE(review): index 0 is the common ancestor with the MOST
        # descendants under the current sort key - confirm that this is the
        # intended "most recent" ancestor.
        common_ancestors = cls.all_common_ancestors(session, taxon_id_1, taxon_id_2)
        return common_ancestors[0]

all_common_ancestors(session, taxon_id_1, taxon_id_2) classmethod

Returns a tuple of common ancestor node objects shared between taxa

Parameters:

Name Type Description Default
taxon_id_1 int

Unique taxonomy identifier in database

required
taxon_id_2 int

Unique taxonomy identifier in database

required

Raises:

Type Description
NoResultFound

if taxon_id_1 or taxon_id_2 do not exist or have no common ancestors

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
@classmethod
def all_common_ancestors(cls, session: Session, taxon_id_1: int, taxon_id_2: int) -> tuple:
    """Returns a tuple of common ancestor node objects shared between taxa

    The result is ordered by descending number of descendants, with ties
    broken by ascending ``taxon_id``.

    Args:
        session: Database session used to run the queries
        taxon_id_1: Unique taxonomy identifier in database
        taxon_id_2: Unique taxonomy identifier in database

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if ``taxon_id_1`` or
        ``taxon_id_2`` do not exist or have no common ancestors
    """
    # ``fetch_ancestors`` raises NoResultFound itself and never returns None,
    # so no None check is needed here.
    ancestor_ids_1 = {taxon["taxon_id"] for taxon in cls.fetch_ancestors(session, taxon_id_1)}
    ancestor_ids_2 = {taxon["taxon_id"] for taxon in cls.fetch_ancestors(session, taxon_id_2)}
    common_ancestors = ancestor_ids_1 & ancestor_ids_2
    # Honour the documented contract instead of returning an empty tuple.
    if not common_ancestors:
        raise NoResultFound()
    ordered_ids = sorted(
        common_ancestors,
        key=lambda taxon_id: (-cls.num_descendants(session, taxon_id), taxon_id),
    )
    return tuple(cls.fetch_node_by_id(session, taxon_id) for taxon_id in ordered_ids)

children(session, taxon_id) classmethod

Returns taxonomy node object for children nodes

Parameters:

Name Type Description Default
taxon_id int

Unique taxonomy identifier in database

required

Raises:

Type Description
NoResultFound

if taxon_id does not exist or has no children

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
@classmethod
def children(cls, session: Session, taxon_id: int) -> tuple:
    """Returns the children nodes of ``taxon_id``

    Args:
        session: Database session used to run the query
        taxon_id: Unique taxonomy identifier in database

    Returns:
        Tuple with the ``__dict__`` of every scientific-name row whose
        ``parent_id`` equals ``taxon_id``.

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
        or has no children
    """
    child_query = session.query(NCBITaxonomy).filter(
        NCBITaxonomy.parent_id == taxon_id,
        NCBITaxonomy.name_class == "scientific name",
    )
    child_rows = tuple(child.__dict__ for child in child_query.all())
    if child_rows:
        return child_rows
    raise NoResultFound()

fetch_ancestors(session, taxon_id) classmethod

Returns a tuple of ancestor node objects from taxon_id

Parameters:

Name Type Description Default
taxon_id int

Unique taxonomy identifier in database

required

Raises:

Type Description
NoResultFound

if taxon_id does not exist or has no ancestors

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
@classmethod
def fetch_ancestors(cls, session: Session, taxon_id: int) -> Tuple:
    """Returns a tuple of ancestor nodes from ``taxon_id``

    Args:
        session: Database session used to run the query
        taxon_id: Unique taxonomy identifier in database

    Returns:
        Tuple with the ``__dict__`` of every ancestor node row, sorted by
        ascending ``taxon_id``.

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
        or has no ancestors
    """
    ancestor_alias = aliased(NCBITaxaNode)
    # Keep nodes whose index interval contains the target's left index,
    # leaving out the target node itself.
    ancestor_condition = and_(
        NCBITaxaNode.left_index.between(ancestor_alias.left_index, ancestor_alias.right_index),
        ancestor_alias.taxon_id != NCBITaxaNode.taxon_id,
    )
    matches = (
        session.query(ancestor_alias, NCBITaxaNode)
        .outerjoin(NCBITaxaNode, ancestor_condition)
        .filter(NCBITaxaNode.taxon_id == taxon_id)
        .all()
    )
    if not matches:
        raise NoResultFound()
    # Each match is (ancestor, target); only the ancestor side is returned.
    ancestors = [match[0].__dict__ for match in matches]
    ancestors.sort(key=lambda entry: entry["taxon_id"])
    return tuple(ancestors)

fetch_node_by_id(session, taxon_id) classmethod

Returns taxonomy node object by taxon_id

Parameters:

Name Type Description Default
taxon_id int

Unique taxonomy identifier in database

required

Raises:

Type Description
NoResultFound

if taxon_id does not exist

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
@classmethod
def fetch_node_by_id(cls, session: Session, taxon_id: int) -> NCBITaxonomy:
    """Returns the first taxonomy node object matching ``taxon_id``

    Args:
        session: Database session used to run the query
        taxon_id: Unique taxonomy identifier in database

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
    """
    matching_nodes = session.query(NCBITaxonomy).filter(NCBITaxonomy.taxon_id == taxon_id)
    node = matching_nodes.first()
    if node:
        return node
    raise NoResultFound()

fetch_taxon_by_species_name(session, name) classmethod

Returns first taxonomy object matching name

Parameters:

Name Type Description Default
name str

Scientific ncbi_taxa_name.name in database

required

Raises:

Type Description
NoResultFound

if name does not exist

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
@classmethod
def fetch_taxon_by_species_name(cls, session: Session, name: str) -> NCBITaxonomy:
    """Returns first taxonomy object matching ``name``

    Underscores in ``name`` are replaced by spaces before matching, and only
    rows whose ``name_class`` is ``"scientific name"`` are considered.

    Args:
        session: Database session used to run the query
        name: Scientific ncbi_taxa_name.name in database

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if ``name`` does not exist
    """
    spaced_name = name.replace("_", " ")
    taxon = (
        session.query(NCBITaxonomy)
        .filter(
            NCBITaxonomy.name == spaced_name,
            NCBITaxonomy.name_class == "scientific name",
        )
        .first()
    )
    if taxon:
        return taxon
    raise NoResultFound()

is_leaf(session, taxon_id) classmethod

Returns True if taxon_id is a leaf and False if not

Parameters:

Name Type Description Default
taxon_id int

Unique taxonomy identifier in database

required

Raises:

Type Description
NoResultFound

if taxon_id does not exist

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
168
169
170
171
172
173
174
175
176
177
178
179
180
@classmethod
def is_leaf(cls, session: Session, taxon_id: int) -> bool:
    """Returns True if ``taxon_id`` is a leaf and False if not

    A node is a leaf when ``num_descendants`` reports zero for it.

    Args:
        session: Database session used to run the query
        taxon_id: Unique taxonomy identifier in database

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
    """
    descendant_count = cls.num_descendants(session, taxon_id)
    return descendant_count == 0

is_root(session, taxon_id) classmethod

Returns True if taxon_id is a root and False if not

Parameters:

Name Type Description Default
taxon_id int

Unique taxonomy identifier in database

required
Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
@classmethod
def is_root(cls, session: Session, taxon_id: int) -> bool:
    """Returns True if ``taxon_id`` is a root and False if not

    A node counts as a root when a row exists whose ``root_id`` and
    ``taxon_id`` both equal ``taxon_id``.

    Args:
        session: Database session used to run the query
        taxon_id: Unique taxonomy identifier in database
    """
    root_query = session.query(NCBITaxaNode).filter(
        NCBITaxaNode.root_id == taxon_id, NCBITaxaNode.taxon_id == taxon_id
    )
    try:
        root_node = root_query.one()
    except NoResultFound:
        # No self-rooted row for this identifier.
        return False
    return bool(root_node)

last_common_ancestor(session, taxon_id_1, taxon_id_2) classmethod

Returns most recent common ancestor node object shared between taxa

Parameters:

Name Type Description Default
taxon_id_1 int

Unique taxonomy identifier in database

required
taxon_id_2 int

Unique taxonomy identifier in database

required

Raises:

Type Description
NoResultFound

if taxon_id_1 or taxon_id_2 do not exist or have no common ancestors

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
@classmethod
def last_common_ancestor(cls, session: Session, taxon_id_1: int, taxon_id_2: int) -> NCBITaxonomy:
    """Returns most recent common ancestor node object shared between taxa

    Args:
        session: Database session used to run the queries
        taxon_id_1: Unique taxonomy identifier in database
        taxon_id_2: Unique taxonomy identifier in database

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if ``taxon_id_1`` or
        ``taxon_id_2`` do not exist or have no common ancestors
    """
    common_ancestors = cls.all_common_ancestors(session, taxon_id_1, taxon_id_2)
    # Raise the documented exception instead of an IndexError when the two
    # taxa share no ancestor.
    if not common_ancestors:
        raise NoResultFound()
    # NOTE(review): index 0 depends on the ordering produced by
    # ``all_common_ancestors`` - confirm that it places the most recent
    # ancestor first.
    return common_ancestors[0]

num_descendants(session, taxon_id) classmethod

Returns number of descendants from taxon_id

Parameters:

Name Type Description Default
taxon_id int

Unique taxonomy identifier in database

required

Raises:

Type Description
NoResultFound

if taxon_id does not exist

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
@classmethod
def num_descendants(cls, session: Session, taxon_id: int) -> int:
    """Returns number of descendants from ``taxon_id``

    The count is derived from the node's ``left_index`` and ``right_index``
    as ``(right_index - left_index - 1) // 2``.

    Args:
        session: Database session used to run the query
        taxon_id: Unique taxonomy identifier in database

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if ``taxon_id`` does not exist
    """
    # One query fetches both indexes; ``one()`` raises NoResultFound when the
    # node is missing.
    left_index, right_index = (
        session.query(NCBITaxaNode.left_index, NCBITaxaNode.right_index)
        .filter(NCBITaxaNode.taxon_id == taxon_id)
        .one()
    )
    # Floor division keeps the result an ``int`` as the signature promises.
    return (right_index - left_index - 1) // 2

parent(session, taxon_id) classmethod

Returns taxonomy node object for parent node

Parameters:

Name Type Description Default
taxon_id int

Unique taxonomy identifier in database

required

Raises:

Type Description
NoResultFound

if taxon_id does not exist

Source code in src/python/ensembl/ncbi_taxonomy/api/utils.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
@classmethod
def parent(cls, session: Session, taxon_id: int) -> NCBITaxonomy:
    """Returns taxonomy node object for parent node

    Args:
        session: Database session used to run the query
        taxon_id: Unique taxonomy identifier in database

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if the query for ``taxon_id`` and
        its scientific-name parent returns no row
    """
    parent_alias = aliased(NCBITaxonomy, name="parent_ncbi_taxonomy")
    node_with_parent = session.query(NCBITaxonomy, parent_alias).outerjoin(
        parent_alias, NCBITaxonomy.parent_id == parent_alias.taxon_id
    )
    match = node_with_parent.filter(
        NCBITaxonomy.taxon_id == taxon_id,
        parent_alias.name_class == "scientific name",
    ).first()
    try:
        # The match is (node, parent); subscripting None (no match) raises
        # TypeError, which is translated below.
        parent_node = match[1]
    except TypeError as exc:
        raise NoResultFound() from exc
    return parent_node