Skip to content

dump

ensembl.io.genomio.events.dump

Module to dump stable id events from an Ensembl Core database

BRC4_START_DATE = datetime(2020, 5, 1) module-attribute

DictToIdsSet = Dict[str, IdsSet] module-attribute

IdsSet = Set[str] module-attribute

DumpStableIDs

A processor that creates events from pairs of ids and can print those events out.

Attributes:

Name Type Description
session

a database session, used to retrieve the data from.

Source code in src/python/ensembl/io/genomio/events/dump.py
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
class DumpStableIDs:
    """A processor that creates events from pairs of ids and can print those events out.

    Attributes:
        session: session given at construction, used to run the queries that retrieve the data.

    """

    def __init__(self, session: Session) -> None:
        """Create a processor for events.

        Args:
            session: session used to query the mapping sessions and the stable id events.

        """
        self.session = session

    def get_history(self) -> List[Event]:
        """Retrieve all events from a database.

        Every mapping session is processed in turn: its pairs of ids are loaded, grouped into
        events, and each event is tagged with the release and creation date of that session.

        Returns:
            A list of all events.

        """
        events: List[Event] = []
        for session in self.get_mapping_sessions():
            logging.info(f"Mapping session {session.new_release}")
            pairs = self.get_pairs(session.mapping_session_id)
            session_events = self.make_events(pairs)
            for event in session_events:
                event.set_release(session.new_release)
                event.set_date(session.created)
            events += session_events

        return events

    def print_events(self, events: List[Event], output_file: Path) -> None:
        """Print events in a format for BRC.

        Nothing is written (the file is not even created) when there are no events.

        Args:
            events: list of events for a given genome.
            output_file: where the events will be printed.

        """
        if not events:
            logging.info("No events to print")
            return
        with output_file.open("w") as out_fh:
            for event in events:
                for line in event.brc_format_2():
                    out_fh.write(line + "\n")

    def get_mapping_sessions(self) -> List[MappingSession]:
        """Retrieve the mapping sessions from the connected database.

        Returns:
            A list of sessions.

        """
        map_sessions_stmt = select(MappingSession)
        return list(self.session.scalars(map_sessions_stmt).unique().all())

    def get_pairs(self, session_id: int) -> List[Pair]:
        """Retrieve all pairs of ids for a given session.

        Only rows with the id type "gene" are selected, and only those where one of the two ids
        is NULL or where the old and new ids differ.

        Args:
            session_id: id of a session from the connected database.

        Returns:
            All pairs of IDs.

        """
        id_events_stmt = (
            select(StableIdEvent)
            .where(
                and_(
                    (StableIdEvent.mapping_session_id == session_id),
                    (StableIdEvent.id_type == "gene"),
                    (
                        or_(
                            (StableIdEvent.old_stable_id.is_(None)),
                            (StableIdEvent.new_stable_id.is_(None)),
                            (StableIdEvent.old_stable_id != StableIdEvent.new_stable_id),
                        )
                    ),
                )
            )
            .group_by(
                StableIdEvent.old_stable_id, StableIdEvent.new_stable_id, StableIdEvent.mapping_session_id
            )
        )
        rows = self.session.scalars(id_events_stmt).unique().all()
        return [Pair(row.old_stable_id, row.new_stable_id) for row in rows]

    def make_events(self, pairs: List[Pair]) -> List[Event]:
        """Given a list of pairs, create events.

        Args:
            pairs: list of Pair.

        Return:
            A list of events.

        """
        from_list, to_list = self.get_pairs_from_to(pairs)

        # The initial mapping is iterated as-is; the `remaining_*` dicts shrink every time an
        # event absorbs some ids, so an old id already absorbed by a previous event is skipped.
        remaining_from = from_list
        remaining_to = to_list
        events: List[Event] = []
        for old_id, new_ids in from_list.items():
            if not old_id or old_id not in remaining_from:
                continue
            event = Event({old_id}, set(new_ids))
            (event, remaining_from, remaining_to) = self.extend_event(event, remaining_from, remaining_to)
            event.add_pairs(pairs)
            events.append(event)

        # Remaining events should only be new genes
        for new_id, old_ids in remaining_to.items():
            if not new_id:
                continue
            event = Event(set(old_ids), {new_id})
            event.add_pairs(pairs)
            events.append(event)

        # Count the events by name (the name is computed before the pairs are cleaned up)
        stats: Dict[str, int] = {}
        for event in events:
            name = event.get_name()
            event.clean_pairs()
            stats[name] = stats.get(name, 0) + 1

        for stat, value in stats.items():
            logging.info(f"\t{stat} = {value}")

        return events

    @staticmethod
    def get_pairs_from_to(pairs: List[Pair]) -> Tuple[DictToIdsSet, DictToIdsSet]:
        """
        From a list of Pairs, extract a mapping of all ids from a given old id (from_list),
        and a mapping of all ids to a given new id (to_list).

        A missing (None) id is indexed under the empty string key; empty ids are removed from
        the value sets.

        Args:
            pairs: list of Pairs.

        Return:
             Tuple of 2 values:
                from_list
                to_list

        """
        from_list: DictToIdsSet = {}
        to_list: DictToIdsSet = {}
        for pair in pairs:
            old_id = pair.old_id
            new_id = pair.new_id
            if old_id is None:
                old_id = ""
            if new_id is None:
                new_id = ""

            from_list.setdefault(old_id, set()).add(new_id)
            to_list.setdefault(new_id, set()).add(old_id)

        # Remove empty elements
        for from_id in from_list:
            from_list[from_id] = Event.clean_set(from_list[from_id])
        for to_id in to_list:
            to_list[to_id] = Event.clean_set(to_list[to_id])

        return from_list, to_list

    def extend_event(
        self, event: Event, from_list: DictToIdsSet, to_list: DictToIdsSet
    ) -> Tuple[Event, DictToIdsSet, DictToIdsSet]:
        """Given an event, aggregate ids in the 'from' and 'to' sets, to connect the whole group.

        Empty ids found in the mappings are ignored.

        Args:
            event: the event to extend.
            from_list: A dict of the from ids, and their corresponding to ids.
            to_list: A dict of the to ids, and their corresponding from ids.

        Returns:
            A tuple of the extended event, and the from_list and to_list from which the ids that
            have been added to the event have been removed.

        """
        extended = True

        # Repeat until a full pass adds nothing new to the event
        while extended:
            extended = False

            # Extend the group in the to ids
            for to_id in event.to_set:
                for to_from_id in to_list.get(to_id, ()):
                    # Event.add_from drops empty ids, so counting one as progress would loop forever
                    if to_from_id and to_from_id not in event.from_set:
                        event.add_from(to_from_id)
                        extended = True

            # Extend the group in the from ids
            for from_id in event.from_set:
                for from_to_id in from_list.get(from_id, ()):
                    # Event.add_to drops empty ids, same reasoning as above
                    if from_to_id and from_to_id not in event.to_set:
                        event.add_to(from_to_id)
                        extended = True

        # Clean up: new dicts are built, the ones given as arguments are left untouched
        from_list = {from_id: ids for from_id, ids in from_list.items() if from_id not in event.from_set}
        to_list = {to_id: ids for to_id, ids in to_list.items() if to_id not in event.to_set}

        return (event, from_list, to_list)

session = session instance-attribute

extend_event(event, from_list, to_list)

Given an event, aggregate ids in the 'from' and 'to' sets, to connect the whole group.

Parameters:

Name Type Description Default
event Event

the event to extend.

required
from_list DictToIdsSet

A dict of the from ids, and their corresponding to ids.

required
to_list DictToIdsSet

A dict of the to ids, and their corresponding from ids.

required

Returns:

Type Description
Event

A tuple of the extended event, and the from_list and to_list from which the ids that

DictToIdsSet

have been added to the event have been removed.

Source code in src/python/ensembl/io/genomio/events/dump.py
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
def extend_event(
    self, event: Event, from_list: DictToIdsSet, to_list: DictToIdsSet
) -> Tuple[Event, DictToIdsSet, DictToIdsSet]:
    """Given an event, aggregate ids in the 'from' and 'to' sets, to connect the whole group.

    Empty ids found in the mappings are ignored.

    Args:
        event: the event to extend.
        from_list: A dict of the from ids, and their corresponding to ids.
        to_list: A dict of the to ids, and their corresponding from ids.

    Returns:
        A tuple of the extended event, and the from_list and to_list from which the ids that
        have been added to the event have been removed.

    """
    extended = True

    # Repeat until a full pass adds nothing new to the event
    while extended:
        extended = False

        # Extend the group in the to ids
        for to_id in event.to_set:
            for to_from_id in to_list.get(to_id, ()):
                # Event.add_from drops empty ids, so counting one as progress would loop forever
                if to_from_id and to_from_id not in event.from_set:
                    event.add_from(to_from_id)
                    extended = True

        # Extend the group in the from ids
        for from_id in event.from_set:
            for from_to_id in from_list.get(from_id, ()):
                # Event.add_to drops empty ids, same reasoning as above
                if from_to_id and from_to_id not in event.to_set:
                    event.add_to(from_to_id)
                    extended = True

    # Clean up: new dicts are built, the ones given as arguments are left untouched
    from_list = {from_id: ids for from_id, ids in from_list.items() if from_id not in event.from_set}
    to_list = {to_id: ids for to_id, ids in to_list.items() if to_id not in event.to_set}

    return (event, from_list, to_list)

get_history()

Retrieve all events from a database.

Returns:

Type Description
List

A list of all events.

Source code in src/python/ensembl/io/genomio/events/dump.py
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
def get_history(self) -> List:
    """Collect the events of every mapping session of the database.

    For each mapping session, the pairs of ids are loaded and turned into events, and every
    event is stamped with the release and the creation date of that mapping session.

    Returns:
        A list of all events, in the order the mapping sessions were retrieved.

    """
    history = []
    for mapping_session in self.get_mapping_sessions():
        logging.info(f"Mapping session {mapping_session.new_release}")
        session_pairs = self.get_pairs(mapping_session.mapping_session_id)
        new_events = self.make_events(session_pairs)
        for new_event in new_events:
            new_event.set_release(mapping_session.new_release)
            new_event.set_date(mapping_session.created)
        history.extend(new_events)

    return history

get_mapping_sessions()

Retrieve the mapping sessions from the connected database.

Returns:

Type Description
List[MappingSession]

A list of sessions.

Source code in src/python/ensembl/io/genomio/events/dump.py
355
356
357
358
359
360
361
362
363
364
def get_mapping_sessions(self) -> List[MappingSession]:
    """Load every mapping session through the stored database session.

    Returns:
        A list of sessions (duplicates are removed from the result).

    """
    found = self.session.scalars(select(MappingSession)).unique().all()
    return list(found)

get_pairs(session_id)

Retrieve all pairs of ids for a given session.

Parameters:

Name Type Description Default
session_id int

id of a session from the connected database.

required

Returns:

Type Description
List[Pair]

All pairs of IDs.

Source code in src/python/ensembl/io/genomio/events/dump.py
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
def get_pairs(self, session_id: int) -> List[Pair]:
    """Load the pairs of old/new gene ids recorded for one mapping session.

    Only stable id events of type "gene" are selected, and only those where one of the two
    ids is NULL or where the old and the new ids differ.

    Args:
        session_id: id of a session from the connected database.

    Returns:
        All pairs of IDs.

    """
    in_session = StableIdEvent.mapping_session_id == session_id
    is_gene = StableIdEvent.id_type == "gene"
    has_changed = or_(
        StableIdEvent.old_stable_id.is_(None),
        StableIdEvent.new_stable_id.is_(None),
        StableIdEvent.old_stable_id != StableIdEvent.new_stable_id,
    )
    statement = (
        select(StableIdEvent)
        .where(and_(in_session, is_gene, has_changed))
        .group_by(
            StableIdEvent.old_stable_id, StableIdEvent.new_stable_id, StableIdEvent.mapping_session_id
        )
    )
    rows = self.session.scalars(statement).unique().all()
    return [Pair(row.old_stable_id, row.new_stable_id) for row in rows]

get_pairs_from_to(pairs) staticmethod

From a list of Pairs, extract a mapping of all ids from a given old id (from_list), and a mapping of all ids to a given new id (to_list).

Parameters:

Name Type Description Default
pairs List[Pair]

list of Pairs.

required
Return

Tuple of 2 values: from_list to_list

Source code in src/python/ensembl/io/genomio/events/dump.py
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
@staticmethod
def get_pairs_from_to(pairs: List[Pair]) -> Tuple[DictToIdsSet, DictToIdsSet]:
    """
    From a list of Pairs, extract a mapping of all ids from a given old id (from_list),
    and a mapping of all ids to a given new id (to_list).

    Args:
        pairs: list of Pairs.

    Return:
         Tuple of 2 values:
            from_list
            to_list

    """
    from_list: DictToIdsSet = {}
    to_list: DictToIdsSet = {}
    for pair in pairs:
        old_id = pair.old_id
        new_id = pair.new_id
        if old_id is None:
            old_id = ""
        if new_id is None:
            new_id = ""

        if old_id in from_list:
            from_list[old_id].add(new_id)
        else:
            from_list[old_id] = set([new_id])

        if new_id in to_list:
            to_list[new_id].add(old_id)
        else:
            to_list[new_id] = set([old_id])

    # Remove empty elements
    for from_id in from_list:
        from_list[from_id] = Event.clean_set(from_list[from_id])
    for to_id in to_list:
        to_list[to_id] = Event.clean_set(to_list[to_id])

    return from_list, to_list

make_events(pairs)

Given a list of pairs, create events.

Parameters:

Name Type Description Default
pairs List[Pair]

list of Pair.

required
Return

A list of events.

Source code in src/python/ensembl/io/genomio/events/dump.py
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
def make_events(self, pairs: List[Pair]) -> List:
    """Group a list of pairs into events.

    Each old id that is not yet part of an event seeds a new one, which is then extended to
    the whole group of connected ids. The new ids that remain afterwards each get their own
    event. The number of events per event name is logged.

    Args:
        pairs: list of Pair.

    Return:
        A list of events.

    """
    initial_from, remaining_to = self.get_pairs_from_to(pairs)
    remaining_from = initial_from

    # The initial mapping is walked as-is; the remaining dicts shrink as events absorb ids
    events: List[Event] = []
    for old_id, new_ids in initial_from.items():
        if old_id and old_id in remaining_from:
            grouped = Event(set([old_id]), set(new_ids))
            (grouped, remaining_from, remaining_to) = self.extend_event(
                grouped, remaining_from, remaining_to
            )
            grouped.add_pairs(pairs)
            events.append(grouped)

    # Remaining events should only be new genes
    for new_id, old_ids in remaining_to.items():
        if new_id:
            created = Event(set(old_ids), set([new_id]))
            created.add_pairs(pairs)
            events.append(created)

    # Count the events by name (the name is computed before the pairs are cleaned up)
    counts = {}
    for current in events:
        label = current.get_name()
        current.clean_pairs()
        counts[label] = counts.get(label, 0) + 1

    for label, total in counts.items():
        logging.info(f"\t{label} = {total}")

    return events

print_events(events, output_file)

Print events in a format for BRC.

Parameters:

Name Type Description Default
events List[Event]

list of events for a given genome.

required
output_file Path

where the events will be printed.

required
Source code in src/python/ensembl/io/genomio/events/dump.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
def print_events(self, events: List[Event], output_file: Path) -> None:
    """Write the events to a file, in a format for BRC.

    Every line returned by `brc_format_2()` of each event is written on its own line. When
    there are no events, a message is logged and the file is not opened at all.

    Args:
        events: list of events for a given genome.
        output_file: where the events will be printed.

    """
    if events:
        with output_file.open("w") as out_fh:
            out_fh.writelines(line + "\n" for event in events for line in event.brc_format_2())
    else:
        logging.info("No events to print")

Event

Represents a stable id event from one gene set version to another one. Various events: - new genes - deleted genes - merged genes (several genes to one) - split genes (one gene to several) - mixed (several genes to several)

Attributes:

Name Type Description
from_list

List of genes in the previous gene set.

to_list

List of genes in the new gene set.

release

New gene set release name.

date

Date of the new gene set.

name

Name of the event (will be updated automatically).

pairs List[Pair]

All pairs of ids for this event.

Any gene set without a date, or dated on or before BRC4_START_DATE (2020-05-01), is dubbed pre-BRC4.

Source code in src/python/ensembl/io/genomio/events/dump.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
class Event:
    """Represents a stable id event from one gene set version to another one. Various events:
    - new genes
    - deleted genes
    - changed genes (one gene to one)
    - merged genes (several genes to one)
    - split genes (one gene to several)
    - mixed (several genes to several)

    Attributes:
        from_set: Set of gene ids in the previous gene set.
        to_set: Set of gene ids in the new gene set.
        release: New gene set release name.
        date: Date of the new gene set.
        name: Name of the event (will be updated automatically).
        pairs: All pairs of ids for this event.

    A release is labelled pre-BRC4 unless its date is later than BRC4_START_DATE.

    """

    def __init__(
        self,
        from_list: Optional[Set[str]] = None,
        to_list: Optional[Set[str]] = None,
        release: Optional[str] = None,
        date: Optional[datetime] = None,
    ) -> None:
        """Create a stable id event from a set of old_ids to a set of new_ids.

        Args:
            from_list: old ids (empty values are discarded).
            to_list: new ids (empty values are discarded).
            release: release name of the new gene set.
            date: date of the new gene set.

        """
        if from_list is None:
            from_list = set()
        if to_list is None:
            to_list = set()
        self.from_set = self.clean_set(from_list)
        self.to_set = self.clean_set(to_list)
        self.release = release
        self.date = date
        self.name = ""
        self.pairs: List[Pair] = []

    def __str__(self) -> str:
        """String representation of the stable id event"""
        from_str = ",".join(self.from_set)
        to_str = ",".join(self.to_set)
        return f"From {from_str} to {to_str} = {self.get_name()} in release {self.release}"

    def brc_format_1(self) -> List[str]:
        """Returns a list of events, one line per initial ID, in the following TSV format:
        - old gene id
        - event name
        - release
        - release date
        - list of old gene ids in the event (comma-separated)
        - list of new gene ids in the event (comma-separated)

        The last two columns are only filled for merge, split, mixed and change events. A "new"
        event has no initial ID, so a single line is produced with the new gene id in the first
        column.

        """
        from_str = ",".join(self.from_set)
        to_str = ",".join(self.to_set)
        release = self.get_full_release()
        date = self.date.strftime("%Y-%m") if self.date else "no_date"
        name = self.get_name()

        line_list: List[str] = []
        for identifier in self.from_set:
            line = [identifier, name, release, date]
            if name in ("merge", "split", "mixed", "change"):
                line += [from_str, to_str]
            else:
                line += ["", ""]
            line_list.append("\t".join(line))

        if name == "new":
            # A "new" event has exactly one id in to_set: take that id, not the set itself
            new_id = next(iter(self.to_set))
            line_list.append("\t".join([new_id, name, release, date, "", ""]))
        return line_list

    def brc_format_2(self) -> List[str]:
        """Returns a list of combination of genes, one line per combination of old_id - new_ids, in the
        following TSV format:
        - old gene id
        - new gene id
        - event name
        - release
        - release date

        """
        release = self.get_full_release()
        date = self.date.strftime("%Y-%m") if self.date else "no_date"
        name = self.get_name()

        line_list: List[str] = []
        for pair in self.pairs:
            line = [
                pair.old_id,
                pair.new_id,
                name,
                release,
                date,
            ]
            line_list.append("\t".join(line))
        return line_list

    @staticmethod
    def clean_set(this_list: Set) -> Set:
        """Removes any empty elements from a set.

        Args:
            this_list: set of items, some of which can be empty/None.

        Returns:
            A new set with only the non-empty items.

        """
        return {identifier for identifier in this_list if identifier}

    def add_from(self, from_id: str) -> None:
        """Store an id in the from_set (empty ids are ignored)."""
        if from_id:
            self.from_set.add(from_id)

    def add_to(self, to_id: str) -> None:
        """Store an id in the to_set (empty ids are ignored)."""
        if to_id:
            self.to_set.add(to_id)

    def set_release(self, release: str) -> None:
        """Set the release name of the event"""
        self.release = release

    def set_date(self, date: datetime) -> None:
        """Set the date of the release for this event"""
        self.date = date

    def add_pair(self, pair: Pair) -> None:
        """Keeps a record of this pair.

        Args:
            pair: a Pair to record.

        Raises:
            ValueError: can't add an empty pair.

        """
        if pair.is_empty():
            raise ValueError(f"Expected at least one value in the given pair {pair}")
        self.pairs.append(pair)

    def get_full_release(self) -> str:
        """Returns the expanded release name, pre-BRC4 or `BRC4 = build`.

        The "build" prefix is only used when the event has a date later than BRC4_START_DATE.

        """
        release = self.release
        date = self.date

        if date and date > BRC4_START_DATE:
            release = f"build {release}"
        else:
            release = f"pre-BRC4 {release}"

        return release

    def _name_event(self) -> None:
        """Identify the event name based on the old vs new id lists.

        Raises:
            UnsupportedEvent: if the sizes of the two sets match no known event, e.g. both are
                empty, or one is empty and the other has several ids.

        """
        if not self.from_set and len(self.to_set) == 1:
            self.name = "new"
        elif not self.to_set and len(self.from_set) == 1:
            self.name = "deletion"
        elif len(self.from_set) == 1 and len(self.to_set) == 1:
            self.name = "change"
        elif len(self.from_set) == 1 and len(self.to_set) > 1:
            self.name = "split"
        elif len(self.from_set) > 1 and len(self.to_set) == 1:
            self.name = "merge"
        elif len(self.from_set) > 1 and len(self.to_set) > 1:
            self.name = "mixed"
        else:
            raise UnsupportedEvent(f"Event {self.from_set} to {self.to_set} is not supported")

    def clean_pairs(self) -> None:
        """Remove the empty old pairs when the event is not 'new'."""
        if not self.name:
            self._name_event()

        if self.name != "new":
            self.pairs = [pair for pair in self.pairs if pair.has_old_id()]

    def get_name(self) -> str:
        """Retrieve the name for this event, update it beforehand."""
        self._name_event()
        return self.name

    def add_pairs(self, pairs: List[Pair]) -> None:
        """Provided all the pairs, keep those that are used by this event.

        A pair is used by this event if its old id is in the from_set or its new id is in the
        to_set.

        Args:
            pairs: list of Pair.

        """
        for pair in pairs:
            if (pair.has_old_id() and pair.old_id in self.from_set) or (
                pair.has_new_id() and pair.new_id in self.to_set
            ):
                # Core db contains an empty line to signify that an old id has been removed
                # in merge/split/mixed
                name = self.get_name()
                if (name != "deletion") and not pair.has_new_id():
                    continue
                self.add_pair(pair)

date = date instance-attribute

from_set = self.clean_set(from_list) instance-attribute

name = '' instance-attribute

pairs = [] instance-attribute

release = release instance-attribute

to_set = self.clean_set(to_list) instance-attribute

add_from(from_id)

Store an id in the from_set.

Source code in src/python/ensembl/io/genomio/events/dump.py
205
206
207
208
def add_from(self, from_id: str) -> None:
    """Add an id to the from_set, unless the id is empty."""
    if not from_id:
        return
    self.from_set.add(from_id)

add_pair(pair)

Keeps a record of this pair.

Parameters:

Name Type Description Default
pair Pair

a Pair to record.

required

Raises:

Type Description
ValueError

can't add an empty pair.

Source code in src/python/ensembl/io/genomio/events/dump.py
223
224
225
226
227
228
229
230
231
232
233
234
235
def add_pair(self, pair: Pair) -> None:
    """Append a pair to the pairs recorded for this event.

    Args:
        pair: a Pair to record.

    Raises:
        ValueError: if the pair reports itself as empty.

    """
    if not pair.is_empty():
        self.pairs.append(pair)
        return
    raise ValueError(f"Expected at least one value in the given pair {pair}")

add_pairs(pairs)

Provided all the pairs, keep those that are used by this event.

Parameters:

Name Type Description Default
pairs List[Pair]

list of Pair.

required
Source code in src/python/ensembl/io/genomio/events/dump.py
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
def add_pairs(self, pairs: List[Pair]) -> None:
    """Record, among all the given pairs, those that belong to this event.

    A pair belongs to the event if its old id is in the from_set or its new id is in the
    to_set. Unless the event is a deletion, the pairs without a new id are not recorded.

    Args:
        pairs: list of Pair.

    """
    for candidate in pairs:
        in_event = candidate.has_old_id() and candidate.old_id in self.from_set
        if not in_event:
            in_event = candidate.has_new_id() and candidate.new_id in self.to_set
        if not in_event:
            continue

        # Core db contains an empty line to signify that an old id has been removed
        # in merge/split/mixed
        event_name = self.get_name()
        if event_name == "deletion" or candidate.has_new_id():
            self.add_pair(candidate)

add_to(to_id)

Store an id in the to_set.

Source code in src/python/ensembl/io/genomio/events/dump.py
210
211
212
213
def add_to(self, to_id: str) -> None:
    """Add an id to the to_set, unless the id is empty."""
    if not to_id:
        return
    self.to_set.add(to_id)

brc_format_1()

Returns a list of events, one line per initial ID, in the following TSV format: - old gene id - event name - release - release date - list of old gene ids in the event (comma-separated) - list of new gene ids in the event (comma-separated)

Source code in src/python/ensembl/io/genomio/events/dump.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def brc_format_1(self) -> List[str]:
    """Returns a list of events, one line per initial ID, in the following TSV format:
    - old gene id
    - event name
    - release
    - release date
    - list of old gene ids in the event (comma-separated)
    - list of new gene ids in the event (comma-separated)

    The last two columns are only filled for merge, split, mixed and change events. A "new"
    event has no initial ID, so a single line is produced with the new gene id in the first
    column.

    """
    from_str = ",".join(self.from_set)
    to_str = ",".join(self.to_set)
    release = self.get_full_release()
    date = self.date.strftime("%Y-%m") if self.date else "no_date"
    name = self.get_name()

    line_list: List[str] = []
    for identifier in self.from_set:
        line = [identifier, name, release, date]
        if name in ("merge", "split", "mixed", "change"):
            line += [from_str, to_str]
        else:
            line += ["", ""]
        line_list.append("\t".join(line))

    if name == "new":
        # A "new" event has exactly one id in to_set: take that id, not the set itself
        # (joining the set with the other string columns raises a TypeError)
        new_id = next(iter(self.to_set))
        line_list.append("\t".join([new_id, name, release, date, "", ""]))
    return line_list

brc_format_2()

Returns a list of combination of genes, one line per combination of old_id - new_ids, in the following TSV format: - old gene id - new gene id - event name - release - release date

Source code in src/python/ensembl/io/genomio/events/dump.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def brc_format_2(self) -> List[str]:
    """Format the event as TSV lines, one line per old_id - new_id pair of the event.

    The columns are, in order:
    - old gene id
    - new gene id
    - event name
    - release
    - release date

    Returns:
        One tab-separated string for each pair in `self.pairs`.

    """
    full_release = self.get_full_release()
    release_date = self.date.strftime("%Y-%m") if self.date else "no_date"
    event_name = self.get_name()

    # The last three columns are the same on every line of the event
    shared_columns = (event_name, full_release, release_date)
    return ["\t".join((pair.old_id, pair.new_id, *shared_columns)) for pair in self.pairs]

clean_pairs()

Remove the empty old pairs when the event is not 'new'.

Source code in src/python/ensembl/io/genomio/events/dump.py
266
267
268
269
270
271
272
273
274
275
276
277
def clean_pairs(self) -> None:
    """Drop the pairs that have no old id, unless the event is a 'new' event.

    The event is named first if it does not have a name yet.
    """
    if not self.name:
        self._name_event()

    # The pairs of a 'new' event are kept as they are
    if self.name == "new":
        return
    self.pairs = [pair for pair in self.pairs if pair.has_old_id()]

clean_set(this_list) staticmethod

Removes any empty elements from a set.

Parameters:

Name Type Description Default
this_list Set

Set of items, some of which can be empty/None.

required

Returns:

Type Description
Set

The cleaned set.

Source code in src/python/ensembl/io/genomio/events/dump.py
192
193
194
195
196
197
198
199
200
201
202
203
@staticmethod
def clean_set(this_list: Set) -> Set:
    """Build a copy of a set without its empty elements.

    Args:
        this_list: Set of items, some of which can be empty/None.

    Returns:
        A new set with only the non-empty items.

    """
    # filter(None, ...) keeps the items that are truthy
    return set(filter(None, this_list))

get_full_release()

Returns the expanded release name, pre-BRC4 or BRC4 = build.

Source code in src/python/ensembl/io/genomio/events/dump.py
237
238
239
240
241
242
243
244
245
246
247
def get_full_release(self) -> str:
    """Returns the release name with a prefix.

    The prefix is "build" when the event has a date that is after BRC4_START_DATE, and
    "pre-BRC4" otherwise (including when the event has no date).
    """
    is_brc4 = bool(self.date) and self.date > BRC4_START_DATE
    prefix = "build" if is_brc4 else "pre-BRC4"
    return f"{prefix} {self.release}"

get_name()

Retrieve the name for this event, update it beforehand.

Source code in src/python/ensembl/io/genomio/events/dump.py
279
280
281
282
def get_name(self) -> str:
    """Retrieve the name for this event, update it beforehand.

    Returns:
        The value of `self.name` after `_name_event()` has been called.

    """
    # The name is refreshed on every call, not only when it is unset
    self._name_event()
    return self.name

set_date(date)

Set the date of the release for this event

Source code in src/python/ensembl/io/genomio/events/dump.py
219
220
221
def set_date(self, date: datetime) -> None:
    """Set the date of the release for this event

    Args:
        date: Date to store in the `date` attribute of the event.

    """
    self.date = date

set_release(release)

Set the release name of the event

Source code in src/python/ensembl/io/genomio/events/dump.py
215
216
217
def set_release(self, release: str) -> None:
    """Set the release name of the event

    Args:
        release: Release name to store in the `release` attribute of the event.

    """
    self.release = release

Pair

Simple old_id - new_id pair representation

Source code in src/python/ensembl/io/genomio/events/dump.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
class Pair:
    """Simple old_id - new_id pair representation.

    An id that is not provided (None) is stored as an empty string.
    """

    def __init__(self, old_id: Optional[str], new_id: Optional[str]) -> None:
        """Create a pair from an old_id and a new_id, each of them replaced by "" if it is None."""
        self.old_id = "" if old_id is None else old_id
        self.new_id = "" if new_id is None else new_id

    def has_old_id(self) -> bool:
        """Tell if the old_id of the pair is not the empty string."""
        return self.old_id != ""

    def has_new_id(self) -> bool:
        """Tell if the new_id of the pair is not the empty string."""
        return self.new_id != ""

    def is_empty(self) -> bool:
        """Tell if the pair has neither an old_id nor a new_id."""
        return not self.has_old_id() and not self.has_new_id()

new_id = new_id instance-attribute

old_id = old_id if old_id is not None else '' instance-attribute

has_new_id()

Check if the pair has a new_id

Source code in src/python/ensembl/io/genomio/events/dump.py
63
64
65
def has_new_id(self) -> bool:
    """Check if the pair has a new_id

    Returns:
        True if `new_id` is not the empty string, False otherwise.

    """
    return self.new_id != ""

has_old_id()

Check if the pair has an old_id

Source code in src/python/ensembl/io/genomio/events/dump.py
59
60
61
def has_old_id(self) -> bool:
    """Check if the pair has an old_id

    Returns:
        True if `old_id` is not the empty string, False otherwise.

    """
    return self.old_id != ""

is_empty()

Test if the current pair has no id.

Source code in src/python/ensembl/io/genomio/events/dump.py
67
68
69
70
def is_empty(self) -> bool:
    """Tell if the pair has neither an old_id nor a new_id."""
    return not self.has_old_id() and not self.has_new_id()

UnsupportedEvent

Bases: ValueError

If an event is not supported

Source code in src/python/ensembl/io/genomio/events/dump.py
73
74
class UnsupportedEvent(ValueError):
    """If an event is not supported

    Subclass of ValueError, so it is also caught by handlers of ValueError.
    """

main()

Main entrypoint

Source code in src/python/ensembl/io/genomio/events/dump.py
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
def main() -> None:
    """Main entrypoint

    Parse the command line arguments, connect to the core database and dump its stable ID events
    into the output file.
    """
    parser = ArgumentParser(
        description="Dump the stable ID events from the information available in a core database."
    )
    parser.add_server_arguments(include_database=True)
    parser.add_argument_dst_path("--output_file", required=True, help="Output file")
    parser.add_argument("--version", action="version", version=ensembl.io.genomio.__version__)
    parser.add_log_arguments(add_log_file=True)
    args = parser.parse_args()
    init_logging_with_args(args)

    dbc = DBConnectionLite(args.url)
    with dbc.session_scope() as session:
        dumper = DumpStableIDs(session)
        # The dumper holds the session: do all its work before the session scope is exited
        events = dumper.get_history()
        dumper.print_events(events, args.output_file)