Skip to content

format

ensembl.io.genomio.events.format

Module to map stable ids in a file, given a mapping.

IdsMapper

Simple mapper object, to cleanly get a mapping dict.

Source code in src/python/ensembl/io/genomio/events/format.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class IdsMapper:
    """Simple mapper object, to cleanly get a mapping dict.

    Attributes:
        map: Dict of `from_id` to `to_id`, loaded from the mapping file at construction.
    """

    def __init__(self, map_file: PathLike) -> None:
        """Load the mapping from `map_file`.

        Args:
            map_file: Path to a tab file with 2 columns: from_id, to_id.
        """
        self.map = self._load_mapping(Path(map_file))

    def _load_mapping(self, map_file: Path) -> Dict[str, str]:
        """Return a mapping in a simple dict from a tab file with 2 columns: from_id, to_id.

        Empty lines are skipped and any column after the second one is ignored. If the same
        `from_id` appears on several lines, the last line wins.

        Args:
            map_file: Tab file path.

        Raises:
            ValueError: If a non-empty line has fewer than 2 tab-separated columns.
        """
        mapping: Dict[str, str] = {}
        with map_file.open("r") as map_fh:
            for raw_line in map_fh:
                # Lines read from a file keep their newline: remove it first, otherwise an empty
                # line never compares equal to "" and the newline stays attached to the last column.
                line = raw_line.rstrip("\n")
                if line == "":
                    continue
                items = line.split("\t")
                if len(items) < 2:
                    raise ValueError(f"Not 2 elements in {line}")
                (from_id, to_id) = items[0:2]
                mapping[from_id] = to_id

        return mapping

map = self._load_mapping(Path(map_file)) instance-attribute

load_list(list_file)

Return a simple list from a file.

Source code in src/python/ensembl/io/genomio/events/format.py
56
57
58
59
60
61
62
63
64
65
66
67
def load_list(list_file: Path) -> List[str]:
    """Return a simple list from a file.

    Each line provides one item, after all its whitespace characters have been removed. Lines that
    end up empty are skipped, and duplicated items are only kept once, in order of first appearance.

    Args:
        list_file: Path to a text file with one item per line.

    Returns:
        The unique items, in the order in which they first appear in the file.
    """
    empty_spaces = re.compile(r"\s+")
    # A set alone would deduplicate, but converting it to a list gives an order that changes from
    # run to run (string hashing is randomized); track seen items separately to keep the file order.
    seen = set()
    items: List[str] = []
    with Path(list_file).open("r") as list_fh:
        for line in list_fh:
            item = empty_spaces.sub("", line)
            if item == "" or item in seen:
                continue
            seen.add(item)
            items.append(item)

    return items

main()

Main entrypoint

Source code in src/python/ensembl/io/genomio/events/format.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def main() -> None:
    """Command-line entrypoint: parse the arguments, build the events and write them to a file."""
    cli = ArgumentParser(description="Map stable IDs in a file and produce an events file.")

    # Input files
    cli.add_argument_src_path("--input_file", required=True, help="Input file from gene_diff")
    cli.add_argument_src_path(
        "--deletes_file", required=True, help="Deleted genes file (apart from the deletes from the gene diff)"
    )
    cli.add_argument_src_path(
        "--map_file", required=True, help="Mapping tab file with 2 columns: old_id, new_id"
    )

    # Release information, passed along with every event added below
    cli.add_argument("--release_name", required=True, metavar="NAME", help="Release name for all events")
    cli.add_argument("--release_date", required=True, metavar="DATE", help="Release date for all events")

    # Output file, version and logging options
    cli.add_argument_dst_path("--output_file", required=True, help="Output formatted event file")
    cli.add_argument("--version", action="version", version=ensembl.io.genomio.__version__)
    cli.add_log_arguments()

    options = cli.parse_args()
    init_logging_with_args(options)

    release_name = options.release_name
    release_date = options.release_date

    # The deletes from the list file are added first, then the events from the gene diff file
    collection = EventCollection()
    collection.add_deletes(load_list(options.deletes_file), release_name, release_date)
    collection.load_events_from_gene_diff(options.input_file, release_name, release_date)

    # Remap the IDs with the mapping file before writing the events out
    collection.remap_to_ids(IdsMapper(options.map_file).map)
    collection.write_events_to_file(options.output_file)