Coverage for src / python / ensembl / xrefs / xref_update_db_model.py: 100%
192 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 17:13 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 17:13 +0000
1# See the NOTICE file distributed with this work for additional information
2# regarding copyright ownership.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Xref Update database ORM."""
15# Ignore some pylint and mypy checks due to the nature of SQLAlchemy ORMs
16# pylint: disable=missing-class-docstring,too-many-lines
17# mypy: disable-error-code="misc, valid-type"
19from sqlalchemy import Column, Index, Enum, DateTime, text
20from sqlalchemy.dialects.mysql import (
21 INTEGER,
22 VARCHAR,
23 TEXT,
24 MEDIUMTEXT,
25 TINYINT,
26 CHAR,
27 SMALLINT,
28 DOUBLE,
29)
30from sqlalchemy.orm import declarative_base
# Declarative base class that every ORM model in this module subclasses.
Base = declarative_base()
class Xref(Base):
    """ORM mapping for the ``xref`` table."""

    __tablename__ = "xref"
    __table_args__ = (
        # Unique composite index. The index name is reproduced exactly as it
        # is spelled in the model ("acession_idx"); "accession" and "label"
        # are indexed with a MySQL length of 100 each.
        Index(
            "acession_idx",
            "accession",
            "source_id",
            "species_id",
            "label",
            mysql_length={"accession": 100, "label": 100},
            unique=True,
        ),
        Index("species_source_idx", "species_id", "source_id"),
    )

    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    accession: Column = Column(VARCHAR(255), nullable=False)
    version: Column = Column(INTEGER(10, unsigned=True))
    label: Column = Column(VARCHAR(255))
    description: Column = Column(TEXT)
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Required enum column; the server fills in 'NONE' when no value is given.
    info_type: Column = Column(
        Enum(
            "NONE", "PROJECTION", "MISC", "DEPENDENT", "DIRECT",
            "SEQUENCE_MATCH", "INFERRED_PAIR", "PROBE", "UNMAPPED",
            "COORDINATE_OVERLAP", "CHECKSUM",
        ),
        server_default=text("'NONE'"),
        nullable=False,
    )
    # Required string column; the server default is the empty string.
    info_text: Column = Column(
        VARCHAR(255),
        server_default=text("''"),
        nullable=False,
    )
    # Nullable enum column with no server default.
    dumped: Column = Column(
        Enum(
            "MAPPED",
            "NO_DUMP_ANOTHER_PRIORITY",
            "UNMAPPED_NO_MAPPING",
            "UNMAPPED_NO_MASTER",
            "UNMAPPED_MASTER_FAILED",
            "UNMAPPED_NO_STABLE_ID",
            "UNMAPPED_INTERPRO",
        ),
    )
class PrimaryXref(Base):
    """ORM mapping for the ``primary_xref`` table."""

    __tablename__ = "primary_xref"

    # Primary key; not declared as auto-incrementing in this model.
    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    sequence: Column = Column(MEDIUMTEXT)
    sequence_type: Column = Column(
        Enum("dna", "peptide"),
    )
    status: Column = Column(
        Enum("experimental", "predicted"),
    )
class DependentXref(Base):
    """ORM mapping for the ``dependent_xref`` table.

    The primary key is the composite of ``master_xref_id``,
    ``dependent_xref_id`` and ``linkage_source_id``.
    """

    __tablename__ = "dependent_xref"

    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
    )
    master_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    dependent_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    linkage_annotation: Column = Column(VARCHAR(255))
    linkage_source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        nullable=False,
    )
class Synonym(Base):
    """ORM mapping for the ``synonym`` table.

    Both columns are individually indexed and together form the primary key.
    """

    __tablename__ = "synonym"

    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    synonym: Column = Column(
        VARCHAR(255),
        primary_key=True,
        index=True,
    )
class Source(Base):
    """ORM mapping for the ``source`` table."""

    __tablename__ = "source"

    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    # Required enum column; the server fills in 'NOIDEA' when no value is given.
    status: Column = Column(
        Enum(
            "KNOWN",
            "XREF",
            "PRED",
            "ORTH",
            "PSEUDO",
            "LOWEVIDENCE",
            "NOIDEA",
        ),
        server_default=text("'NOIDEA'"),
        nullable=False,
    )
    source_release: Column = Column(VARCHAR(255))
    ordered: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Nullable columns with server-side defaults of 1 and '' respectively.
    priority: Column = Column(
        INTEGER(5, unsigned=True),
        server_default=text("1"),
    )
    priority_description: Column = Column(
        VARCHAR(40),
        server_default=text("''"),
    )
class SourceURL(Base):
    """ORM mapping for the ``source_url`` table."""

    __tablename__ = "source_url"

    source_url_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
        nullable=False,
    )
    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        nullable=False,
    )
    parser: Column = Column(VARCHAR(255))
class GeneDirectXref(Base):
    """ORM mapping for the ``gene_direct_xref`` table.

    ``general_xref_id`` and ``ensembl_stable_id`` are each indexed and
    together form the primary key.
    """

    __tablename__ = "gene_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255),
        primary_key=True,
        index=True,
    )
    linkage_xref: Column = Column(VARCHAR(255))
class TranscriptDirectXref(Base):
    """ORM mapping for the ``transcript_direct_xref`` table.

    ``general_xref_id`` and ``ensembl_stable_id`` are each indexed and
    together form the primary key.
    """

    __tablename__ = "transcript_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255),
        primary_key=True,
        index=True,
    )
    linkage_xref: Column = Column(VARCHAR(255))
class TranslationDirectXref(Base):
    """ORM mapping for the ``translation_direct_xref`` table.

    ``general_xref_id`` and ``ensembl_stable_id`` are each indexed and
    together form the primary key.
    """

    __tablename__ = "translation_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255),
        primary_key=True,
        index=True,
    )
    linkage_xref: Column = Column(VARCHAR(255))
class Species(Base):
    """ORM mapping for the ``species`` table.

    The primary key is the composite of ``species_id`` and ``taxonomy_id``.
    """

    __tablename__ = "species"

    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
        nullable=False,
    )
    taxonomy_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
        nullable=False,
    )
    name: Column = Column(
        VARCHAR(255),
        index=True,
        nullable=False,
    )
    aliases: Column = Column(VARCHAR(255))
class Pairs(Base):
    """ORM mapping for the ``pairs`` table."""

    __tablename__ = "pairs"

    pair_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        nullable=False,
    )
    # Both accession columns are required and individually indexed.
    accession1: Column = Column(
        VARCHAR(255),
        index=True,
        nullable=False,
    )
    accession2: Column = Column(
        VARCHAR(255),
        index=True,
        nullable=False,
    )
class CoordinateXref(Base):
    """ORM mapping for the ``coordinate_xref`` table."""

    __tablename__ = "coordinate_xref"
    __table_args__ = (
        # Two composite indexes sharing the same three leading columns and
        # ending in "txStart" and "txEnd" respectively.
        Index(
            "start_pos_idx",
            "species_id",
            "chromosome",
            "strand",
            "txStart",
        ),
        Index(
            "end_pos_idx",
            "species_id",
            "chromosome",
            "strand",
            "txEnd",
        ),
    )

    coord_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    accession: Column = Column(VARCHAR(255), nullable=False)
    chromosome: Column = Column(VARCHAR(255), nullable=False)
    strand: Column = Column(TINYINT(2), nullable=False)
    # The coordinate columns below use signed INTEGER(10).
    txStart: Column = Column(INTEGER(10), nullable=False)
    txEnd: Column = Column(INTEGER(10), nullable=False)
    cdsStart: Column = Column(INTEGER(10))
    cdsEnd: Column = Column(INTEGER(10))
    exonStarts: Column = Column(TEXT, nullable=False)
    exonEnds: Column = Column(TEXT, nullable=False)
class ChecksumXref(Base):
    """ORM mapping for the ``checksum_xref`` table."""

    __tablename__ = "checksum_xref"
    __table_args__ = (
        # Index on "checksum" declared with a MySQL length of 10.
        Index(
            "checksum_idx",
            "checksum",
            mysql_length=10,
        ),
    )

    checksum_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        nullable=False,
    )
    accession: Column = Column(CHAR(14), nullable=False)
    checksum: Column = Column(CHAR(32), nullable=False)
class Mapping(Base):
    """ORM mapping for the ``mapping`` table."""

    __tablename__ = "mapping"

    # Primary key; not declared as auto-incrementing in this model.
    job_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # The attribute is named "type" because it doubles as the column name.
    type: Column = Column(
        Enum("dna", "peptide", "UCSC"),
    )
    command_line: Column = Column(TEXT)
    percent_query_cutoff: Column = Column(
        INTEGER(10, unsigned=True),
    )
    percent_target_cutoff: Column = Column(
        INTEGER(10, unsigned=True),
    )
    method: Column = Column(VARCHAR(255))
    array_size: Column = Column(
        INTEGER(10, unsigned=True),
    )
class MappingJobs(Base):
    """ORM mapping for the ``mapping_jobs`` table."""

    __tablename__ = "mapping_jobs"

    # Signed INTEGER(10) auto-increment primary key.
    mapping_job_id: Column = Column(
        INTEGER(10),
        autoincrement=True,
        primary_key=True,
    )
    root_dir: Column = Column(TEXT)
    map_file: Column = Column(VARCHAR(255))
    status: Column = Column(
        Enum("SUBMITTED", "FAILED", "SUCCESS"),
    )
    out_file: Column = Column(VARCHAR(255))
    err_file: Column = Column(VARCHAR(255))
    array_number: Column = Column(
        INTEGER(10, unsigned=True),
    )
    job_id: Column = Column(
        INTEGER(10, unsigned=True),
    )
    failed_reason: Column = Column(VARCHAR(255))
    object_xref_start: Column = Column(
        INTEGER(10, unsigned=True),
    )
    object_xref_end: Column = Column(
        INTEGER(10, unsigned=True),
    )
class GeneTranscriptTranslation(Base):
    """ORM mapping for the ``gene_transcript_translation`` table.

    ``transcript_id`` is the primary key; ``gene_id`` and ``translation_id``
    are indexed.
    """

    __tablename__ = "gene_transcript_translation"

    gene_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
        nullable=False,
    )
    transcript_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    translation_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
    )
class ProcessStatus(Base):
    """ORM mapping for the ``process_status`` table."""

    __tablename__ = "process_status"

    # The attribute is named "id" because it doubles as the column name.
    id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    # Nullable enum column. The values are reproduced exactly as declared,
    # in their original order.
    status: Column = Column(
        Enum(
            "xref_created", "parsing_started", "parsing_finished",
            "alt_alleles_added", "xref_fasta_dumped", "core_fasta_dumped",
            "core_data_loaded", "mapping_submitted", "mapping_finished",
            "mapping_processed", "direct_xrefs_parsed", "priorities_flagged",
            "processed_pairs", "biomart_test_finished",
            "source_level_move_finished", "alt_alleles_processed",
            "official_naming_done", "checksum_xrefs_started",
            "checksum_xrefs_finished", "coordinate_xrefs_started",
            "coordinate_xref_finished", "tests_started", "tests_failed",
            "tests_finished", "core_loaded", "display_xrefs_done",
            "gene_descriptions_done", "direct_stable_id_check_done",
            "xrefs_counts_check_done", "name_change_check_done",
        ),
    )
    date: Column = Column(DateTime, nullable=False)
class DisplayXrefPriority(Base):
    """ORM mapping for the ``display_xref_priority`` table.

    The primary key is the composite of ``ensembl_object_type`` and
    ``source_id``.
    """

    __tablename__ = "display_xref_priority"

    ensembl_object_type: Column = Column(
        VARCHAR(100),
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )
class GeneDescPriority(Base):
    """ORM mapping for the ``gene_desc_priority`` table."""

    __tablename__ = "gene_desc_priority"

    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )
class AltAllele(Base):
    """ORM mapping for the ``alt_allele`` table.

    The primary key is the composite of ``alt_allele_id`` and ``gene_id``.
    """

    __tablename__ = "alt_allele"

    alt_allele_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    gene_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    # Nullable column whose server-side default is 0.
    is_reference: Column = Column(
        INTEGER(2, unsigned=True),
        server_default=text("0"),
    )
class GeneStableId(Base):
    """ORM mapping for the ``gene_stable_id`` table.

    ``stable_id`` is the primary key; ``internal_id`` is required and indexed.
    """

    __tablename__ = "gene_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
        nullable=False,
    )
    stable_id: Column = Column(
        VARCHAR(128),
        primary_key=True,
    )
    display_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
    )
    # Nullable column whose server-side default is 0.
    desc_set: Column = Column(
        INTEGER(10, unsigned=True),
        server_default=text("0"),
    )
class TranscriptStableId(Base):
    """ORM mapping for the ``transcript_stable_id`` table.

    ``stable_id`` is the primary key; ``internal_id`` is required and indexed.
    """

    __tablename__ = "transcript_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
        nullable=False,
    )
    stable_id: Column = Column(
        VARCHAR(128),
        primary_key=True,
    )
    display_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
    )
    biotype: Column = Column(
        VARCHAR(40),
        nullable=False,
    )
class TranslationStableId(Base):
    """ORM mapping for the ``translation_stable_id`` table.

    Here ``internal_id`` is the primary key and ``stable_id`` is a required,
    indexed column.
    """

    __tablename__ = "translation_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    stable_id: Column = Column(
        VARCHAR(128),
        index=True,
        nullable=False,
    )
class ObjectXref(Base):
    """ORM mapping for the ``object_xref`` table."""

    __tablename__ = "object_xref"
    __table_args__ = (
        # Unique composite index over five columns.
        Index(
            "unique_idx",
            "ensembl_object_type",
            "ensembl_id",
            "xref_id",
            "ox_status",
            "master_xref_id",
            unique=True,
        ),
        Index(
            "oxref_idx",
            "object_xref_id",
            "xref_id",
            "ensembl_object_type",
            "ensembl_id",
        ),
        Index(
            "xref_idx",
            "xref_id",
            "ensembl_object_type",
        ),
    )

    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    ensembl_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    ensembl_object_type: Column = Column(
        Enum(
            "RawContig",
            "Transcript",
            "Gene",
            "Translation",
        ),
        nullable=False,
    )
    xref_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    linkage_annotation: Column = Column(VARCHAR(255))
    # Nullable enum column with no server default.
    linkage_type: Column = Column(
        Enum(
            "PROJECTION", "MISC", "DEPENDENT", "DIRECT", "SEQUENCE_MATCH",
            "INFERRED_PAIR", "PROBE", "UNMAPPED", "COORDINATE_OVERLAP",
            "CHECKSUM",
        ),
    )
    # Required enum column; the server fills in 'DUMP_OUT' when no value is
    # given.
    ox_status: Column = Column(
        Enum(
            "DUMP_OUT",
            "FAILED_PRIORITY",
            "FAILED_CUTOFF",
            "NO_DISPLAY",
            "MULTI_DELETE",
        ),
        server_default=text("'DUMP_OUT'"),
        nullable=False,
    )
    unused_priority: Column = Column(INTEGER(10, unsigned=True))
    master_xref_id: Column = Column(INTEGER(10, unsigned=True))
class IdentityXref(Base):
    """ORM mapping for the ``identity_xref`` table."""

    __tablename__ = "identity_xref"

    # Primary key; not declared as auto-incrementing in this model.
    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # All remaining columns are nullable; the integer ones are signed.
    query_identity: Column = Column(INTEGER(5))
    target_identity: Column = Column(INTEGER(5))
    hit_start: Column = Column(INTEGER(10))
    hit_end: Column = Column(INTEGER(10))
    translation_start: Column = Column(INTEGER(10))
    translation_end: Column = Column(INTEGER(10))
    cigar_line: Column = Column(TEXT)
    score: Column = Column(DOUBLE)
    evalue: Column = Column(DOUBLE)
class Meta(Base):
    """ORM mapping for the ``meta`` table."""

    __tablename__ = "meta"
    __table_args__ = (
        # Unique composite index; note that it includes the primary-key
        # column "meta_id" as declared in the model.
        Index(
            "species_key_value_idx",
            "meta_id",
            "species_id",
            "meta_key",
            "meta_value",
            unique=True,
        ),
        Index(
            "species_value_idx",
            "species_id",
            "meta_value",
        ),
    )

    # Signed INTEGER(10) auto-increment primary key.
    meta_id: Column = Column(
        INTEGER(10),
        autoincrement=True,
        primary_key=True,
    )
    # Nullable column whose server-side default is 1.
    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        server_default=text("1"),
    )
    meta_key: Column = Column(VARCHAR(40), nullable=False)
    # VARCHAR declared with the MySQL dialect's binary=True flag.
    meta_value: Column = Column(
        VARCHAR(255, binary=True),
        nullable=False,
    )
    date: Column = Column(DateTime, nullable=False)