Coverage for src/python/ensembl/xrefs/xref_update_db_model.py: 100%
192 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-28 09:42 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-28 09:42 +0000
1# See the NOTICE file distributed with this work for additional information
2# regarding copyright ownership.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Xref Update database ORM."""
15# Ignore some pylint and mypy checks due to the nature of SQLAlchemy ORMs
16# pylint: disable=missing-class-docstring,too-many-lines
17# mypy: disable-error-code="misc, valid-type"
19from sqlalchemy import Column, Index, Enum, DateTime, text
20from sqlalchemy.dialects.mysql import (
21 INTEGER,
22 VARCHAR,
23 TEXT,
24 MEDIUMTEXT,
25 TINYINT,
26 CHAR,
27 SMALLINT,
28 DOUBLE,
29)
30from sqlalchemy.orm import declarative_base
# Declarative base class that every ORM model in this module derives from.
Base = declarative_base()
class Xref(Base):
    """ORM mapping for the ``xref`` table."""

    __tablename__ = "xref"
    __table_args__ = (
        # Unique composite index. On MySQL only the first 100 characters of
        # "accession" and "label" take part in it (mysql_length).
        # NOTE(review): the index name is spelled "acession_idx" (sic); it is
        # a schema identifier, so renaming it would change the generated DDL.
        Index(
            "acession_idx",
            "accession",
            "source_id",
            "species_id",
            "label",
            mysql_length={"accession": 100, "label": 100},
            unique=True,
        ),
        Index("species_source_idx", "species_id", "source_id"),
    )

    # Surrogate primary key.
    xref_id: Column = Column(INTEGER(10, unsigned=True), autoincrement=True, primary_key=True)
    accession: Column = Column(VARCHAR(255), nullable=False)
    version: Column = Column(INTEGER(10, unsigned=True))
    label: Column = Column(VARCHAR(255))
    description: Column = Column(TEXT)
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Mandatory enum; the server fills in 'NONE' when no value is supplied.
    info_type: Column = Column(
        Enum(
            "NONE", "PROJECTION", "MISC", "DEPENDENT", "DIRECT", "SEQUENCE_MATCH",
            "INFERRED_PAIR", "PROBE", "UNMAPPED", "COORDINATE_OVERLAP", "CHECKSUM",
        ),
        server_default=text("'NONE'"),
        nullable=False,
    )
    # Mandatory free text; the server default is the empty string.
    info_text: Column = Column(VARCHAR(255), server_default=text("''"), nullable=False)
    # Optional enum (nullable, no server default).
    dumped: Column = Column(
        Enum(
            "MAPPED", "NO_DUMP_ANOTHER_PRIORITY", "UNMAPPED_NO_MAPPING", "UNMAPPED_NO_MASTER",
            "UNMAPPED_MASTER_FAILED", "UNMAPPED_NO_STABLE_ID", "UNMAPPED_INTERPRO",
        )
    )
class PrimaryXref(Base):
    """ORM mapping for the ``primary_xref`` table."""

    __tablename__ = "primary_xref"

    # Primary key; declared without autoincrement or index options.
    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    sequence: Column = Column(MEDIUMTEXT)
    # Both enums below are optional (nullable, no server default).
    sequence_type: Column = Column(
        Enum("dna", "peptide"),
    )
    status: Column = Column(
        Enum("experimental", "predicted"),
    )
class DependentXref(Base):
    """ORM mapping for the ``dependent_xref`` table.

    The primary key is composite: ``master_xref_id``, ``dependent_xref_id``
    and ``linkage_source_id``.
    """

    __tablename__ = "dependent_xref"

    # Indexed, but not part of the primary key.
    object_xref_id: Column = Column(INTEGER(10, unsigned=True), index=True)
    master_xref_id: Column = Column(INTEGER(10, unsigned=True), primary_key=True, index=True)
    dependent_xref_id: Column = Column(INTEGER(10, unsigned=True), primary_key=True, index=True)
    linkage_annotation: Column = Column(VARCHAR(255))
    linkage_source_id: Column = Column(INTEGER(10, unsigned=True), primary_key=True, nullable=False)
class Synonym(Base):
    """ORM mapping for the ``synonym`` table.

    The primary key is the pair (``xref_id``, ``synonym``); each column also
    carries its own single-column index.
    """

    __tablename__ = "synonym"

    xref_id: Column = Column(INTEGER(10, unsigned=True), primary_key=True, index=True)
    synonym: Column = Column(VARCHAR(255), primary_key=True, index=True)
class Source(Base):
    """ORM mapping for the ``source`` table."""

    __tablename__ = "source"

    # Surrogate primary key.
    source_id: Column = Column(INTEGER(10, unsigned=True), autoincrement=True, primary_key=True)
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    # Mandatory enum; the server fills in 'NOIDEA' when no value is supplied.
    status: Column = Column(
        Enum("KNOWN", "XREF", "PRED", "ORTH", "PSEUDO", "LOWEVIDENCE", "NOIDEA"),
        server_default=text("'NOIDEA'"),
        nullable=False,
    )
    source_release: Column = Column(VARCHAR(255))
    ordered: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Server-side defaults: 1 for the priority, '' for its description.
    priority: Column = Column(
        INTEGER(5, unsigned=True),
        server_default=text("1"),
    )
    priority_description: Column = Column(
        VARCHAR(40),
        server_default=text("''"),
    )
class SourceURL(Base):
    """ORM mapping for the ``source_url`` table."""

    __tablename__ = "source_url"

    # Surrogate primary key.
    source_url_id: Column = Column(INTEGER(10, unsigned=True), autoincrement=True, primary_key=True)
    # Mandatory and indexed.
    source_id: Column = Column(INTEGER(10, unsigned=True), index=True, nullable=False)
    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        nullable=False,
    )
    parser: Column = Column(VARCHAR(255))
class GeneDirectXref(Base):
    """ORM mapping for the ``gene_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of the two columns is also indexed individually.
    """

    __tablename__ = "gene_direct_xref"

    general_xref_id: Column = Column(INTEGER(10, unsigned=True), primary_key=True, index=True)
    ensembl_stable_id: Column = Column(VARCHAR(255), primary_key=True, index=True)
    linkage_xref: Column = Column(VARCHAR(255))
class TranscriptDirectXref(Base):
    """ORM mapping for the ``transcript_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of the two columns is also indexed individually.
    """

    __tablename__ = "transcript_direct_xref"

    general_xref_id: Column = Column(INTEGER(10, unsigned=True), primary_key=True, index=True)
    ensembl_stable_id: Column = Column(VARCHAR(255), primary_key=True, index=True)
    linkage_xref: Column = Column(VARCHAR(255))
class TranslationDirectXref(Base):
    """ORM mapping for the ``translation_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of the two columns is also indexed individually.
    """

    __tablename__ = "translation_direct_xref"

    general_xref_id: Column = Column(INTEGER(10, unsigned=True), primary_key=True, index=True)
    ensembl_stable_id: Column = Column(VARCHAR(255), primary_key=True, index=True)
    linkage_xref: Column = Column(VARCHAR(255))
class Species(Base):
    """ORM mapping for the ``species`` table.

    The primary key is the pair (``species_id``, ``taxonomy_id``).
    """

    __tablename__ = "species"

    # Both key columns are declared NOT NULL and individually indexed.
    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
        nullable=False,
    )
    taxonomy_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
        nullable=False,
    )
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    aliases: Column = Column(VARCHAR(255))
class Pairs(Base):
    """ORM mapping for the ``pairs`` table."""

    __tablename__ = "pairs"

    # Surrogate primary key.
    pair_id: Column = Column(INTEGER(10, unsigned=True), autoincrement=True, primary_key=True)
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # The two accessions are mandatory and each has its own index.
    accession1: Column = Column(VARCHAR(255), index=True, nullable=False)
    accession2: Column = Column(VARCHAR(255), index=True, nullable=False)
class CoordinateXref(Base):
    """ORM mapping for the ``coordinate_xref`` table."""

    __tablename__ = "coordinate_xref"
    # Two composite indexes that differ only in their last column
    # ("txStart" versus "txEnd"). They refer to columns by name, so the
    # camelCase attribute names below must stay as they are.
    __table_args__ = (
        Index(
            "start_pos_idx",
            "species_id",
            "chromosome",
            "strand",
            "txStart",
        ),
        Index(
            "end_pos_idx",
            "species_id",
            "chromosome",
            "strand",
            "txEnd",
        ),
    )

    # Surrogate primary key.
    coord_xref_id: Column = Column(INTEGER(10, unsigned=True), autoincrement=True, primary_key=True)
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    accession: Column = Column(VARCHAR(255), nullable=False)
    chromosome: Column = Column(VARCHAR(255), nullable=False)
    strand: Column = Column(TINYINT(2), nullable=False)
    # The coordinate columns are signed integers (no unsigned flag).
    txStart: Column = Column(INTEGER(10), nullable=False)
    txEnd: Column = Column(INTEGER(10), nullable=False)
    # The CDS bounds are the only nullable coordinate columns.
    cdsStart: Column = Column(INTEGER(10))
    cdsEnd: Column = Column(INTEGER(10))
    exonStarts: Column = Column(TEXT, nullable=False)
    exonEnds: Column = Column(TEXT, nullable=False)
class ChecksumXref(Base):
    """ORM mapping for the ``checksum_xref`` table."""

    __tablename__ = "checksum_xref"
    # On MySQL the index covers only the first 10 characters of "checksum".
    __table_args__ = (
        Index("checksum_idx", "checksum", mysql_length=10),
    )

    # Surrogate primary key.
    checksum_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Fixed-width character columns.
    accession: Column = Column(CHAR(14), nullable=False)
    checksum: Column = Column(CHAR(32), nullable=False)
class Mapping(Base):
    """ORM mapping for the ``mapping`` table."""

    __tablename__ = "mapping"

    # Primary key; no autoincrement is requested explicitly.
    job_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # The attribute is named after the column, so it shadows the ``type``
    # builtin inside this class body only.
    type: Column = Column(
        Enum("dna", "peptide", "UCSC"),
    )
    command_line: Column = Column(TEXT)
    percent_query_cutoff: Column = Column(INTEGER(10, unsigned=True))
    percent_target_cutoff: Column = Column(INTEGER(10, unsigned=True))
    method: Column = Column(VARCHAR(255))
    array_size: Column = Column(INTEGER(10, unsigned=True))
class MappingJobs(Base):
    """ORM mapping for the ``mapping_jobs`` table."""

    __tablename__ = "mapping_jobs"

    # Surrogate primary key; a signed integer, unlike the other id columns
    # in this class.
    mapping_job_id: Column = Column(INTEGER(10), autoincrement=True, primary_key=True)
    root_dir: Column = Column(TEXT)
    map_file: Column = Column(VARCHAR(255))
    status: Column = Column(
        Enum("SUBMITTED", "FAILED", "SUCCESS"),
    )
    out_file: Column = Column(VARCHAR(255))
    err_file: Column = Column(VARCHAR(255))
    array_number: Column = Column(INTEGER(10, unsigned=True))
    # Plain nullable integer; no foreign key is declared here.
    job_id: Column = Column(INTEGER(10, unsigned=True))
    failed_reason: Column = Column(VARCHAR(255))
    object_xref_start: Column = Column(INTEGER(10, unsigned=True))
    object_xref_end: Column = Column(INTEGER(10, unsigned=True))
class GeneTranscriptTranslation(Base):
    """ORM mapping for the ``gene_transcript_translation`` table.

    ``transcript_id`` alone is the primary key; the gene and translation ids
    are indexed secondary columns.
    """

    __tablename__ = "gene_transcript_translation"

    gene_id: Column = Column(INTEGER(10, unsigned=True), index=True, nullable=False)
    transcript_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # Nullable, unlike gene_id.
    translation_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
    )
class ProcessStatus(Base):
    """ORM mapping for the ``process_status`` table."""

    __tablename__ = "process_status"

    # Surrogate primary key. The attribute is named after the column, so it
    # shadows the ``id`` builtin inside this class body only.
    id: Column = Column(INTEGER(10, unsigned=True), autoincrement=True, primary_key=True)
    # Optional enum. The declaration order of the members is preserved
    # exactly, because it is part of the column definition.
    status: Column = Column(
        Enum(
            "xref_created", "parsing_started", "parsing_finished",
            "alt_alleles_added", "xref_fasta_dumped", "core_fasta_dumped",
            "core_data_loaded", "mapping_submitted", "mapping_finished",
            "mapping_processed", "direct_xrefs_parsed", "prioritys_flagged",
            "processed_pairs", "biomart_test_finished", "source_level_move_finished",
            "alt_alleles_processed", "official_naming_done", "checksum_xrefs_started",
            "checksum_xrefs_finished", "coordinate_xrefs_started", "coordinate_xref_finished",
            "tests_started", "tests_failed", "tests_finished",
            "core_loaded", "display_xref_done", "gene_description_done",
        )
    )
    # Mandatory timestamp with no default, so the caller must supply it.
    date: Column = Column(
        DateTime,
        nullable=False,
    )
class DisplayXrefPriority(Base):
    """ORM mapping for the ``display_xref_priority`` table.

    The primary key is the pair (``ensembl_object_type``, ``source_id``).
    """

    __tablename__ = "display_xref_priority"

    ensembl_object_type: Column = Column(
        VARCHAR(100),
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # Mandatory unsigned small integer.
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )
class GeneDescPriority(Base):
    """ORM mapping for the ``gene_desc_priority`` table, keyed by ``source_id``."""

    __tablename__ = "gene_desc_priority"

    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # Mandatory unsigned small integer.
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )
class AltAllele(Base):
    """ORM mapping for the ``alt_allele`` table.

    The primary key is the pair (``alt_allele_id``, ``gene_id``).
    """

    __tablename__ = "alt_allele"

    alt_allele_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # Part of the key and additionally indexed on its own.
    gene_id: Column = Column(INTEGER(10, unsigned=True), primary_key=True, index=True)
    # Stored as an unsigned integer; the server default is 0.
    is_reference: Column = Column(
        INTEGER(2, unsigned=True),
        server_default=text("0"),
    )
class GeneStableId(Base):
    """ORM mapping for the ``gene_stable_id`` table, keyed by ``stable_id``."""

    __tablename__ = "gene_stable_id"

    # Mandatory and indexed, but not the primary key.
    internal_id: Column = Column(INTEGER(10, unsigned=True), index=True, nullable=False)
    stable_id: Column = Column(
        VARCHAR(128),
        primary_key=True,
    )
    display_xref_id: Column = Column(INTEGER(10, unsigned=True))
    # Stored as an unsigned integer; the server default is 0.
    desc_set: Column = Column(
        INTEGER(10, unsigned=True),
        server_default=text("0"),
    )
class TranscriptStableId(Base):
    """ORM mapping for the ``transcript_stable_id`` table, keyed by ``stable_id``."""

    __tablename__ = "transcript_stable_id"

    # Mandatory and indexed, but not the primary key.
    internal_id: Column = Column(INTEGER(10, unsigned=True), index=True, nullable=False)
    stable_id: Column = Column(
        VARCHAR(128),
        primary_key=True,
    )
    display_xref_id: Column = Column(INTEGER(10, unsigned=True))
    biotype: Column = Column(
        VARCHAR(40),
        nullable=False,
    )
class TranslationStableId(Base):
    """ORM mapping for the ``translation_stable_id`` table.

    Here ``internal_id`` is the primary key and ``stable_id`` is the indexed
    secondary column, the reverse of the gene and transcript stable-id tables.
    """

    __tablename__ = "translation_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    stable_id: Column = Column(VARCHAR(128), index=True, nullable=False)
class ObjectXref(Base):
    """ORM mapping for the ``object_xref`` table."""

    __tablename__ = "object_xref"
    __table_args__ = (
        # Uniqueness is enforced over this five-column combination.
        Index(
            "unique_idx",
            "ensembl_object_type", "ensembl_id", "xref_id", "ox_status", "master_xref_id",
            unique=True,
        ),
        # Non-unique lookup indexes.
        Index(
            "oxref_idx",
            "object_xref_id",
            "xref_id",
            "ensembl_object_type",
            "ensembl_id",
        ),
        Index("xref_idx", "xref_id", "ensembl_object_type"),
    )

    # Surrogate primary key.
    object_xref_id: Column = Column(INTEGER(10, unsigned=True), autoincrement=True, primary_key=True)
    ensembl_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    ensembl_object_type: Column = Column(
        Enum("RawContig", "Transcript", "Gene", "Translation"),
        nullable=False,
    )
    xref_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    linkage_annotation: Column = Column(VARCHAR(255))
    # Optional enum (nullable, no server default).
    linkage_type: Column = Column(
        Enum(
            "PROJECTION", "MISC", "DEPENDENT", "DIRECT", "SEQUENCE_MATCH",
            "INFERRED_PAIR", "PROBE", "UNMAPPED", "COORDINATE_OVERLAP", "CHECKSUM",
        )
    )
    # Mandatory enum; the server fills in 'DUMP_OUT' when no value is supplied.
    ox_status: Column = Column(
        Enum("DUMP_OUT", "FAILED_PRIORITY", "FAILED_CUTOFF", "NO_DISPLAY", "MULTI_DELETE"),
        server_default=text("'DUMP_OUT'"),
        nullable=False,
    )
    unused_priority: Column = Column(INTEGER(10, unsigned=True))
    master_xref_id: Column = Column(INTEGER(10, unsigned=True))
class IdentityXref(Base):
    """ORM mapping for the ``identity_xref`` table, keyed by ``object_xref_id``."""

    __tablename__ = "identity_xref"

    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # Every remaining column is nullable; the integer ones are signed.
    query_identity: Column = Column(INTEGER(5))
    target_identity: Column = Column(INTEGER(5))
    hit_start: Column = Column(INTEGER(10))
    hit_end: Column = Column(INTEGER(10))
    translation_start: Column = Column(INTEGER(10))
    translation_end: Column = Column(INTEGER(10))
    cigar_line: Column = Column(TEXT)
    # Double-precision floating-point columns.
    score: Column = Column(DOUBLE)
    evalue: Column = Column(DOUBLE)
class Meta(Base):
    """ORM mapping for the ``meta`` table."""

    __tablename__ = "meta"
    __table_args__ = (
        # Unique over all four columns, including the primary key meta_id.
        Index(
            "species_key_value_idx",
            "meta_id",
            "species_id",
            "meta_key",
            "meta_value",
            unique=True,
        ),
        Index("species_value_idx", "species_id", "meta_value"),
    )

    # Surrogate primary key (signed integer).
    meta_id: Column = Column(INTEGER(10), autoincrement=True, primary_key=True)
    # Nullable; the server default is 1.
    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        server_default=text("1"),
    )
    meta_key: Column = Column(VARCHAR(40), nullable=False)
    # Declared with the MySQL ``binary`` flag on the VARCHAR type.
    meta_value: Column = Column(VARCHAR(255, binary=True), nullable=False)
    # Mandatory timestamp with no default.
    date: Column = Column(DateTime, nullable=False)