Coverage for src/python/ensembl/xrefs/xref_update_db_model.py: 100%
196 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-17 13:09 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-17 13:09 +0000
1# See the NOTICE file distributed with this work for additional information
2# regarding copyright ownership.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Xref Update database ORM."""
15# Ignore some pylint and mypy checks due to the nature of SQLAlchemy ORMs
16# pylint: disable=missing-class-docstring,too-many-lines
17# mypy: disable-error-code="misc, valid-type"
19from sqlalchemy import Column, Index, Enum, DateTime, text
20from sqlalchemy.dialects.mysql import (
21 INTEGER,
22 VARCHAR,
23 TEXT,
24 MEDIUMTEXT,
25 TINYINT,
26 CHAR,
27 SMALLINT,
28 DOUBLE,
29)
30from sqlalchemy.orm import declarative_base
# Declarative base class that every ORM model defined in this module subclasses.
Base = declarative_base()
class Xref(Base):
    """ORM model mapped to the ``xref`` table."""

    __tablename__ = "xref"
    __table_args__ = (
        # Unique composite index; ``mysql_length`` sets a per-column index
        # length of 100 for the two VARCHAR columns.
        # NOTE(review): the index name is spelled "acession_idx"; it is kept
        # verbatim because renaming it would change the generated schema.
        Index(
            "acession_idx",
            "accession", "source_id", "species_id", "label",
            mysql_length={"accession": 100, "label": 100},
            unique=True,
        ),
        Index("species_source_idx", "species_id", "source_id"),
    )

    xref_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    accession: Column = Column(VARCHAR(255), nullable=False)
    version: Column = Column(INTEGER(10, unsigned=True))
    label: Column = Column(VARCHAR(255))
    description: Column = Column(TEXT)
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Required enum column; the server-side default is 'NONE'.
    info_type: Column = Column(
        Enum(
            "NONE", "PROJECTION", "MISC", "DEPENDENT", "DIRECT", "SEQUENCE_MATCH",
            "INFERRED_PAIR", "PROBE", "UNMAPPED", "COORDINATE_OVERLAP", "CHECKSUM",
        ),
        server_default=text("'NONE'"),
        nullable=False,
    )
    # Required string column; the server-side default is the empty string.
    info_text: Column = Column(
        VARCHAR(255), server_default=text("''"), nullable=False
    )
    # Optional enum column with no default.
    dumped: Column = Column(
        Enum(
            "MAPPED", "NO_DUMP_ANOTHER_PRIORITY", "UNMAPPED_NO_MAPPING",
            "UNMAPPED_NO_MASTER", "UNMAPPED_MASTER_FAILED",
            "UNMAPPED_NO_STABLE_ID", "UNMAPPED_INTERPRO",
        )
    )
class PrimaryXref(Base):
    """ORM model mapped to the ``primary_xref`` table."""

    __tablename__ = "primary_xref"

    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    sequence: Column = Column(MEDIUMTEXT)
    # Both enum columns below are optional and have no default.
    sequence_type: Column = Column(
        Enum("dna", "peptide"),
    )
    status: Column = Column(
        Enum("experimental", "predicted"),
    )
class DependentXref(Base):
    """ORM model mapped to the ``dependent_xref`` table.

    The primary key is composite: ``master_xref_id``, ``dependent_xref_id``
    and ``linkage_source_id``.
    """

    __tablename__ = "dependent_xref"

    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    master_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    dependent_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    linkage_annotation: Column = Column(VARCHAR(255))
    linkage_source_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, nullable=False
    )
class Synonym(Base):
    """ORM model mapped to the ``synonym`` table.

    Both columns together form the composite primary key, and each one is
    additionally given its own index.
    """

    __tablename__ = "synonym"

    xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    synonym: Column = Column(VARCHAR(255), primary_key=True, index=True)
class Source(Base):
    """ORM model mapped to the ``source`` table."""

    __tablename__ = "source"

    source_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    # Required enum column; the server-side default is 'NOIDEA'.
    status: Column = Column(
        Enum(
            "KNOWN",
            "XREF",
            "PRED",
            "ORTH",
            "PSEUDO",
            "LOWEVIDENCE",
            "NOIDEA",
        ),
        server_default=text("'NOIDEA'"),
        nullable=False,
    )
    source_release: Column = Column(VARCHAR(255))
    ordered: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Server-side defaults: 1 for ``priority``, '' for ``priority_description``.
    priority: Column = Column(
        INTEGER(5, unsigned=True),
        server_default=text("1"),
    )
    priority_description: Column = Column(
        VARCHAR(40),
        server_default=text("''"),
    )
class SourceURL(Base):
    """ORM model mapped to the ``source_url`` table."""

    __tablename__ = "source_url"

    source_url_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        nullable=False,
    )
    parser: Column = Column(VARCHAR(255))
class SourceMappingMethod(Base):
    """ORM model mapped to the ``source_mapping_method`` table.

    The primary key is the pair (``source_id``, ``method``).
    """

    __tablename__ = "source_mapping_method"

    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    method: Column = Column(
        VARCHAR(255),
        primary_key=True,
    )
class GeneDirectXref(Base):
    """ORM model mapped to the ``gene_direct_xref`` table.

    ``general_xref_id`` and ``ensembl_stable_id`` form the composite primary
    key; each is also individually indexed.
    """

    __tablename__ = "gene_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255), primary_key=True, index=True
    )
    linkage_xref: Column = Column(VARCHAR(255))
class TranscriptDirectXref(Base):
    """ORM model mapped to the ``transcript_direct_xref`` table.

    ``general_xref_id`` and ``ensembl_stable_id`` form the composite primary
    key; each is also individually indexed.
    """

    __tablename__ = "transcript_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255), primary_key=True, index=True
    )
    linkage_xref: Column = Column(VARCHAR(255))
class TranslationDirectXref(Base):
    """ORM model mapped to the ``translation_direct_xref`` table.

    ``general_xref_id`` and ``ensembl_stable_id`` form the composite primary
    key; each is also individually indexed.
    """

    __tablename__ = "translation_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255), primary_key=True, index=True
    )
    linkage_xref: Column = Column(VARCHAR(255))
class Species(Base):
    """ORM model mapped to the ``species`` table.

    The primary key is the pair (``species_id``, ``taxonomy_id``).
    """

    __tablename__ = "species"

    species_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True, nullable=False
    )
    taxonomy_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True, nullable=False
    )
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    aliases: Column = Column(VARCHAR(255))
class Pairs(Base):
    """ORM model mapped to the ``pairs`` table."""

    __tablename__ = "pairs"

    pair_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Each accession column is required and carries its own index.
    accession1: Column = Column(VARCHAR(255), index=True, nullable=False)
    accession2: Column = Column(VARCHAR(255), index=True, nullable=False)
class CoordinateXref(Base):
    """ORM model mapped to the ``coordinate_xref`` table."""

    __tablename__ = "coordinate_xref"
    # Two composite indexes that differ only in their last column
    # (``txStart`` versus ``txEnd``).
    __table_args__ = (
        Index(
            "start_pos_idx",
            "species_id", "chromosome", "strand", "txStart",
        ),
        Index(
            "end_pos_idx",
            "species_id", "chromosome", "strand", "txEnd",
        ),
    )

    coord_xref_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    accession: Column = Column(VARCHAR(255), nullable=False)
    chromosome: Column = Column(VARCHAR(255), nullable=False)
    strand: Column = Column(TINYINT(2), nullable=False)
    # The camelCase attribute names below double as the column names.
    txStart: Column = Column(INTEGER(10), nullable=False)
    txEnd: Column = Column(INTEGER(10), nullable=False)
    cdsStart: Column = Column(INTEGER(10))
    cdsEnd: Column = Column(INTEGER(10))
    exonStarts: Column = Column(TEXT, nullable=False)
    exonEnds: Column = Column(TEXT, nullable=False)
class ChecksumXref(Base):
    """ORM model mapped to the ``checksum_xref`` table."""

    __tablename__ = "checksum_xref"
    # Index on ``checksum`` with a MySQL index length of 10.
    __table_args__ = (
        Index("checksum_idx", "checksum", mysql_length=10),
    )

    checksum_xref_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    accession: Column = Column(CHAR(14), nullable=False)
    checksum: Column = Column(CHAR(32), nullable=False)
class Mapping(Base):
    """ORM model mapped to the ``mapping`` table."""

    __tablename__ = "mapping"

    job_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # The attribute is called ``type`` because that is the column name.
    type: Column = Column(
        Enum("dna", "peptide", "UCSC"),
    )
    command_line: Column = Column(TEXT)
    percent_query_cutoff: Column = Column(
        INTEGER(10, unsigned=True),
    )
    percent_target_cutoff: Column = Column(
        INTEGER(10, unsigned=True),
    )
    method: Column = Column(VARCHAR(255))
    array_size: Column = Column(
        INTEGER(10, unsigned=True),
    )
class MappingJobs(Base):
    """ORM model mapped to the ``mapping_jobs`` table."""

    __tablename__ = "mapping_jobs"

    # Unlike the other integer columns here, this key is declared without
    # ``unsigned=True``.
    mapping_job_id: Column = Column(
        INTEGER(10), autoincrement=True, primary_key=True
    )
    root_dir: Column = Column(TEXT)
    map_file: Column = Column(VARCHAR(255))
    status: Column = Column(
        Enum(
            "SUBMITTED",
            "FAILED",
            "SUCCESS",
        )
    )
    out_file: Column = Column(VARCHAR(255))
    err_file: Column = Column(VARCHAR(255))
    array_number: Column = Column(INTEGER(10, unsigned=True))
    job_id: Column = Column(INTEGER(10, unsigned=True))
    failed_reason: Column = Column(VARCHAR(255))
    object_xref_start: Column = Column(INTEGER(10, unsigned=True))
    object_xref_end: Column = Column(INTEGER(10, unsigned=True))
class GeneTranscriptTranslation(Base):
    """ORM model mapped to the ``gene_transcript_translation`` table.

    ``transcript_id`` is the primary key; the other two columns are indexed.
    """

    __tablename__ = "gene_transcript_translation"

    gene_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    transcript_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True
    )
    translation_id: Column = Column(
        INTEGER(10, unsigned=True), index=True
    )
class ProcessStatus(Base):
    """ORM model mapped to the ``process_status`` table."""

    __tablename__ = "process_status"

    id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    # Optional enum column; member order is part of the column definition and
    # is kept exactly as declared.
    status: Column = Column(
        Enum(
            "xref_created", "parsing_started", "parsing_finished",
            "alt_alleles_added", "xref_fasta_dumped", "core_fasta_dumped",
            "core_data_loaded", "mapping_submitted", "mapping_finished",
            "mapping_processed", "direct_xrefs_parsed", "prioritys_flagged",
            "processed_pairs", "biomart_test_finished", "source_level_move_finished",
            "alt_alleles_processed", "official_naming_done", "checksum_xrefs_started",
            "checksum_xrefs_finished", "coordinate_xrefs_started",
            "coordinate_xref_finished", "tests_started", "tests_failed",
            "tests_finished", "core_loaded", "display_xref_done",
            "gene_description_done",
        )
    )
    date: Column = Column(DateTime, nullable=False)
class DisplayXrefPriority(Base):
    """ORM model mapped to the ``display_xref_priority`` table.

    The primary key is the pair (``ensembl_object_type``, ``source_id``).
    """

    __tablename__ = "display_xref_priority"

    ensembl_object_type: Column = Column(
        VARCHAR(100),
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )
class GeneDescPriority(Base):
    """ORM model mapped to the ``gene_desc_priority`` table."""

    __tablename__ = "gene_desc_priority"

    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )
class AltAllele(Base):
    """ORM model mapped to the ``alt_allele`` table."""

    __tablename__ = "alt_allele"

    alt_allele_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, autoincrement=True
    )
    gene_id: Column = Column(INTEGER(10, unsigned=True), index=True)
    # Server-side default is 0.
    is_reference: Column = Column(
        INTEGER(2, unsigned=True), server_default=text("0")
    )
class GeneStableId(Base):
    """ORM model mapped to the ``gene_stable_id`` table.

    ``stable_id`` is the primary key; ``internal_id`` is required and indexed.
    """

    __tablename__ = "gene_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    stable_id: Column = Column(VARCHAR(128), primary_key=True)
    display_xref_id: Column = Column(INTEGER(10, unsigned=True))
    # Server-side default is 0.
    desc_set: Column = Column(
        INTEGER(10, unsigned=True), server_default=text("0")
    )
class TranscriptStableId(Base):
    """ORM model mapped to the ``transcript_stable_id`` table.

    ``stable_id`` is the primary key; ``internal_id`` is required and indexed.
    """

    __tablename__ = "transcript_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    stable_id: Column = Column(VARCHAR(128), primary_key=True)
    display_xref_id: Column = Column(INTEGER(10, unsigned=True))
    biotype: Column = Column(VARCHAR(40), nullable=False)
class TranslationStableId(Base):
    """ORM model mapped to the ``translation_stable_id`` table.

    Here ``internal_id`` is the primary key and ``stable_id`` is the required,
    indexed column.
    """

    __tablename__ = "translation_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    stable_id: Column = Column(
        VARCHAR(128),
        index=True,
        nullable=False,
    )
class ObjectXref(Base):
    """ORM model mapped to the ``object_xref`` table."""

    __tablename__ = "object_xref"
    # One unique composite index followed by two non-unique lookup indexes.
    __table_args__ = (
        Index(
            "unique_idx",
            "ensembl_object_type", "ensembl_id", "xref_id", "ox_status",
            "master_xref_id",
            unique=True,
        ),
        Index(
            "oxref_idx",
            "object_xref_id", "xref_id", "ensembl_object_type", "ensembl_id",
        ),
        Index("xref_idx", "xref_id", "ensembl_object_type"),
    )

    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    ensembl_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    ensembl_object_type: Column = Column(
        Enum(
            "RawContig",
            "Transcript",
            "Gene",
            "Translation",
        ),
        nullable=False,
    )
    xref_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    linkage_annotation: Column = Column(VARCHAR(255))
    # Optional enum column with no default.
    linkage_type: Column = Column(
        Enum(
            "PROJECTION", "MISC", "DEPENDENT", "DIRECT", "SEQUENCE_MATCH",
            "INFERRED_PAIR", "PROBE", "UNMAPPED", "COORDINATE_OVERLAP", "CHECKSUM",
        )
    )
    # Required enum column; the server-side default is 'DUMP_OUT'.
    ox_status: Column = Column(
        Enum(
            "DUMP_OUT",
            "FAILED_PRIORITY",
            "FAILED_CUTOFF",
            "NO_DISPLAY",
            "MULTI_DELETE",
        ),
        server_default=text("'DUMP_OUT'"),
        nullable=False,
    )
    unused_priority: Column = Column(INTEGER(10, unsigned=True))
    master_xref_id: Column = Column(INTEGER(10, unsigned=True))
class IdentityXref(Base):
    """ORM model mapped to the ``identity_xref`` table.

    ``object_xref_id`` is the primary key; every other column is optional.
    """

    __tablename__ = "identity_xref"

    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # Integer columns declared with display width 5.
    query_identity: Column = Column(INTEGER(5))
    target_identity: Column = Column(INTEGER(5))
    # Integer columns declared with display width 10.
    hit_start: Column = Column(INTEGER(10))
    hit_end: Column = Column(INTEGER(10))
    translation_start: Column = Column(INTEGER(10))
    translation_end: Column = Column(INTEGER(10))
    cigar_line: Column = Column(TEXT)
    # Floating-point columns.
    score: Column = Column(DOUBLE)
    evalue: Column = Column(DOUBLE)
class Meta(Base):
    """ORM model mapped to the ``meta`` table."""

    __tablename__ = "meta"
    __table_args__ = (
        # NOTE(review): this unique index includes ``meta_id``, which is already
        # the primary key, so it cannot reject duplicate
        # (species_id, meta_key, meta_value) rows — confirm that is intended.
        Index(
            "species_key_value_idx",
            "meta_id", "species_id", "meta_key", "meta_value",
            unique=True,
        ),
        Index("species_value_idx", "species_id", "meta_value"),
    )

    meta_id: Column = Column(INTEGER(10), autoincrement=True, primary_key=True)
    # Server-side default is 1.
    species_id: Column = Column(
        INTEGER(10, unsigned=True), server_default=text("1")
    )
    meta_key: Column = Column(VARCHAR(40), nullable=False)
    meta_value: Column = Column(
        VARCHAR(255, binary=True),
        nullable=False,
    )
    date: Column = Column(DateTime, nullable=False)