Coverage for src/python/ensembl/xrefs/xref_update_db_model.py: 100%

192 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-07-28 09:42 +0000

1# See the NOTICE file distributed with this work for additional information 

2# regarding copyright ownership. 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); 

5# you may not use this file except in compliance with the License. 

6# You may obtain a copy of the License at 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14"""Xref Update database ORM.""" 

15# Ignore some pylint and mypy checks due to the nature of SQLAlchemy ORMs 

16# pylint: disable=missing-class-docstring,too-many-lines 

17# mypy: disable-error-code="misc, valid-type" 

18 

19from sqlalchemy import Column, Index, Enum, DateTime, text 

20from sqlalchemy.dialects.mysql import ( 

21 INTEGER, 

22 VARCHAR, 

23 TEXT, 

24 MEDIUMTEXT, 

25 TINYINT, 

26 CHAR, 

27 SMALLINT, 

28 DOUBLE, 

29) 

30from sqlalchemy.orm import declarative_base 

31 

32Base = declarative_base() 

33 

34 

class Xref(Base):
    """ORM mapping for the ``xref`` table."""

    __tablename__ = "xref"
    __table_args__ = (
        # Unique composite index. The index name is reproduced exactly as the
        # schema spells it. ``accession`` and ``label`` are indexed on a
        # 100-character prefix (MySQL-specific option).
        Index(
            "acession_idx",
            "accession",
            "source_id",
            "species_id",
            "label",
            mysql_length={"accession": 100, "label": 100},
            unique=True,
        ),
        # Non-unique lookup index on (species_id, source_id).
        Index("species_source_idx", "species_id", "source_id"),
    )

    # Auto-incrementing primary key.
    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    accession: Column = Column(VARCHAR(255), nullable=False)
    version: Column = Column(INTEGER(10, unsigned=True))
    label: Column = Column(VARCHAR(255))
    description: Column = Column(TEXT)
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Required enum; the server fills in 'NONE' when no value is supplied.
    info_type: Column = Column(
        Enum(
            "NONE",
            "PROJECTION",
            "MISC",
            "DEPENDENT",
            "DIRECT",
            "SEQUENCE_MATCH",
            "INFERRED_PAIR",
            "PROBE",
            "UNMAPPED",
            "COORDINATE_OVERLAP",
            "CHECKSUM",
        ),
        server_default=text("'NONE'"),
        nullable=False,
    )
    # Required string; the server default is the empty string.
    info_text: Column = Column(
        VARCHAR(255),
        server_default=text("''"),
        nullable=False,
    )
    # Nullable enum with no server default.
    dumped: Column = Column(
        Enum(
            "MAPPED",
            "NO_DUMP_ANOTHER_PRIORITY",
            "UNMAPPED_NO_MAPPING",
            "UNMAPPED_NO_MASTER",
            "UNMAPPED_MASTER_FAILED",
            "UNMAPPED_NO_STABLE_ID",
            "UNMAPPED_INTERPRO",
        )
    )

86 

87 

class PrimaryXref(Base):
    """ORM mapping for the ``primary_xref`` table."""

    __tablename__ = "primary_xref"

    # Primary key (no autoincrement flag is set here).
    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    sequence: Column = Column(MEDIUMTEXT)
    sequence_type: Column = Column(
        Enum("dna", "peptide"),
    )
    status: Column = Column(
        Enum("experimental", "predicted"),
    )

95 

96 

class DependentXref(Base):
    """ORM mapping for the ``dependent_xref`` table.

    The primary key is composite: ``master_xref_id``, ``dependent_xref_id``
    and ``linkage_source_id``.
    """

    __tablename__ = "dependent_xref"

    # Indexed, but not part of the primary key.
    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
    )
    master_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    dependent_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    linkage_annotation: Column = Column(VARCHAR(255))
    linkage_source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        nullable=False,
    )

105 

106 

class Synonym(Base):
    """ORM mapping for the ``synonym`` table.

    Both columns together form the primary key, and each is also indexed.
    """

    __tablename__ = "synonym"

    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    synonym: Column = Column(
        VARCHAR(255),
        primary_key=True,
        index=True,
    )

112 

113 

class Source(Base):
    """ORM mapping for the ``source`` table."""

    __tablename__ = "source"

    # Auto-incrementing primary key.
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    # Required enum; the server fills in 'NOIDEA' when no value is supplied.
    status: Column = Column(
        Enum(
            "KNOWN",
            "XREF",
            "PRED",
            "ORTH",
            "PSEUDO",
            "LOWEVIDENCE",
            "NOIDEA",
        ),
        server_default=text("'NOIDEA'"),
        nullable=False,
    )
    source_release: Column = Column(VARCHAR(255))
    ordered: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Nullable; server default is 1.
    priority: Column = Column(
        INTEGER(5, unsigned=True),
        server_default=text("1"),
    )
    # Nullable; server default is the empty string.
    priority_description: Column = Column(
        VARCHAR(40),
        server_default=text("''"),
    )

128 

129 

class SourceURL(Base):
    """ORM mapping for the ``source_url`` table."""

    __tablename__ = "source_url"

    # Auto-incrementing primary key.
    source_url_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
        nullable=False,
    )
    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        nullable=False,
    )
    parser: Column = Column(VARCHAR(255))

137 

138 

class GeneDirectXref(Base):
    """ORM mapping for the ``gene_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of the two columns also carries its own index.
    """

    __tablename__ = "gene_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255),
        primary_key=True,
        index=True,
    )
    linkage_xref: Column = Column(VARCHAR(255))

145 

146 

class TranscriptDirectXref(Base):
    """ORM mapping for the ``transcript_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of the two columns also carries its own index.
    """

    __tablename__ = "transcript_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255),
        primary_key=True,
        index=True,
    )
    linkage_xref: Column = Column(VARCHAR(255))

153 

154 

class TranslationDirectXref(Base):
    """ORM mapping for the ``translation_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of the two columns also carries its own index.
    """

    __tablename__ = "translation_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255),
        primary_key=True,
        index=True,
    )
    linkage_xref: Column = Column(VARCHAR(255))

161 

162 

class Species(Base):
    """ORM mapping for the ``species`` table.

    The primary key is the pair (``species_id``, ``taxonomy_id``).
    """

    __tablename__ = "species"

    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
        nullable=False,
    )
    taxonomy_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
        nullable=False,
    )
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    aliases: Column = Column(VARCHAR(255))

170 

171 

class Pairs(Base):
    """ORM mapping for the ``pairs`` table."""

    __tablename__ = "pairs"

    # Auto-incrementing primary key.
    pair_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        nullable=False,
    )
    # Both accession columns are required and individually indexed.
    accession1: Column = Column(VARCHAR(255), index=True, nullable=False)
    accession2: Column = Column(VARCHAR(255), index=True, nullable=False)

179 

180 

class CoordinateXref(Base):
    """ORM mapping for the ``coordinate_xref`` table."""

    __tablename__ = "coordinate_xref"
    __table_args__ = (
        # Two composite indexes that differ only in their last column
        # (``txStart`` versus ``txEnd``).
        Index(
            "start_pos_idx",
            "species_id",
            "chromosome",
            "strand",
            "txStart",
        ),
        Index(
            "end_pos_idx",
            "species_id",
            "chromosome",
            "strand",
            "txEnd",
        ),
    )

    # Auto-incrementing primary key.
    coord_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    accession: Column = Column(VARCHAR(255), nullable=False)
    chromosome: Column = Column(VARCHAR(255), nullable=False)
    strand: Column = Column(TINYINT(2), nullable=False)
    # The coordinate columns below are signed integers (no ``unsigned`` flag).
    txStart: Column = Column(INTEGER(10), nullable=False)
    txEnd: Column = Column(INTEGER(10), nullable=False)
    cdsStart: Column = Column(INTEGER(10))
    cdsEnd: Column = Column(INTEGER(10))
    exonStarts: Column = Column(TEXT, nullable=False)
    exonEnds: Column = Column(TEXT, nullable=False)

200 

201 

class ChecksumXref(Base):
    """ORM mapping for the ``checksum_xref`` table."""

    __tablename__ = "checksum_xref"
    __table_args__ = (
        # Index on ``checksum`` with a MySQL prefix length of 10.
        Index(
            "checksum_idx",
            "checksum",
            mysql_length=10,
        ),
    )

    # Auto-incrementing primary key.
    checksum_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Fixed-width character columns.
    accession: Column = Column(CHAR(14), nullable=False)
    checksum: Column = Column(CHAR(32), nullable=False)

210 

211 

class Mapping(Base):
    """ORM mapping for the ``mapping`` table."""

    __tablename__ = "mapping"

    # Primary key (no autoincrement flag is set here).
    job_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # The attribute name mirrors the column name, so it shadows the builtin
    # ``type`` inside the class body only.
    type: Column = Column(
        Enum("dna", "peptide", "UCSC"),
    )
    command_line: Column = Column(TEXT)
    percent_query_cutoff: Column = Column(
        INTEGER(10, unsigned=True),
    )
    percent_target_cutoff: Column = Column(
        INTEGER(10, unsigned=True),
    )
    method: Column = Column(VARCHAR(255))
    array_size: Column = Column(
        INTEGER(10, unsigned=True),
    )

222 

223 

class MappingJobs(Base):
    """ORM mapping for the ``mapping_jobs`` table."""

    __tablename__ = "mapping_jobs"

    # Auto-incrementing primary key; signed, unlike most other ids here.
    mapping_job_id: Column = Column(
        INTEGER(10),
        autoincrement=True,
        primary_key=True,
    )
    root_dir: Column = Column(TEXT)
    map_file: Column = Column(VARCHAR(255))
    status: Column = Column(
        Enum("SUBMITTED", "FAILED", "SUCCESS"),
    )
    out_file: Column = Column(VARCHAR(255))
    err_file: Column = Column(VARCHAR(255))
    array_number: Column = Column(
        INTEGER(10, unsigned=True),
    )
    job_id: Column = Column(
        INTEGER(10, unsigned=True),
    )
    failed_reason: Column = Column(VARCHAR(255))
    object_xref_start: Column = Column(
        INTEGER(10, unsigned=True),
    )
    object_xref_end: Column = Column(
        INTEGER(10, unsigned=True),
    )

238 

239 

class GeneTranscriptTranslation(Base):
    """ORM mapping for the ``gene_transcript_translation`` table.

    ``transcript_id`` is the primary key; the gene and translation ids are
    indexed columns.
    """

    __tablename__ = "gene_transcript_translation"

    gene_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
        nullable=False,
    )
    transcript_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    translation_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
    )

246 

247 

class ProcessStatus(Base):
    """ORM mapping for the ``process_status`` table."""

    __tablename__ = "process_status"

    # Auto-incrementing primary key. The attribute name mirrors the column
    # name, so it shadows the builtin ``id`` inside the class body only.
    id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    # Nullable enum. Member spellings and order are part of the schema and
    # are reproduced exactly.
    status: Column = Column(
        Enum(
            "xref_created",
            "parsing_started",
            "parsing_finished",
            "alt_alleles_added",
            "xref_fasta_dumped",
            "core_fasta_dumped",
            "core_data_loaded",
            "mapping_submitted",
            "mapping_finished",
            "mapping_processed",
            "direct_xrefs_parsed",
            "prioritys_flagged",
            "processed_pairs",
            "biomart_test_finished",
            "source_level_move_finished",
            "alt_alleles_processed",
            "official_naming_done",
            "checksum_xrefs_started",
            "checksum_xrefs_finished",
            "coordinate_xrefs_started",
            "coordinate_xref_finished",
            "tests_started",
            "tests_failed",
            "tests_finished",
            "core_loaded",
            "display_xref_done",
            "gene_description_done",
        ),
    )
    date: Column = Column(
        DateTime,
        nullable=False,
    )

284 

285 

class DisplayXrefPriority(Base):
    """ORM mapping for the ``display_xref_priority`` table.

    The primary key is the pair (``ensembl_object_type``, ``source_id``).
    """

    __tablename__ = "display_xref_priority"

    ensembl_object_type: Column = Column(
        VARCHAR(100),
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )

292 

293 

class GeneDescPriority(Base):
    """ORM mapping for the ``gene_desc_priority`` table."""

    __tablename__ = "gene_desc_priority"

    # Primary key (no autoincrement flag is set here).
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )

299 

300 

class AltAllele(Base):
    """ORM mapping for the ``alt_allele`` table.

    The primary key is the pair (``alt_allele_id``, ``gene_id``).
    """

    __tablename__ = "alt_allele"

    alt_allele_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    gene_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
    )
    # Nullable; server default is 0.
    is_reference: Column = Column(
        INTEGER(2, unsigned=True),
        server_default=text("0"),
    )

307 

308 

class GeneStableId(Base):
    """ORM mapping for the ``gene_stable_id`` table.

    ``stable_id`` is the primary key; ``internal_id`` is a required, indexed
    column.
    """

    __tablename__ = "gene_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
        nullable=False,
    )
    stable_id: Column = Column(
        VARCHAR(128),
        primary_key=True,
    )
    display_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
    )
    # Nullable; server default is 0.
    desc_set: Column = Column(
        INTEGER(10, unsigned=True),
        server_default=text("0"),
    )

316 

317 

class TranscriptStableId(Base):
    """ORM mapping for the ``transcript_stable_id`` table.

    ``stable_id`` is the primary key; ``internal_id`` is a required, indexed
    column.
    """

    __tablename__ = "transcript_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True),
        index=True,
        nullable=False,
    )
    stable_id: Column = Column(
        VARCHAR(128),
        primary_key=True,
    )
    display_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
    )
    biotype: Column = Column(
        VARCHAR(40),
        nullable=False,
    )

325 

326 

class TranslationStableId(Base):
    """ORM mapping for the ``translation_stable_id`` table.

    Here ``internal_id`` is the primary key and ``stable_id`` is a required,
    indexed column.
    """

    __tablename__ = "translation_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    stable_id: Column = Column(
        VARCHAR(128),
        index=True,
        nullable=False,
    )

332 

333 

class ObjectXref(Base):
    """ORM mapping for the ``object_xref`` table."""

    __tablename__ = "object_xref"
    __table_args__ = (
        # Unique composite index over five columns.
        Index(
            "unique_idx",
            "ensembl_object_type",
            "ensembl_id",
            "xref_id",
            "ox_status",
            "master_xref_id",
            unique=True,
        ),
        # Non-unique composite lookup indexes.
        Index(
            "oxref_idx",
            "object_xref_id",
            "xref_id",
            "ensembl_object_type",
            "ensembl_id",
        ),
        Index(
            "xref_idx",
            "xref_id",
            "ensembl_object_type",
        ),
    )

    # Auto-incrementing primary key.
    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        autoincrement=True,
        primary_key=True,
    )
    ensembl_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    ensembl_object_type: Column = Column(
        Enum(
            "RawContig",
            "Transcript",
            "Gene",
            "Translation",
        ),
        nullable=False,
    )
    xref_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    linkage_annotation: Column = Column(VARCHAR(255))
    # Nullable enum with no server default.
    linkage_type: Column = Column(
        Enum(
            "PROJECTION",
            "MISC",
            "DEPENDENT",
            "DIRECT",
            "SEQUENCE_MATCH",
            "INFERRED_PAIR",
            "PROBE",
            "UNMAPPED",
            "COORDINATE_OVERLAP",
            "CHECKSUM",
        )
    )
    # Required enum; the server fills in 'DUMP_OUT' when no value is supplied.
    ox_status: Column = Column(
        Enum(
            "DUMP_OUT",
            "FAILED_PRIORITY",
            "FAILED_CUTOFF",
            "NO_DISPLAY",
            "MULTI_DELETE",
        ),
        server_default=text("'DUMP_OUT'"),
        nullable=False,
    )
    unused_priority: Column = Column(INTEGER(10, unsigned=True))
    master_xref_id: Column = Column(INTEGER(10, unsigned=True))

378 

379 

class IdentityXref(Base):
    """ORM mapping for the ``identity_xref`` table."""

    __tablename__ = "identity_xref"

    # Primary key (no autoincrement flag is set here).
    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # Every remaining column is nullable; the integer ones are signed.
    query_identity: Column = Column(INTEGER(5))
    target_identity: Column = Column(INTEGER(5))
    hit_start: Column = Column(INTEGER(10))
    hit_end: Column = Column(INTEGER(10))
    translation_start: Column = Column(INTEGER(10))
    translation_end: Column = Column(INTEGER(10))
    cigar_line: Column = Column(TEXT)
    score: Column = Column(DOUBLE)
    evalue: Column = Column(DOUBLE)

392 evalue: Column = Column(DOUBLE) 

393 

394 

class Meta(Base):
    """ORM mapping for the ``meta`` table."""

    __tablename__ = "meta"
    __table_args__ = (
        # Unique composite index; it includes the primary-key column
        # ``meta_id`` as its first member.
        Index(
            "species_key_value_idx",
            "meta_id",
            "species_id",
            "meta_key",
            "meta_value",
            unique=True,
        ),
        Index(
            "species_value_idx",
            "species_id",
            "meta_value",
        ),
    )

    # Auto-incrementing primary key; signed integer.
    meta_id: Column = Column(
        INTEGER(10),
        autoincrement=True,
        primary_key=True,
    )
    # Nullable; server default is 1.
    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        server_default=text("1"),
    )
    meta_key: Column = Column(VARCHAR(40), nullable=False)
    # Declared with the MySQL ``binary`` flag on the VARCHAR type.
    meta_value: Column = Column(
        VARCHAR(255, binary=True),
        nullable=False,
    )
    date: Column = Column(DateTime, nullable=False)

406 date: Column = Column(DateTime, nullable=False)