Coverage for src/python/ensembl/xrefs/xref_update_db_model.py: 100%

196 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-17 13:09 +0000

1# See the NOTICE file distributed with this work for additional information 

2# regarding copyright ownership. 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); 

5# you may not use this file except in compliance with the License. 

6# You may obtain a copy of the License at 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14"""Xref Update database ORM.""" 

15# Ignore some pylint and mypy checks due to the nature of SQLAlchemy ORMs 

16# pylint: disable=missing-class-docstring,too-many-lines 

17# mypy: disable-error-code="misc, valid-type" 

18 

19from sqlalchemy import Column, Index, Enum, DateTime, text 

20from sqlalchemy.dialects.mysql import ( 

21 INTEGER, 

22 VARCHAR, 

23 TEXT, 

24 MEDIUMTEXT, 

25 TINYINT, 

26 CHAR, 

27 SMALLINT, 

28 DOUBLE, 

29) 

30from sqlalchemy.orm import declarative_base 

31 

# Shared declarative base; the ORM classes in this module subclass it so that
# their tables are registered on a single metadata collection.
Base = declarative_base()

33 

34 

class Xref(Base):
    """ORM mapping for the ``xref`` table."""

    __tablename__ = "xref"
    __table_args__ = (
        # Unique across accession/source/species/label. The two VARCHAR
        # columns are indexed on a 100-character MySQL prefix.
        # NOTE(review): the index name spelling ("acession") is reproduced
        # verbatim; renaming it would change the generated schema.
        Index(
            "acession_idx", "accession", "source_id", "species_id", "label",
            mysql_length={"accession": 100, "label": 100},
            unique=True,
        ),
        Index("species_source_idx", "species_id", "source_id"),
    )

    # Auto-incrementing surrogate primary key.
    xref_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    accession: Column = Column(VARCHAR(255), nullable=False)
    version: Column = Column(INTEGER(10, unsigned=True))
    label: Column = Column(VARCHAR(255))
    description: Column = Column(TEXT)
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Required enum; the database fills in 'NONE' when no value is supplied.
    info_type: Column = Column(
        Enum(
            "NONE", "PROJECTION", "MISC", "DEPENDENT", "DIRECT",
            "SEQUENCE_MATCH", "INFERRED_PAIR", "PROBE", "UNMAPPED",
            "COORDINATE_OVERLAP", "CHECKSUM",
        ),
        server_default=text("'NONE'"),
        nullable=False,
    )
    # Required string; the database defaults it to the empty string.
    info_text: Column = Column(
        VARCHAR(255), server_default=text("''"), nullable=False
    )
    # Optional enum (nullable, no server default).
    dumped: Column = Column(
        Enum(
            "MAPPED", "NO_DUMP_ANOTHER_PRIORITY", "UNMAPPED_NO_MAPPING",
            "UNMAPPED_NO_MASTER", "UNMAPPED_MASTER_FAILED",
            "UNMAPPED_NO_STABLE_ID", "UNMAPPED_INTERPRO",
        )
    )

86 

87 

class PrimaryXref(Base):
    """ORM mapping for the ``primary_xref`` table, keyed by ``xref_id``."""

    __tablename__ = "primary_xref"

    # Primary key; not declared as auto-incrementing here.
    xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    sequence: Column = Column(MEDIUMTEXT)
    sequence_type: Column = Column(
        Enum("dna", "peptide"),
    )
    status: Column = Column(
        Enum("experimental", "predicted"),
    )

95 

96 

class DependentXref(Base):
    """ORM mapping for the ``dependent_xref`` table.

    The primary key is composite: ``master_xref_id``, ``dependent_xref_id``
    and ``linkage_source_id``.
    """

    __tablename__ = "dependent_xref"

    # Indexed and required, but not part of the primary key.
    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    master_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    dependent_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    linkage_annotation: Column = Column(VARCHAR(255))
    linkage_source_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, nullable=False
    )

105 

106 

class Synonym(Base):
    """ORM mapping for the ``synonym`` table.

    Both columns together form the primary key, and each one also carries
    its own single-column index.
    """

    __tablename__ = "synonym"

    xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    synonym: Column = Column(VARCHAR(255), primary_key=True, index=True)

112 

113 

class Source(Base):
    """ORM mapping for the ``source`` table."""

    __tablename__ = "source"

    # Auto-incrementing surrogate primary key.
    source_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    # Required enum; the database fills in 'NOIDEA' when no value is supplied.
    status: Column = Column(
        Enum(
            "KNOWN",
            "XREF",
            "PRED",
            "ORTH",
            "PSEUDO",
            "LOWEVIDENCE",
            "NOIDEA",
        ),
        server_default=text("'NOIDEA'"),
        nullable=False,
    )
    source_release: Column = Column(VARCHAR(255))
    ordered: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Nullable, with server-side defaults of 1 and '' respectively.
    priority: Column = Column(
        INTEGER(5, unsigned=True), server_default=text("1")
    )
    priority_description: Column = Column(
        VARCHAR(40), server_default=text("''")
    )

128 

129 

class SourceURL(Base):
    """ORM mapping for the ``source_url`` table."""

    __tablename__ = "source_url"

    # Auto-incrementing surrogate primary key.
    source_url_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    parser: Column = Column(VARCHAR(255))

137 

138 

class SourceMappingMethod(Base):
    """ORM mapping for the ``source_mapping_method`` table.

    The primary key is the pair (``source_id``, ``method``).
    """

    __tablename__ = "source_mapping_method"

    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    method: Column = Column(
        VARCHAR(255),
        primary_key=True,
    )

144 

145 

class GeneDirectXref(Base):
    """ORM mapping for the ``gene_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of those columns is also indexed on its own.
    """

    __tablename__ = "gene_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255), primary_key=True, index=True
    )
    linkage_xref: Column = Column(VARCHAR(255))

152 

153 

class TranscriptDirectXref(Base):
    """ORM mapping for the ``transcript_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of those columns is also indexed on its own.
    """

    __tablename__ = "transcript_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255), primary_key=True, index=True
    )
    linkage_xref: Column = Column(VARCHAR(255))

160 

161 

class TranslationDirectXref(Base):
    """ORM mapping for the ``translation_direct_xref`` table.

    The primary key is the pair (``general_xref_id``, ``ensembl_stable_id``);
    each of those columns is also indexed on its own.
    """

    __tablename__ = "translation_direct_xref"

    general_xref_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, index=True
    )
    ensembl_stable_id: Column = Column(
        VARCHAR(255), primary_key=True, index=True
    )
    linkage_xref: Column = Column(VARCHAR(255))

168 

169 

class Species(Base):
    """ORM mapping for the ``species`` table.

    The primary key is the pair (``species_id``, ``taxonomy_id``).
    """

    __tablename__ = "species"

    species_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
        nullable=False,
    )
    taxonomy_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
        index=True,
        nullable=False,
    )
    name: Column = Column(VARCHAR(255), index=True, nullable=False)
    aliases: Column = Column(VARCHAR(255))

177 

178 

class Pairs(Base):
    """ORM mapping for the ``pairs`` table."""

    __tablename__ = "pairs"

    # Auto-incrementing surrogate primary key.
    pair_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Both accession columns are required and individually indexed.
    accession1: Column = Column(VARCHAR(255), index=True, nullable=False)
    accession2: Column = Column(VARCHAR(255), index=True, nullable=False)

186 

187 

class CoordinateXref(Base):
    """ORM mapping for the ``coordinate_xref`` table."""

    __tablename__ = "coordinate_xref"
    __table_args__ = (
        # Two composite indexes sharing the species/chromosome/strand prefix,
        # one ending in txStart and the other in txEnd.
        Index(
            "start_pos_idx",
            "species_id",
            "chromosome",
            "strand",
            "txStart",
        ),
        Index(
            "end_pos_idx",
            "species_id",
            "chromosome",
            "strand",
            "txEnd",
        ),
    )

    # Auto-incrementing surrogate primary key.
    coord_xref_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    species_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    accession: Column = Column(VARCHAR(255), nullable=False)
    chromosome: Column = Column(VARCHAR(255), nullable=False)
    strand: Column = Column(TINYINT(2), nullable=False)
    # The coordinate columns below are signed integers (no ``unsigned`` flag).
    txStart: Column = Column(INTEGER(10), nullable=False)
    txEnd: Column = Column(INTEGER(10), nullable=False)
    cdsStart: Column = Column(INTEGER(10))
    cdsEnd: Column = Column(INTEGER(10))
    exonStarts: Column = Column(TEXT, nullable=False)
    exonEnds: Column = Column(TEXT, nullable=False)

207 

208 

class ChecksumXref(Base):
    """ORM mapping for the ``checksum_xref`` table."""

    __tablename__ = "checksum_xref"
    __table_args__ = (
        # The checksum index is built with a MySQL prefix length of 10.
        Index("checksum_idx", "checksum", mysql_length=10),
    )

    # Auto-incrementing surrogate primary key.
    checksum_xref_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    source_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    # Fixed-width CHAR columns: 14 characters and 32 characters.
    accession: Column = Column(CHAR(14), nullable=False)
    checksum: Column = Column(CHAR(32), nullable=False)

217 

218 

class Mapping(Base):
    """ORM mapping for the ``mapping`` table, keyed by ``job_id``."""

    __tablename__ = "mapping"

    job_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # The attribute name mirrors the column name, so it shadows the builtin
    # ``type`` inside the class body only.
    type: Column = Column(
        Enum("dna", "peptide", "UCSC"),
    )
    command_line: Column = Column(TEXT)
    percent_query_cutoff: Column = Column(INTEGER(10, unsigned=True))
    percent_target_cutoff: Column = Column(INTEGER(10, unsigned=True))
    method: Column = Column(VARCHAR(255))
    array_size: Column = Column(INTEGER(10, unsigned=True))

229 

230 

class MappingJobs(Base):
    """ORM mapping for the ``mapping_jobs`` table."""

    __tablename__ = "mapping_jobs"

    # Auto-incrementing primary key; a signed INTEGER, unlike most other
    # identifier columns in this module.
    mapping_job_id: Column = Column(
        INTEGER(10), autoincrement=True, primary_key=True
    )
    root_dir: Column = Column(TEXT)
    map_file: Column = Column(VARCHAR(255))
    status: Column = Column(
        Enum("SUBMITTED", "FAILED", "SUCCESS"),
    )
    out_file: Column = Column(VARCHAR(255))
    err_file: Column = Column(VARCHAR(255))
    array_number: Column = Column(INTEGER(10, unsigned=True))
    job_id: Column = Column(INTEGER(10, unsigned=True))
    failed_reason: Column = Column(VARCHAR(255))
    object_xref_start: Column = Column(INTEGER(10, unsigned=True))
    object_xref_end: Column = Column(INTEGER(10, unsigned=True))

245 

246 

class GeneTranscriptTranslation(Base):
    """ORM mapping for the ``gene_transcript_translation`` table.

    ``transcript_id`` is the primary key; the gene and translation columns
    are indexed.
    """

    __tablename__ = "gene_transcript_translation"

    gene_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    transcript_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True
    )
    # Indexed but nullable.
    translation_id: Column = Column(
        INTEGER(10, unsigned=True), index=True
    )

253 

254 

class ProcessStatus(Base):
    """ORM mapping for the ``process_status`` table."""

    __tablename__ = "process_status"

    # Auto-incrementing surrogate primary key.
    id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    # Nullable enum. The value strings and their order are reproduced exactly
    # as declared, since they define the column's enum type.
    status: Column = Column(
        Enum(
            "xref_created", "parsing_started", "parsing_finished",
            "alt_alleles_added", "xref_fasta_dumped", "core_fasta_dumped",
            "core_data_loaded", "mapping_submitted", "mapping_finished",
            "mapping_processed", "direct_xrefs_parsed", "prioritys_flagged",
            "processed_pairs", "biomart_test_finished",
            "source_level_move_finished", "alt_alleles_processed",
            "official_naming_done", "checksum_xrefs_started",
            "checksum_xrefs_finished", "coordinate_xrefs_started",
            "coordinate_xref_finished", "tests_started", "tests_failed",
            "tests_finished", "core_loaded", "display_xref_done",
            "gene_description_done",
        )
    )
    date: Column = Column(DateTime, nullable=False)

291 

292 

class DisplayXrefPriority(Base):
    """ORM mapping for the ``display_xref_priority`` table.

    The primary key is the pair (``ensembl_object_type``, ``source_id``).
    """

    __tablename__ = "display_xref_priority"

    ensembl_object_type: Column = Column(
        VARCHAR(100),
        primary_key=True,
    )
    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )

299 

300 

class GeneDescPriority(Base):
    """ORM mapping for the ``gene_desc_priority`` table, keyed by ``source_id``."""

    __tablename__ = "gene_desc_priority"

    source_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    priority: Column = Column(
        SMALLINT(unsigned=True),
        nullable=False,
    )

306 

307 

class AltAllele(Base):
    """ORM mapping for the ``alt_allele`` table."""

    __tablename__ = "alt_allele"

    # Auto-incrementing surrogate primary key.
    alt_allele_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True, autoincrement=True
    )
    gene_id: Column = Column(INTEGER(10, unsigned=True), index=True)
    # Stored as a small unsigned integer; the database defaults it to 0.
    is_reference: Column = Column(
        INTEGER(2, unsigned=True), server_default=text("0")
    )

314 

315 

class GeneStableId(Base):
    """ORM mapping for the ``gene_stable_id`` table, keyed by ``stable_id``."""

    __tablename__ = "gene_stable_id"

    # Required and indexed, but not the primary key.
    internal_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    stable_id: Column = Column(VARCHAR(128), primary_key=True)
    display_xref_id: Column = Column(INTEGER(10, unsigned=True))
    # Nullable unsigned integer with a server-side default of 0.
    desc_set: Column = Column(
        INTEGER(10, unsigned=True), server_default=text("0")
    )

323 

324 

class TranscriptStableId(Base):
    """ORM mapping for the ``transcript_stable_id`` table, keyed by ``stable_id``."""

    __tablename__ = "transcript_stable_id"

    # Required and indexed, but not the primary key.
    internal_id: Column = Column(
        INTEGER(10, unsigned=True), index=True, nullable=False
    )
    stable_id: Column = Column(VARCHAR(128), primary_key=True)
    display_xref_id: Column = Column(INTEGER(10, unsigned=True))
    biotype: Column = Column(VARCHAR(40), nullable=False)

332 

333 

class TranslationStableId(Base):
    """ORM mapping for the ``translation_stable_id`` table.

    Here ``internal_id`` is the primary key, while ``stable_id`` is a
    required, indexed column.
    """

    __tablename__ = "translation_stable_id"

    internal_id: Column = Column(
        INTEGER(10, unsigned=True), primary_key=True
    )
    stable_id: Column = Column(VARCHAR(128), index=True, nullable=False)

339 

340 

class ObjectXref(Base):
    """ORM mapping for the ``object_xref`` table."""

    __tablename__ = "object_xref"
    __table_args__ = (
        # Uniqueness is enforced over object type, object id, xref, status
        # and master xref together.
        Index(
            "unique_idx",
            "ensembl_object_type", "ensembl_id", "xref_id",
            "ox_status", "master_xref_id",
            unique=True,
        ),
        Index(
            "oxref_idx",
            "object_xref_id", "xref_id", "ensembl_object_type", "ensembl_id",
        ),
        Index("xref_idx", "xref_id", "ensembl_object_type"),
    )

    # Auto-incrementing surrogate primary key.
    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True), autoincrement=True, primary_key=True
    )
    ensembl_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    ensembl_object_type: Column = Column(
        Enum(
            "RawContig",
            "Transcript",
            "Gene",
            "Translation",
        ),
        nullable=False,
    )
    xref_id: Column = Column(INTEGER(10, unsigned=True), nullable=False)
    linkage_annotation: Column = Column(VARCHAR(255))
    # Optional enum (nullable, no server default).
    linkage_type: Column = Column(
        Enum(
            "PROJECTION", "MISC", "DEPENDENT", "DIRECT", "SEQUENCE_MATCH",
            "INFERRED_PAIR", "PROBE", "UNMAPPED", "COORDINATE_OVERLAP",
            "CHECKSUM",
        )
    )
    # Required enum; the database fills in 'DUMP_OUT' when no value is given.
    ox_status: Column = Column(
        Enum(
            "DUMP_OUT",
            "FAILED_PRIORITY",
            "FAILED_CUTOFF",
            "NO_DISPLAY",
            "MULTI_DELETE",
        ),
        server_default=text("'DUMP_OUT'"),
        nullable=False,
    )
    unused_priority: Column = Column(INTEGER(10, unsigned=True))
    master_xref_id: Column = Column(INTEGER(10, unsigned=True))

385 

386 

class IdentityXref(Base):
    """ORM mapping for the ``identity_xref`` table, keyed by ``object_xref_id``."""

    __tablename__ = "identity_xref"

    object_xref_id: Column = Column(
        INTEGER(10, unsigned=True),
        primary_key=True,
    )
    # All remaining columns are nullable; the integer ones are signed.
    query_identity: Column = Column(INTEGER(5))
    target_identity: Column = Column(INTEGER(5))

    hit_start: Column = Column(INTEGER(10))
    hit_end: Column = Column(INTEGER(10))

    translation_start: Column = Column(INTEGER(10))
    translation_end: Column = Column(INTEGER(10))

    cigar_line: Column = Column(TEXT)

    score: Column = Column(DOUBLE)
    evalue: Column = Column(DOUBLE)

400 

401 

class Meta(Base):
    """ORM mapping for the ``meta`` table."""

    __tablename__ = "meta"
    __table_args__ = (
        # Unique index spanning the id, species, key and value columns.
        Index(
            "species_key_value_idx",
            "meta_id",
            "species_id",
            "meta_key",
            "meta_value",
            unique=True,
        ),
        Index("species_value_idx", "species_id", "meta_value"),
    )

    # Auto-incrementing primary key (signed INTEGER).
    meta_id: Column = Column(
        INTEGER(10), autoincrement=True, primary_key=True
    )
    # Nullable, with a server-side default of 1.
    species_id: Column = Column(
        INTEGER(10, unsigned=True), server_default=text("1")
    )
    meta_key: Column = Column(VARCHAR(40), nullable=False)
    # Declared with the MySQL ``binary`` flag on the VARCHAR type.
    meta_value: Column = Column(VARCHAR(255, binary=True), nullable=False)
    date: Column = Column(DateTime, nullable=False)