| Home | Trees | Indices | Help |
|
|---|
|
|
1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8
9 @note: Internal or legacy objects are intentionally left undocumented.
10 This typically indicates experimental code.
11 """
12
13 import os
14 import numpy
15
16 import csb.io
17 import csb.core
18 import csb.bio.utils
19 import csb.bio.structure
20 import csb.bio.sequence
21
22 from csb.bio.structure import SecondaryStructure
26
27 ISites = 'IS'
28 HMMFragments = 'HH'
29 HHThread = 'TH'
30 HHfrag = HHThread
31 Rosetta = 'NN'
32
38
# Expected CA RMSD between two randomly related fragments, indexed by fragment
# length (5..54 residues). Serves as a length-dependent normalization constant
# for fragment RMSD values (see the nrmsd computation below).
RANDOM_RMSD = {
     5:  1.8749005857255376,  6:  2.4314283686276261,  7:  2.9021135267789608,
     8:  3.2477716200172715,  9:  3.5469606556031708, 10:  3.8295465524456329,
    11:  4.1343107114131783, 12:  4.3761697929053014, 13:  4.6707299668248394,
    14:  4.9379016881069733, 15:  5.1809028645084911, 16:  5.4146957142595662,
    17:  5.7135948448156988, 18:  5.9597935432566782, 19:  6.1337340535741962,
    20:  6.3962825155503271, 21:  6.6107937773415166, 22:  6.8099096274123401,
    23:  7.0435583846849639, 24:  7.2160956482560970, 25:  7.4547896324594962,
    26:  7.6431870072434211, 27:  7.8727812194173836, 28:  8.0727393298443637,
    29:  8.2551450998965326, 30:  8.4413583511786587, 31:  8.5958719774122052,
    32:  8.7730435506242408, 33:  8.9970648837941649, 34:  9.1566521405105163,
    35:  9.2828620878454728, 36:  9.4525824357923405, 37:  9.6322126445253300,
    38:  9.7851684750961176, 39:  9.9891454649821476, 40: 10.1243739393520280,
    41: 10.2843485283447650, 42: 10.3904573050962710, 43: 10.5657920446742390,
    44: 10.6765327400337370, 45: 10.7895371322836520, 46: 11.0044755437575500,
    47: 11.0645416477835710, 48: 11.2312198752869850, 49: 11.3192226373914410,
    50: 11.4854781653408240, 51: 11.6075224944355210, 52: 11.7002688360698400,
    53: 11.8312452559540730, 54: 11.9189758932639050 }
50 """
51 Base class, representing a match between a fragment and its target.
52 """
53
55
56 self._id = id
57 self._qstart = qstart
58 self._qend = qend
59 self._probability = probability
60 self._rmsd = rmsd
61 self._tm_score = tm_score
62 self._qlength = qlength
63
64 @property
67
68 @property
71
72 @property
75
76 @property
79
80 @property
83
84 @property
87
88 @property
91
92 @property
95
96 @property
99
100 @property
103
104 @property
107
117
124
126 """
127 Fragment-based phi/psi angles predictor.
128
129 @param target: target protein, containing fragment assignments
130 @type target: L{Target}
131 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
132 @type threshold: float
133 @param extend: pick alternative, longer cluster reps, if possible
134 @type extend: bool
135 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
136 will be performed on demand (the first time C{predictor.compute()} is invoked)
137
138 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
139 time. Subsequent calls will be very fast.
140 """
141
143
144 if not isinstance(target, Target):
145 raise TypeError(target)
146 if target.matches.length == 0:
147 raise ValueError('This target has no fragment assignments')
148
149 self._target = target
150 self._threshold = float(threshold)
151 self._extend = bool(extend)
152
153 self._initialized = False
154 self._reps = {}
155 self._clusters = {}
156
157 if init:
158 self.init()
159
160 @property
163
164 @property
167
168 @property
171
def init(self):
    """
    Compute and cache all L{FragmentCluster}s.
    """
    # NOTE(review): def line reconstructed from self.init() call sites — confirm.
    self._reps = {}
    self._clusters = {}

    for residue in self.target.residues:
        cluster = self._filter(residue)
        if cluster is None:
            continue

        rep = cluster.centroid()
        # prefer the longer, structurally equivalent alternative when one exists
        if rep.has_alternative:
            rep.exchange()

        rank = residue.native.rank
        self._reps[rank] = rep
        self._clusters[rank] = cluster.items

    self._initialized = True
192
def _filter(self, residue):
    """
    Cluster all fragments covering C{residue} and shrink the cluster.

    @param residue: target residue whose assignments are clustered
    @type residue: L{TargetResidue}

    @return: the shrunk L{FragmentCluster}, or None if clustering
             was exhausted or did not converge
    """
    # NOTE(review): def line reconstructed from the self._filter(residue) call — confirm.
    try:
        nodes = [ClusterNode.create(ai.fragment) for ai in residue.assignments]
        cluster = FragmentCluster(nodes, threshold=self.threshold)
        cluster.shrink(minitems=0)
        return cluster

    except (ClusterExhaustedError, ClusterDivergingError):
        return None
208
def _residue(self, rank):
    """
    Look up the target residue whose native rank equals C{rank}.

    @param rank: target residue rank
    @type rank: int
    @rtype: L{TargetResidue}
    @raise ValueError: if no residue carries that rank
    """
    # NOTE(review): def line reconstructed from the self._residue(rank) call — confirm.
    for candidate in self._target.residues:
        if candidate.native.rank == rank:
            return candidate

    raise ValueError('Rank {0} is out of range'.format(rank))
216
def compute(self, rank):
    """
    Extract torsion angles from the L{ClusterRep} at residue C{#rank}.

    @param rank: target residue rank
    @type rank: int

    @rtype: L{TorsionPredictionInfo}
    """
    # NOTE(review): method name taken from the class docstring
    # ("predictor.compute()"); def line reconstructed — confirm.
    rep = self._residue(rank).filter(threshold=self.threshold, extend=self.extend)
    if rep is None:
        return None

    fragment = rep.centroid
    angles = fragment.torsion_at(rank, rank)[0]
    dssp = fragment.sec_structure_at(rank, rank)[0]

    return TorsionPredictionInfo(rank, rep.confidence, angles, dssp, primary=True)
239
241 """
242 Extract torsion angles from all L{ClusterRep}s, covering residue C{#rank}.
243
244 @param rank: target residue rank
245 @type rank: int
246
247 @return: L{TorsionPredictionInfo} instances, sorted by confidence
248 @rtype: tuple of L{TorsionPredictionInfo}
249 """
250
251 if not self._initialized:
252 self.init()
253
254 prediction = []
255
256 for rep in self._reps.values():
257
258 if rep.centroid.qstart <= rank <= rep.centroid.qend:
259
260 fragment = rep.centroid
261 torsion = fragment.torsion_at(rank, rank)[0]
262 ss = fragment.sec_structure_at(rank, rank)[0]
263 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss)
264
265 if rep is self._reps.get(rank, None):
266 info.primary = True
267
268 prediction.append(info)
269
270 prediction.sort(reverse=True)
271 return tuple(prediction)
272
274 """
275 Filter the current fragment map and create a new, completely flat,
276 non-overlapping map built from centroids, assigned iteratively by
277 decreasing confidence. Centroids with lower confidence which overlap
278 with previously assigned centroids will be trimmed to fill existing
279 gaps only.
280
281 @return: L{TorsionPredictionInfo} instances, one for each target residue
282 @rtype: tuple of L{TorsionPredictionInfo}
283 """
284
285 if not self._initialized:
286 self.init()
287
288 prediction = []
289 slots = set(range(1, self.target.length + 1))
290
291 reps = list(self._reps.values())
292 reps.sort(key=lambda i: i.confidence, reverse=True)
293
294 for rep in reps:
295
296 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1):
297 if rank in slots:
298 torsion = rep.centroid.torsion_at(rank, rank)[0]
299 ss = rep.centroid.sec_structure_at(rank, rank)[0]
300 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
301
302 prediction.append(info)
303 slots.remove(rank)
304
305 for rank in slots:
306 prediction.append(TorsionPredictionInfo(rank, 0, None))
307
308 prediction.sort(key=lambda i: i.rank)
309 return tuple(prediction)
310
312 """
313 Extract all torsion angles coming from all fragments, which had survived
314 the filtering and cover residue C{#rank}.
315
316 @param rank: target residue rank
317 @type rank: int
318
319 @return: all L{TorsionAngles} for a cluster at the specified residue
320 @rtype: tuple of L{TorsionAngles}
321 """
322
323 if not self._initialized:
324 self.init()
325 if rank not in self._clusters:
326 return tuple()
327
328 angles = []
329
330 for node in self._clusters[rank]:
331 fragment = node.fragment
332 torsion = fragment.torsion_at(rank, rank)[0]
333 angles.append(torsion)
334
335 return tuple(angles)
336
339 """
340 Struct container for a single torsion angle prediction.
341
342 @param rank: target residue rank
343 @type rank: int
344 @param confidence: confidence of prediction
345 @type confidence: float
346 @param torsion: assigned phi/psi/omega angles
347 @type torsion: L{TorsionAngles}
348 @param dssp: assigned secondary structure
349 @type dssp: L{SecondaryStructureElement}
350 @param primary: if True, designates that the assigned angles are extracted
351 from the L{ClusterRep} at residue C{#rank}; otherwise: the
352 angles are coming from another, overlapping L{ClusterRep}
353
354 """
355
def __init__(self, rank, confidence, torsion, dssp=None, primary=False):
    # NOTE(review): def line reconstructed from call sites — 3-argument calls
    # (rank, confidence, torsion) imply dssp and primary carry defaults; the
    # exact default values should be confirmed against the original file.
    self.rank = rank              # target residue rank
    self.confidence = confidence  # confidence of the prediction
    self.torsion = torsion        # assigned phi/psi/omega angles
    self.primary = primary        # True when angles come from the rep at #rank
    self.dssp = dssp              # assigned secondary structure
363
365 """
366 @return: convert this prediction to a tuple: (confidence, phi, psi, omega)
367 @rtype: tuple
368 """
369 return tuple([self.confidence, self.torsion.phi, self.torsion.psi, self.torsion.omega])
370
372 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
373
376
388
399
401 """
402 Represents a protein structure prediction target.
403
404 @param id: target sequence ID, in PDB accnC format
405 @type id: str
406 @param length: total target sequence length
407 @type length: int
408 @param residues: a list, containing target's residues. See also
409 L{Target.from_sequence}
410 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
411 """
412
def __init__(self, id, length, residues, overlap=None, segments=None, factory=None):
    """
    Represents a protein structure prediction target.

    @param id: target sequence ID, in PDB accnC format
    @type id: str
    @param length: total target sequence length
    @type length: int
    @param residues: the target's residues (see also L{Target.from_sequence})
    @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
    @param segments: optional L{TargetSegment}s, keyed internally by start rank
    @param factory: assignment factory; defaults to a fresh AssignmentFactory
    """
    # FIX: the default used to be factory=AssignmentFactory(), which is
    # evaluated once at import time and therefore shared by every Target
    # instance (default-argument pitfall). Build a fresh factory per call;
    # callers that pass factory=... explicitly are unaffected.
    if factory is None:
        factory = AssignmentFactory()

    self._id = id
    self._accession = id[:-1]   # PDB accession = all but the chain letter
    self._chain_id = id[-1]     # last character is the chain ID
    self._length = length
    self._overlap = overlap
    self._factory = factory

    self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)
    self._errors = csb.core.CollectionContainer()

    wrapped = [factory.residue(native) for native in residues]
    self._residues = csb.core.ReadOnlyCollectionContainer(items=wrapped,
                                        type=TargetResidue, start_index=1)

    if segments is not None:
        segments = {s.start: s for s in segments}
    self._segments = csb.core.ReadOnlyDictionaryContainer(items=segments)
432
@staticmethod
def from_sequence(id, sequence):
    """
    Factory, which builds L{Target} objects from a bare sequence.

    @param sequence: target's sequence
    @type sequence: L{csb.bio.sequence.AbstractSequence}, str or iterable

    @rtype: L{Target}
    """
    # NOTE(review): def line reconstructed from the visible body — confirm.
    if isinstance(sequence, csb.bio.sequence.Sequence):
        sequence = sequence.sequence

    residues = [csb.bio.structure.ProteinResidue(rank=rank, type=aa)
                for rank, aa in enumerate(sequence, start=1)]

    return Target(id, len(residues), residues)
454
455 @staticmethod
457 """
458 Factory, which builds L{Target} objects from an HMM profile.
459
460 @param hmm: target's HMM
461 @type hmm: L{csb.bio.hmm.ProfileHMM}
462
463 @rtype: L{Target}
464 """
465
466 residues = [ r.clone() for r in hmm.residues ]
467 return Target(hmm.id, hmm.layers.length, residues)
468
469 @staticmethod
474
475 @property
478
479 @property
482
483 @property
486
487 @property
490
491 @property
494
495 @property
498
499 @property
502
503 @property
506
507 @property
510
511 @property
514
515 @property
518
def assign(self, fragment):
    """
    Add a new fragment match.

    @param fragment: fragment to assign
    @type fragment: L{Assignment}
    @raise ValueError: if the fragment lies outside the target, or refers
                       to an undefined segment
    """
    # NOTE(review): def line reconstructed from the self.assign(frag) call — confirm.
    if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
        raise ValueError("Fragment out of range")

    self._assignments._append_item(fragment)

    # register the fragment with every residue it covers
    for rank in range(fragment.qstart, fragment.qend + 1):
        self._residues[rank].assign(ResidueAssignmentInfo(fragment, rank))

    if fragment.segment is not None:
        try:
            self._segments[fragment.segment].assign(fragment)
        except KeyError:
            raise ValueError("Undefined segment starting at {0}".format(fragment.segment))
540
542 """
543 Assign a bunch of fragments at once.
544 @type fragments: iterable of L{Assignment}s
545 """
546 for frag in fragments:
547 self.assign(frag)
548
550 """
551 Filter the current fragment map using a L{FragmentCluster}.
552
553 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
554 @type threshold: float
555 @param extend: pick extended alternatives where possible (default=False)
556 @type extend: bool
557
558 @return: a new target, containing only cluster centroids/reps
559 @rtype: L{Target}
560 """
561
562 target = self.clone()
563
564 for residue in self.residues:
565 rep = residue.filter(threshold=threshold, extend=extend)
566
567 if rep is not None:
568 target.assign(rep.centroid)
569
570 return target
571
def clone(self):
    """
    @return: a deep copy of the target
    @rtype: L{Target}
    """
    # NOTE(review): def line reconstructed from the self.clone() call — confirm.
    segment_copies = [TargetSegment(s.start, s.end, s.count)
                      for s in (self.segments[start] for start in self.segments)]

    return self._factory.target(self.id, self.length,
                                [r.native for r in self.residues],
                                overlap=self._overlap, segments=segment_copies)
585
587
589
590 super(ChemShiftTarget, self).__init__(id, length, residues, overlap=overlap,
591 segments=None, factory=ChemShiftAssignmentFactory())
592
594
def assign(self, fragment):
    """
    Add a new fragment match. Unlike the base implementation, the fragment
    is registered only at its query start residue.

    @param fragment: fragment to assign
    @raise ValueError: if the fragment lies outside the target
    """
    # NOTE(review): def line reconstructed (override of the base assign) — confirm.
    if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
        raise ValueError("Fragment out of range")

    self._assignments._append_item(fragment)
    self._residues[fragment.qstart].assign(
            ResidueAssignmentInfo(fragment, fragment.qstart))
603
607
609 """
610 Wrapper around L{Target}'s native residues. Decorates them with additional,
611 fragment-related methods.
612
613 @type native_residue: L{csb.bio.structure.ProteinResidue}
614 """
615
def __init__(self, native_residue):
    """
    Wrapper around a native target residue.

    @type native_residue: L{csb.bio.structure.ProteinResidue}
    """
    # NOTE(review): def line reconstructed from the class docstring — confirm.
    self._type = native_residue.type
    self._native = native_residue.clone()
    # fragment assignments covering this residue
    self._assignments = csb.core.ReadOnlyCollectionContainer(type=ResidueAssignmentInfo)
621
622 @property
625
626 @property
629
630 @property
633
636
638 """
639 @return: the fragment with the lowest RMSD at this position in the L{Target}
640 @rtype: L{Assignment}
641 """
642
643 best = None
644
645 for ai in self.assignments:
646 a = ai.fragment
647 if a.length < FragmentCluster.MIN_LENGTH:
648 continue
649 if best is None or a.rmsd < best.rmsd:
650 best = a
651 elif a.rmsd == best.rmsd and a.length > best.length:
652 best = a
653
654 return best
655
657 """
658 Filter all fragments, covering this position in the L{Target} using a
659 L{FragmentCluster}.
660
661 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
662 @type method: str
663 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
664 @type threshold: float
665 @param extend: pick extended alternative where possible (default=False)
666 @type extend: bool
667
668 @return: cluster's representative (if converged) or None
669 @rtype: L{ClusterRep} or None
670 """
671
672 try:
673 nodes = []
674 for ai in self.assignments:
675 node = ClusterNode.create(ai.fragment, method, extend)
676 nodes.append(node)
677
678 cluster = FragmentCluster(nodes, threshold=threshold)
679
680 center = cluster.shrink(minitems=0)
681 if center.has_alternative:
682 center.exchange()
683
684 return center
685
686 except (ClusterExhaustedError, ClusterDivergingError):
687 return None
688
690 """
691 @return: the longest fragment, covering the current position
692 @rtype: L{Assignment}
693 """
694 best = None
695
696 for q in self.assignments:
697 if best is None or (q.fragment.length > best.length):
698 best = q.fragment
699
700 return best
701
703 """
704 @return: the residue-wise precision of the fragment library at the
705 current position (percentage).
706
707 @param threshold: true-positive RMSD cutoff (default=1.5)
708 @type threshold: float
709 @rtype: float
710 """
711
712 if self.assignments.length < 1:
713 return None
714 else:
715 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
716 pos = len(positive) * 100.0 / self.assignments.length
717
718 return pos
719
721
723
724 best = None
725
726 for ai in self.assignments:
727 a = ai.fragment
728
729 if a.score < ChemShiftAssignment.BIT_SCORE_THRESHOLD * a.window:
730 continue
731
732 if best is None or a.score > best.score:
733 best = a
734 elif a.score == best.score and a.length > best.length:
735 best = a
736
737 return best
738
740
def __init__(self, start, end, count):
    # NOTE(review): def line reconstructed from the
    # TargetSegment(s.start, s.end, s.count) call site — confirm.
    self._start = start    # segment start rank in the target
    self._end = end        # segment end rank in the target
    self._count = count
    # fragments assigned to this segment (see assign())
    self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)
748
749 @property
752
753 @property
756
757 @property
760
761 @property
764
765 @property
768
def assign(self, fragment):
    """
    Register C{fragment} with this segment.

    @raise ValueError: if the fragment's segment start differs from this
                       segment's start
    """
    # NOTE(review): def line reconstructed from the segment.assign(fragment)
    # call site — confirm.
    if fragment.segment != self.start:
        raise ValueError('Segment origin mismatch: {0} vs {1}'.format(fragment.segment, self.start))
    self._assignments._append_item(fragment)
774
776
777 best = None
778
779 for a in self.assignments:
780 if a.length < FragmentCluster.MIN_LENGTH:
781 continue
782 if best is None or a.rmsd < best.rmsd:
783 best = a
784 elif a.rmsd == best.rmsd and a.length > best.length:
785 best = a
786
787 return best
788
790
791 try:
792 cluster = FragmentCluster(self.assignments, threshold=1.5,
793 connectedness=0.5, method=method)
794 centroid = cluster.shrink(minitems=1)
795 return centroid
796
797 except ClusterExhaustedError:
798 return None
799 finally:
800 del cluster
801
803
804 best = None
805
806 for q in self.assignments:
807 if best is None or (q.length > best.length):
808 best = q
809
810 return best
811
813
814 rmsds = []
815
816 for q in self.assignments:
817 for s in self.assignments:
818 if q is not s:
819 r = q.rmsd_to(s, min_overlap)
820 if r is not None:
821 rmsds.append(r)
822 else:
823 assert q.rmsd_to(s, 1) < 0.01
824
825 return rmsds
826
828
829 mdas = []
830
831 for q in self.assignments:
832 for s in self.assignments:
833 if q is not s:
834 m = q.mda_to(s, min_overlap)
835 if m is not None:
836 mdas.append(m)
837 return mdas
838
840
841 from csb.bio.hmm import RELATIVE_SA
842 from csb.bio.io.hhpred import ScoreUnits, HHProfileParser
843
844 def convert_sa(sa):
845 return numpy.array([ RELATIVE_SA[i] for i in sa ])
846
847 sources = {}
848 scores = []
849
850 for q in self.assignments:
851 for s in self.assignments:
852
853 if s.source_id not in sources:
854 hmm = HHProfileParser(os.path.join(profiles, s.source_id + '.hhm')).parse()
855 sources[s.source_id] = hmm.dssp_solvent
856
857 if q is not s:
858
859 common = q.overlap(s)
860 if len(common) >= min_overlap:
861
862 qsa = q.solvent_at(sources[q.source_id], min(common), max(common))
863 ssa = s.solvent_at(sources[s.source_id], min(common), max(common))
864
865 if '-' in qsa + ssa:
866 continue
867
868 qsa = convert_sa(qsa)
869 ssa = convert_sa(ssa)
870 assert len(qsa) == len(ssa)
871 sa_rmsd = numpy.sqrt(numpy.sum((qsa - ssa) ** 2) / float(len(qsa)))
872
873 scores.append(sa_rmsd)
874 return scores
875
877
878 from csb.bio.hmm import BACKGROUND
879 back = numpy.sqrt(numpy.array(BACKGROUND))
880
881 sources = {}
882 scores = []
883
884 for q in self.assignments:
885 for s in self.assignments:
886
887 if s.source_id not in sources:
888 # hmm = HHProfileParser(os.path.join(hmm_path, s.source_id + '.hhm')).parse(ScoreUnits.Probability)
889 sources[s.source_id] = csb.io.Pickle.load(open(os.path.join(profiles, s.source_id + '.pkl'), 'rb'))
890
891 if q is not s:
892
893 common = q.overlap(s)
894 if len(common) >= min_overlap:
895
896 qprof = q.profile_at(sources[q.source_id], min(common), max(common))
897 sprof = s.profile_at(sources[s.source_id], min(common), max(common))
898
899 #score = qhmm.emission_similarity(shmm)
900 assert len(qprof) == len(sprof)
901 dots = [ numpy.dot(qprof[i] / back, sprof[i] / back) for i in range(len(qprof)) ]
902 score = numpy.log(numpy.prod(dots))
903 if score is not None:
904 scores.append(score)
905 return scores
906
908
909 binsize = float(binsize)
910 bins = numpy.ceil(numpy.array(data) / binsize)
911
912 hist = dict.fromkeys(bins, 0)
913 for bin in bins:
914 hist[bin] += (1.0 / len(bins))
915
916 freq = numpy.array(hist.values())
917 return - numpy.sum(freq * numpy.log(freq))
918
923
928
930
931 rmsds = self.pairwise_rmsd()
932
933 if len(rmsds) < 1:
934 return None
935
936 return sum([1 for i in rmsds if i <= threshold]) / float(len(rmsds))
937
939
940 sa_rmsds = self.pairwise_sa_rmsd(profiles=profiles)
941
942 if len(sa_rmsds) < 1:
943 return None
944
945 return sum([1 for i in sa_rmsds if i <= threshold]) / float(len(sa_rmsds))
946
948
949 if self.assignments.length < 1:
950 return None
951
952 return sum([1 for i in self.assignments if i.rmsd <= threshold]) / float(self.assignments.length)
953
955
956 cons = self.rmsd_consistency()
957
958 if cons is None:
959 return 0
960 else:
961 return numpy.log10(self.count) * cons
962
964
966
def __init__(self, assignment, rank):
    """
    Describes how a fragment L{Assignment} covers target residue C{#rank}.

    @raise ValueError: if C{rank} falls outside the assignment's query range
    """
    if not assignment.qstart <= rank <= assignment.qend:
        # FIX: the original message contained a '{0}' placeholder but never
        # called .format(), so the offending rank was not interpolated
        raise ValueError('Rank {0} is not matched by this assignment'.format(rank))

    self._assignment = assignment
    self._rank = rank
    # 0-based offset of rank inside the assignment's query window
    self._relrank = rank - assignment.qstart
973
974 @property
976 return self._assignment.backbone[self._relrank]
977
978 @property
981
983 """
984 Represents a match between a fragment and its target.
985
986 @param source: source structure (must have torsion angles precomputed)
987 @type source: L{csb.bio.structure.Chain}
988 @param start: start position in C{source} (rank)
989 @type start: int
990 @param end: end position in C{source} (rank)
991 @type end: int
992 @param id: fragment ID
993 @type id: str
994 @param qstart: start position in target (rank)
995 @type qstart: int
996 @param qend: end position in target (rank)
997 @type qend: int
998 @param probability: probability of assignment
999 @type probability: float
1000 @param rmsd: RMSD of the fragment, compared to target's native structure
1001 @type rmsd: float
1002 """
1003
def __init__(self, source, start, end, qstart, qend, id=None, probability=None, rmsd=None,
             tm_score=None, score=None, neff=None, segment=None, internal_id=None):
    """
    Represents a match between a fragment and its target.

    @param source: source structure (must have torsion angles precomputed)
    @type source: L{csb.bio.structure.Chain}
    @param start: start position in C{source} (rank)
    @param end: end position in C{source} (rank)
    @param qstart: start position in target (rank)
    @param qend: end position in target (rank)
    @param id: fragment ID (derived from source and range when None)
    @param probability: probability of assignment
    @param rmsd: RMSD of the fragment, compared to target's native structure
    @raise Broken3DStructureError: if a residue in the region lacks a CA atom
    """
    assert source.has_torsion
    region = source.subregion(start, end, clone=True)

    try:
        calpha = [residue.atoms['CA'].vector.copy() for residue in region.residues]
    except csb.core.ItemNotFoundError:
        raise csb.bio.structure.Broken3DStructureError()

    self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray)
    self._torsion = [residue.torsion.copy() for residue in region.residues]
    self._sequence = region.sequence

    # e.g. "1abcA": 4-char accession + chain letter
    self._source_id = source.accession[:4] + source.id
    self._start = start
    self._end = end

    self._score = score
    self._neff = neff
    self._ss = None

    self._segment_start = segment
    self.internal_id = internal_id

    if id is None:
        id = "{0}:{1}-{2}".format(self.source_id, self.start, self.end)

    super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)

    # default: no secondary structure assigned ('-' per residue)
    self._ss = SecondaryStructure('-' * self.length)
1036
1037 @staticmethod
1039 """
1040 Create a new L{Assignment} given a source rosetta fragment.
1041
1042 @param fragment: rosetta fragment
1043 @type fragment: L{RosettaFragment}
1044 @param provider: PDB database provider
1045 @type provider: L{StructureProvider}
1046
1047 @rtype: L{Assignment}
1048 """
1049 try:
1050 structure = provider.get(fragment.accession)
1051 except KeyError:
1052 structure = provider.get(fragment.source_id)
1053 source = structure.chains[fragment.chain]
1054 source.compute_torsion()
1055
1056 id = "{0}:{1}-{2}".format(fragment.source_id, fragment.start, fragment.end)
1057
1058 return Assignment(source, fragment.start, fragment.end,
1059 fragment.qstart, fragment.qend, id, 0, 0)
1060
1061 @property
1064
1065 @property
1068
1069 @property
1072
1073 @property
1076
1077 @property
1080
1081 @property
1084
1085 @property
1088
1089 @property
1092
1093 @property
1096
1097 @property
@secondary_structure.setter
def secondary_structure(self, value):
    # NOTE(review): def line reconstructed from the visible decorator — confirm.
    # Accept a raw DSSP string and promote it to a SecondaryStructure object.
    if isinstance(value, csb.core.string):
        value = csb.bio.structure.SecondaryStructure(value)

    # the flattened string must span the fragment exactly
    if len(str(value)) != self.length:
        raise ValueError("Invalid secondary structure length", len(str(value)), self.length )

    self._ss = value
1109
1111 """
1112 Apply rotation/translation to fragment's coordinates in place.
1113 """
1114
1115 for ca in self.backbone:
1116 newca = numpy.dot(ca, numpy.transpose(rotation)) + translation
1117 for i in range(3):
1118 ca[i] = newca[i]
1119
1121
def _check_range(self, qstart, qend):
    # Shared guard for the *_at() accessors: the requested query window must
    # lie entirely within this fragment's qstart..qend coverage.
    # NOTE(review): def line reconstructed from the many
    # self._check_range(qstart, qend) call sites — confirm.
    in_range = self.qstart <= qstart and qstart <= qend and qend <= self.qend
    if not in_range:
        raise ValueError('Region {0}..{1} is out of range {2.qstart}..{2.qend}'.format(qstart, qend, self))
1124
1126 """
1127 @return: True if the fragment is centered around position=C{rank}.
1128 @rtype: bool
1129 """
1130
1131 if self.qstart < rank < self.qend:
1132 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
1133 return True
1134
1135 return False
1136
def backbone_at(self, qstart, qend):
    """
    @return: the CA coordinates of the fragment at the specified subregion.
    @rtype: list
    """
    # NOTE(review): def line reconstructed from call sites — confirm.
    self._check_range(qstart, qend)

    offset = qstart - self.qstart
    return self.backbone[offset : offset + (qend - qstart + 1)]
1149
def torsion_at(self, qstart, qend):
    """
    @return: the torsion angles of the fragment at the specified subregion.
    @rtype: list
    """
    # NOTE(review): def line reconstructed from call sites — confirm.
    self._check_range(qstart, qend)

    offset = qstart - self.qstart
    return self.torsion[offset : offset + (qend - qstart + 1)]
1162
def solvent_at(self, sa_string, qstart, qend):
    # Slice a source-wide solvent accessibility string down to the
    # qstart..qend window of this fragment.
    # NOTE(review): def line reconstructed from q.solvent_at(...) calls — confirm.
    self._check_range(qstart, qend)

    offset = qstart - self.qstart
    return sa_string[offset : offset + (qend - qstart + 1)]
1171
def sec_structure_at(self, qstart, qend):
    # Scan this fragment's secondary structure over the (1-based, relative)
    # window corresponding to target ranks qstart..qend.
    # NOTE(review): def line reconstructed from call sites — confirm.
    self._check_range(qstart, qend)

    relstart = qstart - self.qstart + 1
    relend = qend - self.qstart + 1
    return self.secondary_structure.scan(relstart, relend, loose=True, cut=True)
1179
def chain_at(self, source, qstart, qend):
    # Extract the region of `source` matched by target ranks qstart..qend,
    # translated into source-rank coordinates. Works with subregion-capable
    # objects (chains) and plain sequences alike.
    # NOTE(review): def line reconstructed from the self.chain_at(...) call — confirm.
    self._check_range(qstart, qend)

    start = qstart - self.qstart + self.start
    end = qend - self.qstart + self.start

    if hasattr(source, 'subregion'):
        return source.subregion(start, end)
    return source[start - 1 : end]
1191
1193
1194 self._check_range(qstart, qend)
1195
1196 start = qstart - self.qstart + self.start
1197 end = qend - self.qstart + self.start
1198
1199 return source.subregion(start, end)
1200
def overlap(self, other):
    """
    @type other: L{Assignment}
    @return: target positions, covered by both C{self} and C{other}
    @rtype: set of int
    """
    # NOTE(review): def line reconstructed from self.overlap(other) calls — confirm.
    mine = set(range(self.qstart, self.qend + 1))
    theirs = set(range(other.qstart, other.qend + 1))

    return mine & theirs
1212
def rmsd_to(self, other, min_overlap):
    """
    @return: the CA RMSD between C{self} and C{other}.

    @param other: another fragment
    @type other: L{Assignment}
    @param min_overlap: require at least that number of overlapping residues
                        (return None if not satisfied)
    @type min_overlap: int

    @rtype: float
    """
    # NOTE(review): def line reconstructed from q.rmsd_to(s, ...) calls — confirm.
    if self is other:
        return 0

    common = self.overlap(other)
    if len(common) < min_overlap:
        return None

    qstart, qend = min(common), max(common)
    mine = self.backbone_at(qstart, qend)
    theirs = other.backbone_at(qstart, qend)

    if len(mine) > 0 and len(theirs) > 0:
        return csb.bio.utils.rmsd(numpy.array(mine), numpy.array(theirs))

    return None
1241
1243
1244 common = self.overlap(other)
1245
1246 if len(common) >= min_overlap:
1247
1248 qstart, qend = min(common), max(common)
1249
1250 q = self.backbone_at(qstart, qend)
1251 s = other.backbone_at(qstart, qend)
1252
1253 if len(q) > 0 and len(s) > 0:
1254 return csb.bio.utils.rmsd(q, s) / RANDOM_RMSD[ len(common) ]
1255
1256 return None
1257
def mda_to(self, other, min_overlap):
    """
    Maximum deviation in torsion angles (MDA) between C{self} and C{other}
    over their common target region.

    @param other: another fragment
    @type other: L{Assignment}
    @param min_overlap: require at least that number of overlapping residues
                        (return None if not satisfied); presumably >= 2,
                        since a single-residue overlap yields no angle pairs
    @type min_overlap: int

    @return: max of |delta phi| (positions 2..L) and |delta psi|
             (positions 1..L-1) over the overlap, or None
    @rtype: float or None
    """
    common = self.overlap(other)

    if len(common) >= min_overlap:

        qstart, qend = min(common), max(common)

        q = self.torsion_at(qstart, qend)
        s = other.torsion_at(qstart, qend)

        if len(q) > 0 and len(s) > 0:
            # FIX: zip() returns a non-subscriptable iterator on Python 3;
            # the original sliced it directly (zip(q, s)[1:]), raising
            # TypeError. Materialize the pairs once, then slice.
            pairs = list(zip(q, s))

            maxphi = max(numpy.abs(i.phi - j.phi) for i, j in pairs[1:])   # phi: 2 .. L
            maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in pairs[:-1])  # psi: 1 .. L-1

            return max(maxphi, maxpsi)

    return None
1277
1279 """
1280 @deprecated: this method will be deleted soon. Use
1281 L{csb.bio.fragments.rosetta.OutputBuilder} instead.
1282 """
1283 stream = csb.io.MemoryStream()
1284
1285 if weight is None:
1286 weight = self.probability
1287 if not qstart:
1288 qstart = self.qstart
1289 if not qend:
1290 qend = self.qend
1291
1292 source.compute_torsion()
1293 chain = self.chain_at(source, qstart, qend)
1294
1295 for i, r in enumerate(chain.residues):
1296
1297 acc = self.source_id[:4]
1298 ch = self.source_id[4].upper()
1299
1300 start = qstart - self.qstart + self.start + i
1301 aa = r.type
1302 ss = 'L'
1303 phi, psi, omega = 0, 0, 0
1304 if r.torsion.phi:
1305 phi = r.torsion.phi
1306 if r.torsion.psi:
1307 psi = r.torsion.psi
1308 if r.torsion.omega:
1309 omega = r.torsion.omega
1310
1311 stream.write(' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'.format(acc, ch, start, aa, ss, phi, psi, omega, weight))
1312
1313 return stream.getvalue()
1314
1316
1317 BIT_SCORE_THRESHOLD = 1.1
1318
1320
1321 self._window = window
1322
1323 super(ChemShiftAssignment, self).__init__(
1324 source, start, end, qstart, qend, id=None, probability=1.0,
1325 rmsd=rmsd, tm_score=None, score=score, neff=None, segment=None, internal_id=None)
1326
1327 @property
1330
1333
1336
1339
1341 """
1342 Provides clustering/filtering of the fragments, covering a common residue
1343 in the target. Clustering is done via iterative shrinking of the cluster.
1344 At each iteration, node rejection (deletion) is attempted for each node. The
1345 node rejection, causing the most significant drop in the average pairwise
1346 distance (RMSD) in the cluster, is retained. This procedure is repeated
1347 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1348 2) the cluster gets exhausted or 3) node rejection no longer
1349 causes a drop in the average distance (not converging).
1350
1351 @param items: cluster members
1352 @type items: iterable of L{ClusterNode}s
1353 @param threshold: RMSD threshold; continue shrinking until the mean distance
1354 drops below this value (default=1.5)
1355 @type threshold: float
1356 @param connectedness: when calculating centroids, consider only nodes
1357 connected to at least c% of all surviving vertices
1358 (default=0.5)
1359 @type connectedness: float
1360 """
1361
1362 MIN_LENGTH = 6
1363
1365
1366 items = set(i for i in items if i.fragment.length >= FragmentCluster.MIN_LENGTH)
1367
1368 self._matrix = {}
1369 self._threshold = float(threshold)
1370 self._connectedness = float(connectedness)
1371 self._weight = 0
1372 self._edges = 0
1373
1374 visited = set()
1375
1376 for i in items:
1377 self._matrix.setdefault(i, {})
1378
1379 for j in items:
1380 self._matrix.setdefault(j, {})
1381
1382 if (j, i) not in visited:
1383 visited.add((i, j))
1384 distance = i.distance(j)
1385
1386 if distance is not None:
1387 self._matrix[i][j] = distance
1388 self._matrix[j][i] = distance
1389 i.weight += distance
1390 j.weight += distance
1391
1392 self._weight += distance
1393 self._edges += 1
1394
1395 self._items = set(self._matrix.keys())
1396
1397 if len(self._items) < 1:
1398 raise ClusterEmptyError()
1399
1400 self._initcount = self.count
1401
1402 @property
1405
1406 @property
1409
1410 @property
1412 return tuple(i.fragment for i in self._items)
1413
1414 @property
1417 @threshold.setter
1419 self._threshold = float(value)
1420
1421 @property
1424
1426
1427 d = []
1428
1429 for i in self._matrix:
1430 if skip is i:
1431 continue
1432
1433 for j in self._matrix[i]:
1434 if skip is not j:
1435 d.append(self._matrix[i][j])
1436
1437 return d
1438
1445
1447 """
1448 @return: the current mean distance in the cluster
1449 @rtype: float
1450 """
1451 if self._edges == 0:
1452 raise ClusterExhaustedError()
1453
1454 if not skip:
1455 return float(self._weight) / self._edges
1456
1457 else:
1458 weight = self._weight - skip.weight
1459 edges = self._edges - len(self._matrix[skip])
1460
1461 if edges < 1:
1462 return 0
1463 else:
1464 return float(weight) / edges
1465
def centroid(self):
    """
    @return: the current representative fragment
    @rtype: L{ClusterRep}

    @note: the cluster rep is the node with the lowest average distance
    to all other nodes. If a fixed fragment exists, structurally similar
    to the rep, but longer, this fragment may be suggested as an alternative
    (see also L{ClusterRep}).
    """
    # NOTE(review): def line reconstructed from the cluster.centroid() call — confirm.
    center = None
    best_avg = None

    for node in self._matrix:
        # guard against zero edges for isolated nodes
        edges = len(self._matrix[node]) or (1.0 / self.count)
        avg = float(node.weight) / edges
        connectivity = len(self._matrix[node]) / float(self.count)

        if best_avg is None or (avg < best_avg and connectivity >= self.connectedness):
            best_avg = avg
            center = node
        elif avg == best_avg and node.fragment.length > center.fragment.length:
            # break ties in favor of the longer fragment
            center = node

    distances = self._distances()
    mean = numpy.mean(distances)
    consistency = sum(1.0 for d in distances if d <= self.threshold) / len(distances)

    # look for a longer, fixed node that is structurally close to the center
    alternative = None
    for node in self._matrix:
        if node is not center and node.fixed and node.fragment.length > center.fragment.length:
            distance = self._distance(node, center)
            if distance is not None and distance < 0.5 * self.threshold:
                if alternative is None or alternative.fragment.length < node.fragment.length:
                    alternative = node

    return ClusterRep(center, mean, consistency, len(self._matrix[center]),
                      alternative=alternative,
                      rejections=(self._initcount - self.count))
1506
1508 """
1509 Remove C{item} from the cluster.
1510
1511 @type item: L{ClusterNode}
1512 @raise ClusterExhaustedError: if this is the last remaining item
1513 """
1514 if self.count == 1:
1515 raise ClusterExhaustedError()
1516
1517 assert not item.fixed
1518
1519 for i in self._matrix:
1520 if item in self._matrix[i]:
1521 distance = self._matrix[i][item]
1522 self._weight -= distance
1523 i.weight -= distance
1524
1525 del self._matrix[i][item]
1526 self._edges -= 1
1527
1528 del self._matrix[item]
1529 self._items.remove(item)
1530
1532 """
1533 Shrink the cluster by a single node.
1534
1535 @return: True on successful shrink, False otherwise (e.g. if
1536 already converged)
1537 @rtype: bool
1538 @raise ClusterExhaustedError: if exhausted
1539 @raise ClusterDivergingError: if not converging
1540 """
1541
1542 mean = self.mean()
1543 if mean <= self.threshold or self.count == 1:
1544 return False # already shrunk enough
1545
1546 m = {}
1547
1548 for i in self._matrix:
1549 if not i.fixed:
1550 newmean = self.mean(skip=i)
1551 m[newmean] = i
1552
1553 if len(m) == 0: # only fixed items remaining
1554 raise ClusterExhaustedError()
1555
1556 newmean = min(m)
1557
1558 if newmean > mean:
1559 raise ClusterDivergingError() # can't converge, usually when fixed items are too far away from the average
1560 elif newmean < mean:
1561 junk = m[newmean]
1562 self.reject(junk)
1563 return True # successful shrink
1564 else:
1565 return False # converged
1566
1568 """
1569 Start automatic shrinking.
1570
1571 @param minitems: absolute minimum of the number of nodes in the cluster
1572 @type minitems: int
1573
1574 @return: cluster's representative: the node with the lowest average
1575 distance to all other nodes in the cluster
1576 @rtype: L{ClusterRep}
1577
1578 @raise ClusterExhaustedError: if C{self.count} < C{minitems} and
1579 still not converged
1580 """
1581
1582 if self.count > minitems:
1583
1584 while self.shrinkone():
1585 if self.count <= minitems:
1586 raise ClusterExhaustedError()
1587 else:
1588 raise ClusterExhaustedError()
1589
1590 return self.centroid()
1591
class ClusterNode(object):
    """
    Cluster node.

    @param fragment: fragment
    @type fragment: L{Assignment}
    @param distance: distance metric (a L{Metrics} member, default is RMSD)
    @type distance: str
    @param fixed: mark this node as fixed (cannot be rejected)
    @type fixed: bool
    """

    # Probability above which a fragment may become a fixed node
    FIXED = 0.7

    @staticmethod
    def create(fragment, method='rmsd_to', extend=False):
        """
        Create a new L{ClusterNode} given a specified C{Assignment}. If this
        assignment is a high probability match, define it as a fixed fragment.

        @rtype: L{ClusterNode}
        """
        # NOTE(review): header reconstructed; the original default for
        # 'method' was presumably Metrics.RMSD ('rmsd_to') -- confirm
        if fragment.probability > ClusterNode.FIXED and fragment.length >= FragmentCluster.MIN_LENGTH:
            return ClusterNode(fragment, distance=method, fixed=extend)
        else:
            return ClusterNode(fragment, distance=method, fixed=False)

    def __init__(self, fragment, distance='rmsd_to', fixed=False):
        # NOTE(review): header reconstructed; the original default for
        # 'distance' was presumably Metrics.RMSD -- confirm

        if fixed and fragment.length < FragmentCluster.MIN_LENGTH:
            raise ValueError("Can't fix a short fragment")

        self.fragment = fragment
        self.fixed = bool(fixed)
        self.weight = 0        # accumulated distance to all other cluster nodes

        # Bind the distance method (e.g. Assignment.rmsd_to) once
        self._distance = getattr(self.fragment, distance)

    def distance(self, other):
        """
        @return: the distance between self and another node
        @type other: L{ClusterNode}
        @rtype: float
        """
        return self._distance(other.fragment)
1637
class ClusterRep(object):
    """
    Cluster's representative (centroid) node. This object carries the
    result of shrinking itself.

    @param centroid: rep node
    @type centroid: L{ClusterNode}
    @param mean: current mean distance in the cluster
    @type mean: float
    @param consistency: percentage of pairwise distances below the RMSD C{threshold}
    @type consistency: float
    @param count: current number of nodes in the cluster
    @type count: int
    @param rejections: total number of rejections
    @type rejections: int
    @param alternative: suggested cluster rep alternative (e.g. structurally
    similar to the centroid, but longer)
    @type alternative: L{ClusterNode}
    """

    def __init__(self, centroid, mean, consistency, count, rejections=0, alternative=None):
        # NOTE(review): header reconstructed from the class docstring and the
        # call in FragmentCluster.centroid() -- confirm parameter defaults

        # Unwrap cluster nodes down to their underlying fragments
        if isinstance(centroid, ClusterNode):
            centroid = centroid.fragment
        if isinstance(alternative, ClusterNode):
            alternative = alternative.fragment

        self._centroid = centroid
        self._alternative = alternative
        self._mean = mean
        self._consistency = consistency
        self._count = count
        self._rejections = rejections

    @property
    def confidence(self):
        """
        Confidence of assignment: log10(count) * consistency
        """
        if self.count <= 0 or self.count is None or self.consistency is None:
            return 0
        else:
            return numpy.log10(self.count) * self.consistency

    @property
    def centroid(self):
        return self._centroid

    @property
    def alternative(self):
        return self._alternative

    @property
    def has_alternative(self):
        return self._alternative is not None

    @property
    def mean(self):
        return self._mean

    @property
    def consistency(self):
        return self._consistency

    @property
    def count(self):
        return self._count

    @property
    def rejections(self):
        return self._rejections

    def exchange(self):
        """
        If an alternative is available, swap the centroid and the alternative.
        """

        if self._alternative is not None:

            centroid = self._centroid
            self._centroid = self._alternative
            self._alternative = centroid

    def to_rosetta(self, source):
        """
        @deprecated: this method is obsolete and will be deleted soon
        """
        return self.centroid.to_rosetta(source, weight=self.confidence)
1726
class AdaptedAssignment(object):
    """
    An assignment region, adapted (narrowed or extended) to a query window
    determined by its parent L{ClusterRep}.
    """
    # NOTE(review): class header and docstring reconstructed -- confirm

    @staticmethod
    def with_overhangs(center, start, end, overhang=2):
        """
        Build an adapted assignment around query window C{[start, end]},
        extending it by up to C{overhang} residues on either side when the
        centroid covers that much.

        @rtype: L{AdaptedAssignment}
        """
        # NOTE(review): method name and default reconstructed -- confirm
        if center.centroid.qstart <= (start - overhang):
            start -= overhang
        elif center.centroid.qstart < start:
            start = center.centroid.qstart

        if center.centroid.qend >= (end + overhang):
            end += overhang
        elif center.centroid.qend > end:
            # BUG FIX: was 'center.centroid.end' (a source coordinate);
            # the query coordinate 'qend' is intended here, mirroring the
            # qstart branch above
            end = center.centroid.qend

        return AdaptedAssignment(center, start, end)

    def __init__(self, center, qstart, qend):

        # The adapted window must lie within the centroid's query span
        if qstart < center.centroid.qstart:
            raise ValueError(qstart)
        if qend > center.centroid.qend:
            raise ValueError(qend)

        self._qstart = qstart
        self._qend = qend
        self._center = center

    @property
    def centroid(self):
        return self._center.centroid

    @property
    def center(self):
        # NOTE(review): property name reconstructed -- confirm
        return self._center

    @property
    def confidence(self):
        return self._center.confidence

    @property
    def qstart(self):
        # NOTE(review): property name reconstructed -- confirm
        return self._qstart

    @property
    def qend(self):
        # NOTE(review): property name reconstructed -- confirm
        return self._qend

    @property
    def length(self):
        # NOTE(review): property name reconstructed -- confirm
        return self._qend - self._qstart + 1
class SmoothFragmentMap(object):
    """
    Greedy, confidence-ordered assignment of cluster reps to residue slots.
    """
    # NOTE(review): class name, header and docstring reconstructed from
    # context -- confirm against the original source

    def __init__(self, length, centroids):

        if not length > 0:
            raise ValueError(length)

        self._length = int(length)
        self._slots = set(range(1, self._length + 1))    # unclaimed residue ranks
        self._map = {}                                   # rank -> claiming centroid

        # Assign the most confident centroids first
        centers = list(centroids)
        centers.sort(key=lambda i: i.confidence, reverse=True)

        for c in centers:
            self.assign(c)

    @property
    def length(self):
        # NOTE(review): property name reconstructed -- confirm
        return self._length

    def assign(self, center):
        """
        Claim every still-free residue rank covered by C{center}.

        @type center: L{ClusterRep}
        """
        for r in range(center.centroid.qstart, center.centroid.qend + 1):
            if r in self._slots:
                self._map[r] = center
                self._slots.remove(r)

    def patches(self):
        """
        Iterate over the contiguous runs of residues claimed by the same
        centroid, yielding one L{AdaptedAssignment} per run.

        @rtype: generator of L{AdaptedAssignment}
        """
        # NOTE(review): method name reconstructed -- confirm
        center = None
        start = None
        end = None

        for r in range(1, self._length + 1):

            if center is None:
                if r in self._map:
                    center = self._map[r]
                    start = end = r
                else:
                    center = None
                    start = end = None
            else:
                if r in self._map:
                    if self._map[r] is center:
                        end = r
                    else:
                        yield AdaptedAssignment(center, start, end)
                        center = self._map[r]
                        start = end = r
                else:
                    yield AdaptedAssignment(center, start, end)
                    center = None
                    start = end = None

        # BUG FIX: without this, a final patch extending to the very last
        # residue was silently dropped when the loop ended
        if center is not None:
            yield AdaptedAssignment(center, start, end)
1840
class ResidueEventInfo(object):
    """
    Progress-callback payload: per-residue summary of fragment clustering.
    """
    # NOTE(review): class header and __init__ signature reconstructed from
    # the call sites in RosettaFragsetFactory -- confirm the defaults

    def __init__(self, residue, confidence=None, count=0, confident=True, gap=False, rep=None):

        self.residue = residue
        self.confidence = confidence    # cluster confidence, or None
        self.confident = confident      # whether confidence passed the caller's threshold
        self.gap = gap                  # True if no assignments cover this residue
        self.count = count              # number of fragments in the cluster
        self.rep = rep                  # cluster representative fragment, if any

    @property
    def rank(self):
        # NOTE(review): property name reconstructed -- confirm
        return self.residue.rank

    @property
    def type(self):
        # NOTE(review): property name reconstructed -- confirm
        return self.residue.type

    @property
    def torsion(self):
        # Phi/psi of the rep fragment at this residue, if a rep exists
        if self.rep:
            return self.rep.torsion_at(self.rank, self.rank)[0]
        else:
            return None
1867
class RosettaFragsetFactory(object):
    """
    Simplifies the construction of fragment libraries.
    """

    def __init__(self):
        # NOTE(review): reconstructed -- the factory delegates to the
        # csb.bio.fragments.rosetta module, aliased as self.rosetta
        import csb.bio.fragments.rosetta as rosetta
        self.rosetta = rosetta

    def make_fragset(self, target):
        """
        Build a fragment library given a L{Target} and its L{Assignment}s.

        @param target: target protein
        @type target: L{Target}

        @rtype: L{RosettaFragmentMap}
        """
        frag_factory = self.rosetta.RosettaFragment
        fragments = list(map(frag_factory.from_object, target.matches))
        fragments.sort()

        return self.rosetta.RosettaFragmentMap(fragments, target.length)

    def make_chopped(self, fragments, window):
        """
        Build a fixed-length fragment library from a list of
        variable-length L{Assignment}s.

        @param fragments: source fragments
        @type fragments: iterable of L{RosettaFragment}s
        @param window: fixed-length fragment size (for classic Rosetta: choose 9)
        @type window: int

        @return: fixed-length fragment library
        @rtype: L{RosettaFragmentMap}
        """
        # NOTE(review): method name reconstructed -- confirm
        frags = []

        # Slide a fixed-size window over each source fragment.
        # NOTE(review): the last window start is qend - window, so a window
        # ending exactly at qend is never produced -- possible off-by-one,
        # preserved as-is; confirm against the original intent
        for f in fragments:
            for qs in range(f.qstart, f.qend - window + 1):
                frags.append(f.subregion(qs, qs + window - 1))

        return self.rosetta.RosettaFragmentMap(frags)

    def make_combined(self, target, filling, threshold=0.5, callback=None):
        """
        Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments).
        The regions to be complemented are determined by calculating the confidence
        at each residue (by filtering).

        @param target: target protein
        @type target: L{Target}
        @param filling: additional fragments to place in the low-conf regions
        @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
        @param threshold: confidence threshold
        @type threshold: float

        @return: complemented fragment library
        @rtype: L{RosettaFragmentMap}
        """
        # NOTE(review): header reconstructed -- confirm the threshold
        # default and the callback parameter

        fragmap = self.make_fragset(target)
        covered = set()

        for r in target.residues:

            if r.assignments.length == 0:
                if callback:
                    callback(ResidueEventInfo(r.native, gap=True))
                continue

            cluster = r.filter()
            if cluster is None:
                if callback:
                    callback(ResidueEventInfo(r.native, 0, 0, confident=False))
                continue

            if cluster.confidence >= threshold:
                covered.add(r.native.rank)
                confident = True
            else:
                confident = False

            if callback:
                callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, confident))

        # Everything not confidently covered gets complemented with filling
        for r in target.residues:
            if r.native.rank not in covered:  # true for gaps and low-conf residues
                fragmap.mark_unconfident(r.native.rank)

        for frag in filling:
            fragmap.complement(frag)

        return fragmap

    def make_filtered(self, target, extend=False, callback=None):
        """
        Build a filtered fragment library (by clustering), containing only
        representative fragments (cluster centroids).

        @param target: target protein
        @type target: L{Target}
        @param extend: if True, pick alternative reps if available
        @type extend: bool

        @return: filtered fragment library
        @rtype: L{RosettaFragmentMap}
        """
        # NOTE(review): header reconstructed -- confirm defaults

        fragments = []

        for r in target.residues:
            if r.assignments.length == 0:
                if callback:
                    callback(ResidueEventInfo(r.native, gap=True))
                continue

            cluster = r.filter(extend=extend)
            if cluster is None:
                if callback:
                    callback(ResidueEventInfo(r.native, 0, 0, confident=False))
                continue

            # Prefer the longer alternative rep when extension is requested
            if extend and cluster.has_alternative:
                best = cluster.alternative
            else:
                best = cluster.centroid

            fragment = self.rosetta.RosettaFragment.from_object(best)
            fragments.append(fragment)
            if callback:
                callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, rep=cluster.centroid))

        fragments.sort()
        return self.rosetta.RosettaFragmentMap(fragments, target.length)

    def mix(self, *fragsets):
        """
        Mix fragments from multiple libraries.

        @type fragsets: L{RosettaFragmentMap}
        @return: mixed fragment library
        @rtype: L{RosettaFragmentMap}
        """

        fragments = []
        length = 0

        # The mixed map spans the longest of the input maps
        for fragset in fragsets:
            if fragset._length > length:
                length = fragset._length

            for fragment in fragset:
                fragments.append(fragment)

        return self.rosetta.RosettaFragmentMap(fragments, length)
2030
2033
2035
2036 FACTORY = None
2037 DSN = None
2038
2040
2041 self.factory = factory or self.__class__.FACTORY
2042 self.cs = dsn or self.__class__.DSN
2043 self.connection = None
2044 self.cursor = None
2045
2047
2048 self.connection = self.factory(self.cs)
2049 try:
2050 self.cursor = self.connection.cursor()
2051 except:
2052 self.connection.close()
2053 raise
2054 return self
2055
2063
2065
2066 self._pdb = pdb_paths
2067 self._connection = None
2068
2069 from csb.bio.io.wwpdb import find, StructureParser
2070 self._parser = StructureParser
2071 self._find = find
2072 self._factory = factory
2073
2074 try:
2075 import psycopg2.extras
2076 except ImportError:
2077 raise RuntimeError('Please install the psycopg2 module first')
2078
2079 if connection_string is None:
2080 connection_string = self.connection_string()
2081
2082 BenchmarkAdapter.Connection.FACTORY = psycopg2.extras.DictConnection
2083 BenchmarkAdapter.Connection.DSN = connection_string
2084
2085 @staticmethod
2087
2088 fields = ['dbname={0}'.format(database)]
2089
2090 if host:
2091 fields.append('host={0}'.format(host))
2092 if username:
2093 fields.append('user={0}'.format(username))
2094 fields.append('password={0}'.format(password))
2095
2096 return ' '.join(fields)
2097
2099
2100 with BenchmarkAdapter.Connection() as db:
2101
2102 db.cursor.callproc('reporting."GetTargets"', (benchmark_id,))
2103 return db.cursor.fetchall()
2104
2106
2107 with BenchmarkAdapter.Connection() as db:
2108
2109 db.cursor.callproc('reporting."GetTargetDetails"', (target_id,))
2110 return db.cursor.fetchall()
2111
2113
2114 with BenchmarkAdapter.Connection() as db:
2115
2116 db.cursor.callproc('reporting."GetAssignments"', (target_id, type))
2117 return db.cursor.fetchall()
2118
2120
2121 with BenchmarkAdapter.Connection() as db:
2122
2123 db.cursor.callproc('reporting."GetTargetSecStructureAssignments2"', (target_id, type))
2124 return db.cursor.fetchall()
2125
2127
2128 with BenchmarkAdapter.Connection() as db:
2129
2130 db.cursor.callproc('reporting."GetScores"', (benchmark_id, type))
2131 return db.cursor.fetchall()
2132
2134
2135 with BenchmarkAdapter.Connection() as db:
2136
2137 db.cursor.callproc('reporting."GetCentroids"', (benchmark_id,))
2138 return db.cursor.fetchall()
2139
2141
2142 with BenchmarkAdapter.Connection() as db:
2143
2144 db.cursor.callproc('reporting."GetTargetSegments"', (target_id,))
2145 data = db.cursor.fetchall()
2146
2147 return [ TargetSegment(row['Start'], row['End'], row['Count']) for row in data ]
2148
2150
2151 pdbfile = self._find(accession, self._pdb)
2152
2153 if not pdbfile and chain:
2154 pdbfile = self._find(accession + chain, self._pdb)
2155
2156 if not pdbfile:
2157 raise IOError('{0} not found here: {1}'.format(accession, self._pdb))
2158
2159 return self._parser(pdbfile).parse_structure()
2160
2162
2163 info = self.target_details(target_id)
2164 if not info:
2165 raise ValueError('No such Target ID in the database: {0}'.format(target_id))
2166 row = info[0]
2167
2168 id = row["Accession"]
2169 length = float(row["Length"])
2170 overlap = float(row["MaxOverlap"]) / (length or 1.)
2171
2172 native = self.structure(id[:4], id[4]).chains[id[4]]
2173 segments = self.target_segments(target_id)
2174 target = self._factory.target(id, length, native.residues, overlap, segments)
2175
2176 source = None
2177
2178 for row in self.assignments(target_id, type):
2179
2180 src_accession = row['Source'][:4]
2181 src_chain = row['Source'][4]
2182
2183 if source is None or source.accession != src_accession:
2184 try:
2185 source = self.structure(src_accession, src_chain)
2186 except (IOError, ValueError) as ex:
2187 target.errors.append(ex)
2188 continue
2189
2190 if src_chain == '_':
2191 frag_chain = source.first_chain
2192 else:
2193 frag_chain = source.chains[src_chain]
2194 if not frag_chain.has_torsion:
2195 frag_chain.compute_torsion()
2196
2197 fragment = self._factory.assignment(
2198 source=frag_chain,
2199 start=row['SourceStart'],
2200 end=row['SourceEnd'],
2201 id=row['FragmentName'],
2202 qstart=row['Start'],
2203 qend=row['End'],
2204 probability=row['Probability'],
2205 score=row['Score'],
2206 neff=row['Neff'],
2207 rmsd=row['RMSD'],
2208 tm_score=row['TMScore'],
2209 segment=row['SegmentStart'],
2210 internal_id=row['InternalID'])
2211
2212 target.assign(fragment)
2213
2214 if ss:
2215 self._attach_sec_structure(target, target_id, type)
2216
2217 return target
2218
2220
2221 ss = {}
2222
2223 for row in self.assignments_sec_structure(target_id, type):
2224 frag_id, state = row["AssignmentID"], row["DSSP"]
2225 if row[frag_id] not in ss:
2226 ss[frag_id] = []
2227
2228 ss[frag_id].append(state)
2229
2230 for a in target.matches:
2231 if a.internal_id in ss:
2232 dssp = ''.join(ss[a.internal_id])
2233 a.secondary_structure = dssp
2234
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Mon Oct 14 13:21:36 2013 | http://epydoc.sourceforge.net |