Advanced Python Concepts: OOP & Inheritance BCHB524 Lecture 18 BCHB524 - Edwards
Last time... Object-oriented programming (OOP) enables us to describe, and program with, concepts. A class describes the behavior of the object: data members (information storage) and methods (actions which manipulate the object). Each instance of the class behaves as defined by the class. Typically each instance has different values in the class's internal data members. BCHB524 - Edwards
# Complete DNASeq.py Module

class DNASeq:
    """A named DNA sequence with a few basic analysis methods.

    Attributes:
        seq: The nucleotide string.
        name: A label for the sequence.
    """

    def __init__(self, seq="", name=""):
        self.seq = seq
        self.name = name

    def read(self, filename):
        """Replace self.seq with the contents of filename, whitespace removed."""
        # The with-block closes the file even if reading fails.
        with open(filename) as handle:
            self.seq = ''.join(handle.read().split())

    def reverse(self):
        """Return the sequence in reverse order."""
        return self.seq[::-1]

    def complement(self):
        """Return the sequence with A<->T and C<->G swapped.

        Raises:
            TypeError: If the sequence holds a symbol other than A, C, G, T
                (the lookup yields None, which join() rejects).
        """
        d = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
        return ''.join(map(d.get, self.seq))

    def reverseComplement(self):
        """Return the complement of the sequence, reversed."""
        return self.complement()[::-1]

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, nuc):
        """Return how many times nuc occurs in the sequence."""
        return self.seq.count(nuc)

    def percentGC(self):
        """Return the percentage (0-100) of symbols that are C or G.

        An empty sequence has no GC content, so 0.0 is returned rather
        than dividing by zero.
        """
        total = self.length()
        if total == 0:
            return 0.0
        gccount = self.freq('C') + self.freq('G')
        return 100 * float(gccount) / total

# BCHB524 - Edwards
Complete DNASeq.py Module Describe class in a module, then access using an import statement from DNASeq import DNASeq ds = DNASeq('ACGTACGTACGTACGT','My sequence') print ds.complement(),ds.length(),ds.reverseComplement() print ds.freq('C'),ds.freq('G'),ds.length(),ds.percentGC() ds = DNASeq() ds.read('anthrax_sasp.nuc') print ds.complement(),ds.length(),ds.reverseComplement() print ds.freq('C'),ds.freq('G'),ds.length(),ds.percentGC() BCHB524 - Edwards
Class Inheritance: Inheritance allows similar classes or concepts to share common data and methods. Classic example: DNA Sequence, Transcript, Protein. All contain a name and a sequence data-member. All require length and is_valid methods. Otherwise, there are specific differences in their details. BCHB524 - Edwards
Diagram Seq DNA Protein seq name length() freq() is_valid() Base-class “Parent” DNA comp valid_symbol() reverseComplement() Protein mw valid_symbol() molWt() Derived classes “Children” BCHB524 - Edwards
# Sequence objects: Sequence.py

class Seq:
    """Base class for named sequences.

    Subclasses must provide valid_symbol(sym); is_valid() relies on it.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times sym occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True if every symbol passes the subclass's valid_symbol()."""
        for sym in self.seq:
            if not self.valid_symbol(sym):
                return False
        return True


class DNA(Seq):
    """DNA sequence: adds nucleotide validation and reverse complement."""

    # Complementary base for each nucleotide.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    def valid_symbol(self, sym):
        """Return True only if sym is exactly one of A, C, G, T."""
        # `in` on a string is a substring test, so without the length
        # check '' and multi-letter runs such as 'AC' would be accepted.
        return len(sym) == 1 and sym in 'ACGT'

    def reverseComplement(self):
        """Return the complement of the reversed sequence.

        Raises:
            TypeError: If the sequence holds a symbol missing from comp
                (the lookup yields None, which join() rejects).
        """
        return ''.join(map(self.comp.get, self.seq[::-1]))

# BCHB524 - Edwards
# Sequence objects: Sequence.py

class Seq:
    """Base class for named sequences.

    Subclasses must provide valid_symbol(sym); is_valid() relies on it.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times sym occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True if every symbol passes the subclass's valid_symbol()."""
        for sym in self.seq:
            if not self.valid_symbol(sym):
                return False
        return True


class Protein(Seq):
    """Protein sequence: adds residue validation and molecular weight."""

    # Mass contributed by each amino-acid letter; summed by molWt().
    mw = {'A': 71.04, 'C': 103.01, 'D': 115.03, 'E': 129.04, 'F': 147.07,
          'G': 57.02, 'H': 137.06, 'I': 113.08, 'K': 128.09, 'L': 113.08,
          'M': 131.04, 'N': 114.04, 'P': 97.05, 'Q': 128.06, 'R': 156.10,
          'S': 87.03, 'T': 101.05, 'V': 99.07, 'W': 186.08, 'Y': 163.06 }

    def valid_symbol(self, sym):
        """Return True only if sym is exactly one of the 20 residue letters."""
        # `in` on a string is a substring test, so without the length
        # check '' and multi-letter runs such as 'AC' would be accepted.
        return len(sym) == 1 and sym in 'ACDEFGHIKLMNPQRSTVWY'

    def molWt(self):
        """Return the sum of the mw entries for every residue in the sequence.

        Raises:
            TypeError: If the sequence holds a symbol missing from mw
                (the lookup yields None, which sum() cannot add).
        """
        return sum(map(self.mw.get, self.seq))

# BCHB524 - Edwards
Sequence objects Using Sequence.py from Sequence import * s1 = DNA('ACGTACGTACGTACGT','DNA1') if s1.is_valid(): print s1.reverseComplement(), s1.length(), s1.freq('A') s2 = Protein('ACDEFGHIKL','Prot1') if s2.is_valid(): print s2.molWt(), s2.length(), s2.freq('H') BCHB524 - Edwards
Diagram Seq DNA Protein seq name length() is_valid() Abstract base-class “Parent” DNA comp valid_symbol() reverseComplement() Protein mw valid_symbol() molWt() Derived classes “Children” BCHB524 - Edwards
# Base-class method using derived-class data member

class Seq:
    """Base class for named sequences.

    is_valid() reads self.valid_sym, which this class does not define;
    each derived class supplies it as a class-level data member.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times sym occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True if every symbol is found in self.valid_sym."""
        return all(symbol in self.valid_sym for symbol in self.seq)


class DNA(Seq):
    """DNA sequence over the alphabet ACGT."""

    # Complementary base for each nucleotide.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    # Alphabet consulted by Seq.is_valid().
    valid_sym = 'ACGT'

    def reverseComplement(self):
        """Return the complement of the reversed sequence."""
        backwards = self.seq[::-1]
        return ''.join(self.comp.get(base) for base in backwards)


class Protein(Seq):
    """Protein sequence over the 20 amino-acid letters."""

    # Mass contributed by each amino-acid letter; summed by molWt().
    mw = {
        'A': 71.04, 'C': 103.01, 'D': 115.03, 'E': 129.04, 'F': 147.07,
        'G': 57.02, 'H': 137.06, 'I': 113.08, 'K': 128.09, 'L': 113.08,
        'M': 131.04, 'N': 114.04, 'P': 97.05, 'Q': 128.06, 'R': 156.10,
        'S': 87.03, 'T': 101.05, 'V': 99.07, 'W': 186.08, 'Y': 163.06,
    }
    # Alphabet consulted by Seq.is_valid().
    valid_sym = 'ACDEFGHIKLMNPQRSTVWY'

    def molWt(self):
        """Return the sum of the mw entries for every residue."""
        return sum(self.mw.get(residue) for residue in self.seq)

# BCHB524 - Edwards
# Revisit the CodonTable module

class CodonTable:
    """Codon-to-amino-acid lookup table built from a five-row text table.

    The text has rows of the form "<key> = <value>" with keys AAs, Starts,
    Base1, Base2 and Base3. Position i of the three Base rows spells a
    codon; position i of AAs is its amino acid and position i of Starts is
    'M' when that codon is flagged as an initiator.
    """

    # Table text used when no filename is given; subclasses override it.
    data = None

    def __init__(self, filename=None):
        """Build the table from filename if given, else from self.data."""
        if filename:
            # The with-block closes the file even if reading fails.
            with open(filename) as handle:
                data = handle.read()
            self.parse(data)
        else:
            self.parse(self.data)

    def parse(self, data):
        """Fill self.table with codon -> (amino acid, is-initiator) pairs.

        Raises:
            KeyError: If any of the five required rows is missing.
            IndexError: If a Base or Starts row is shorter than the AAs row.
        """
        lines = {}
        for l in data.split('\n'):
            sl = l.split()
            # Rows look like "<key> = <value>"; anything with fewer than
            # three fields (blank lines, stray text) is ignored.
            if len(sl) < 3:
                continue
            lines[sl[0]] = sl[2]
        b1 = lines['Base1']
        b2 = lines['Base2']
        b3 = lines['Base3']
        aa = lines['AAs']
        st = lines['Starts']
        self.table = {}
        for i, residue in enumerate(aa):
            codon = b1[i] + b2[i] + b3[i]
            isInit = (st[i] == 'M')
            self.table[codon] = (residue, isInit)
        return

    def aa(self, codon):
        """Return the amino acid for codon, or 'X' if codon is not in the table."""
        try:
            return self.table[codon][0]
        except KeyError:
            return 'X'

    def translate(self, seq, frame):
        """Return the amino-acid string for seq.codons(frame).

        seq may be any object with a codons(frame) method that yields
        codon strings.
        """
        return ''.join([self.aa(codon) for codon in seq.codons(frame)])

# BCHB524 - Edwards
# Revisit the CodonTable module

class CodonTable:
    # Table text used when no filename is given; subclasses override it.
    data = None

    def __init__(self, filename=None):
        """Build the table from filename if given, else from self.data."""
        if filename:
            # The with-block closes the file even if reading fails.
            with open(filename) as handle:
                data = handle.read()
            self.parse(data)
        else:
            self.parse(self.data)

    # ...


# Each table below keeps one "<key> = <value>" row per line: the parser
# splits the text on newlines, so rows run together on a single line would
# not be recognised.

class StandardCode(CodonTable):
    """CodonTable whose built-in data has initiator flags on three codons."""
    data = """
    AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
    Starts = ---M---------------M---------------M----------------------------
    Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
    Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
    Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
    """


class BacterialCode(CodonTable):
    """CodonTable whose data differs from StandardCode only in the Starts row."""
    data = """
    AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
    Starts = ---M---------------M------------MMMM---------------M------------
    Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
    Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
    Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
    """

# BCHB524 - Edwards
# Add codons to the DNA class

class Seq:
    """Base class for named sequences.

    is_valid() reads self.valid_sym, which this class does not define;
    each derived class supplies it as a class-level data member.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times sym occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True if every symbol is found in self.valid_sym."""
        for sym in self.seq:
            if sym not in self.valid_sym:
                return False
        return True


class DNA(Seq):
    """DNA sequence: adds reverse complement and codon splitting."""

    # Complementary base for each nucleotide.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    # Alphabet consulted by Seq.is_valid().
    valid_sym = 'ACGT'

    def reverseComplement(self):
        """Return the complement of the reversed sequence."""
        return ''.join(map(self.comp.get, self.seq[::-1]))

    def codons(self, frame):
        """Return the list of complete three-symbol codons in a reading frame.

        Args:
            frame: 1-based start position; frame 1 begins at the first
                symbol, frame 2 at the second, and so on.

        Returns:
            Consecutive non-overlapping triplets starting at that position.
            Leftover symbols at the end that do not fill a whole codon are
            dropped, so no partial codon is handed on for translation.

        Raises:
            ValueError: If frame is less than 1 (a zero or negative frame
                would otherwise slice from the end of the sequence).
        """
        if frame < 1:
            raise ValueError("frame must be >= 1, got %r" % (frame,))
        # Stopping two short of the end guarantees every slice has 3 symbols.
        last_start = len(self.seq) - 2
        return [self.seq[i:i+3] for i in range(frame - 1, last_start, 3)]

# BCHB524 - Edwards
Using the CodonTable module from Sequence import * from CodonTable import * s1 = DNA('ACGTACGTACGTACGT','DNA1') ct = StandardCode() print ct.translate(s1,2) BCHB524 - Edwards