11/9/2015 BCHB Edwards Advanced Python Concepts: OOP & Inheritance. BCHB Lecture 18
11/9/2015 BCHB Edwards Last time... Object-oriented programming (OOP): Enables us to describe, and program with, concepts. A class describes the behavior of the object: data members (information storage) and methods (actions which manipulate the object). Each instance of the class behaves as defined by the class. Typically each instance has different values in the class’s internal data members. 2
# 11/9/2015 BCHB Edwards - Complete DNASeq.py Module (slide 3)
class DNASeq(object):
    """A named DNA sequence with simple strand and composition helpers.

    Attributes:
        seq:  the nucleotide string.
        name: a free-text label for the sequence.
    """

    # Complement of each of the four nucleotides handled by this class.
    _COMPLEMENT = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    def __init__(self, seq="", name=""):
        self.seq = seq
        self.name = name

    def read(self, filename):
        """Replace self.seq with the contents of filename, whitespace removed."""
        # The context manager closes the file even if reading fails.
        with open(filename) as handle:
            self.seq = ''.join(handle.read().split())

    def reverse(self):
        """Return the sequence read back to front."""
        return self.seq[::-1]

    def complement(self):
        """Return the complementary strand, in the same order as self.seq.

        A symbol other than A, C, G or T has no entry in the lookup table,
        so the join raises TypeError.
        """
        return ''.join(map(self._COMPLEMENT.get, self.seq))

    def reverseComplement(self):
        """Return the complement of the sequence, reversed."""
        return self.complement()[::-1]

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, nuc):
        """Return how many times nuc occurs in the sequence."""
        return self.seq.count(nuc)

    def percentGC(self):
        """Return the percentage (0-100) of symbols that are C or G.

        An empty sequence yields 0.0 rather than dividing by zero.
        """
        if not self.seq:
            return 0.0
        gccount = self.freq('C') + self.freq('G')
        return 100 * float(gccount) / self.length()
# Describe class in a module, then access using an import statement
# 11/9/2015 BCHB Edwards - Complete DNASeq.py Module (slide 4)

# The class is spelled DNASeq (capital S); importing "DNAseq" fails.
from DNASeq import DNASeq

# Build a sequence directly from a string.
ds = DNASeq('ACGTACGTACGTACGT', 'My sequence')
print('%s %s %s' % (ds.complement(), ds.length(), ds.reverseComplement()))
print('%s %s %s %s' % (ds.freq('C'), ds.freq('G'), ds.length(), ds.percentGC()))

# Build an empty sequence, then fill it from a file.
ds = DNASeq()
ds.read('anthrax_sasp.nuc')
print('%s %s %s' % (ds.complement(), ds.length(), ds.reverseComplement()))
print('%s %s %s %s' % (ds.freq('C'), ds.freq('G'), ds.length(), ds.percentGC()))
11/9/2015 BCHB Edwards Class Inheritance: Inheritance allows similar classes or concepts to share common data and methods. Classic example: DNA Sequence, Transcript, Protein. All contain a name and a sequence data-member. All require length and is_valid methods. Otherwise, they differ in their specific details. 5
11/9/2015 BCHB Edwards Diagram: Seq [data: seq, name; methods: length(), freq(), is_valid()]. Protein [data: mw; methods: valid_symbol(), molWt()]. DNA [data: comp; methods: valid_symbol(), reverseComplement()]. Base-class “Parent”: Seq. Derived classes “Children”: Protein, DNA. 6
# 11/9/2015 BCHB Edwards - Sequence objects: Sequence.py (slide 7)
class Seq(object):
    """Base class for named sequences.

    Holds the data and methods shared by all sequence types. Derived
    classes supply valid_symbol(), which is_valid() relies on.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times sym occurs in the sequence."""
        return self.seq.count(sym)

    def valid_symbol(self, sym):
        """Return True if sym belongs to the alphabet; derived classes override."""
        raise NotImplementedError("derived classes must define valid_symbol()")

    def is_valid(self):
        """Return True if every symbol in the sequence is a valid symbol."""
        return all(self.valid_symbol(sym) for sym in self.seq)


class DNA(Seq):
    """A DNA sequence over the alphabet A, C, G, T."""

    # Complement of each nucleotide; its keys double as the alphabet.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    def valid_symbol(self, sym):
        """Return True only when sym is exactly one of A, C, G or T."""
        # Key lookup, not a substring test: '' and 'AC' are rejected.
        return sym in self.comp

    def reverseComplement(self):
        """Return the complement of the sequence, reversed."""
        return ''.join(map(self.comp.get, self.seq[::-1]))
# 11/9/2015 BCHB Edwards - Sequence objects: Sequence.py (slide 8)
class Seq(object):
    """Base class for named sequences.

    Holds the data and methods shared by all sequence types. Derived
    classes supply valid_symbol(), which is_valid() relies on.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times sym occurs in the sequence."""
        return self.seq.count(sym)

    def valid_symbol(self, sym):
        """Return True if sym belongs to the alphabet; derived classes override."""
        raise NotImplementedError("derived classes must define valid_symbol()")

    def is_valid(self):
        """Return True if every symbol in the sequence is a valid symbol."""
        return all(self.valid_symbol(sym) for sym in self.seq)


class Protein(Seq):
    """A protein sequence over the 20 standard one-letter amino-acid codes."""

    # Residue mass of each amino acid, to two decimals.
    # NOTE(review): only the A, G, P, S and V values survived in the slide
    # text; the rest are filled in as monoisotopic residue masses, which the
    # surviving values match. Confirm against the original slides.
    mw = {
        'A': 71.04, 'C': 103.01, 'D': 115.03, 'E': 129.04, 'F': 147.07,
        'G': 57.02, 'H': 137.06, 'I': 113.08, 'K': 128.09, 'L': 113.08,
        'M': 131.04, 'N': 114.04, 'P': 97.05, 'Q': 128.06, 'R': 156.10,
        'S': 87.03, 'T': 101.05, 'V': 99.07, 'W': 186.08, 'Y': 163.06,
    }

    def valid_symbol(self, sym):
        """Return True only when sym is exactly one of the 20 residue codes."""
        # Key lookup, not a substring test: '' and 'AC' are rejected.
        return sym in self.mw

    def molWt(self):
        """Return the sum of the residue masses of the sequence.

        A symbol with no entry in mw contributes None, so the sum raises
        TypeError; call is_valid() first.
        """
        return sum(map(self.mw.get, self.seq))
# 11/9/2015 BCHB Edwards - Sequence objects: Using Sequence.py (slide 9)

# Import exactly the names used instead of a wildcard import.
from Sequence import DNA, Protein

s1 = DNA('ACGTACGTACGTACGT', 'DNA1')
if s1.is_valid():
    print('%s %s %s' % (s1.reverseComplement(), s1.length(), s1.freq('A')))

s2 = Protein('ACDEFGHIKL', 'Prot1')
if s2.is_valid():
    print('%s %s %s' % (s2.molWt(), s2.length(), s2.freq('H')))
11/9/2015 BCHB Edwards Diagram: Seq [data: seq, name; methods: length(), is_valid()]. Protein [data: mw; methods: valid_symbol(), molWt()]. DNA [data: comp; methods: valid_symbol(), reverseComplement()]. Abstract base-class “Parent”: Seq. Derived classes “Children”: Protein, DNA. 10
# 11/9/2015 BCHB Edwards - Base-class method using derived-class data member
# (slide 11)
class Seq(object):
    """Base class for named sequences.

    is_valid() reads self.valid_sym, which this class does not define;
    each derived class provides it as a class-level data member.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times sym occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True if every symbol of the sequence is in valid_sym."""
        return all(sym in self.valid_sym for sym in self.seq)


class DNA(Seq):
    """A DNA sequence over the alphabet A, C, G, T."""

    # Complement of each nucleotide.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    # Alphabet consulted by Seq.is_valid().
    valid_sym = 'ACGT'

    def reverseComplement(self):
        """Return the complement of the sequence, reversed."""
        return ''.join(map(self.comp.get, self.seq[::-1]))


class Protein(Seq):
    """A protein sequence over the 20 standard one-letter amino-acid codes."""

    # Residue mass of each amino acid, to two decimals.
    # NOTE(review): only the A, G, P, S and V values survived in the slide
    # text; the rest are filled in as monoisotopic residue masses, which the
    # surviving values match. Confirm against the original slides.
    mw = {
        'A': 71.04, 'C': 103.01, 'D': 115.03, 'E': 129.04, 'F': 147.07,
        'G': 57.02, 'H': 137.06, 'I': 113.08, 'K': 128.09, 'L': 113.08,
        'M': 131.04, 'N': 114.04, 'P': 97.05, 'Q': 128.06, 'R': 156.10,
        'S': 87.03, 'T': 101.05, 'V': 99.07, 'W': 186.08, 'Y': 163.06,
    }
    # Alphabet consulted by Seq.is_valid().
    valid_sym = 'ACDEFGHIKLMNPQRSTVWY'

    def molWt(self):
        """Return the sum of the residue masses of the sequence.

        A symbol with no entry in mw contributes None, so the sum raises
        TypeError; call is_valid() first.
        """
        return sum(map(self.mw.get, self.seq))
# 11/9/2015 BCHB Edwards - Revisit the CodonTable module (slide 12)
class CodonTable(object):
    """Genetic code parsed from a five-row text table.

    The text has one "Name = value" row for each of AAs, Starts, Base1,
    Base2 and Base3. Column i of the Base rows spells a codon, column i of
    AAs is its amino acid, and an 'M' in column i of Starts marks it as an
    initiation codon.

    Attributes:
        table: dict mapping codon -> (amino acid, is-initiation-codon).
    """

    # Table text used when no filename is given; derived classes set it.
    data = None

    def __init__(self, filename=None):
        """Load the table from filename if given, else from the class's data.

        Raises:
            ValueError: if no filename is given and data is not set.
        """
        if filename:
            # The context manager closes the file even if reading fails.
            with open(filename) as handle:
                data = handle.read()
        else:
            data = self.data
        if data is None:
            raise ValueError("no codon table: pass a filename or use a "
                             "derived class that defines data")
        self.parse(data)

    def parse(self, data):
        """Build self.table from the table text in data.

        Raises:
            KeyError: if one of the five required rows is missing.
            IndexError: if a Base or Starts row is shorter than the AAs row.
        """
        rows = {}
        for line in data.split('\n'):
            tokens = line.split()
            # A usable row is "Name = value"; skip blank or short lines.
            if len(tokens) >= 3:
                rows[tokens[0]] = tokens[2]
        b1 = rows['Base1']
        b2 = rows['Base2']
        b3 = rows['Base3']
        aa = rows['AAs']
        st = rows['Starts']
        self.table = {}
        for i, amino in enumerate(aa):
            codon = b1[i] + b2[i] + b3[i]
            self.table[codon] = (amino, st[i] == 'M')

    def aa(self, codon):
        """Return the amino acid for codon, or 'X' if the codon is unknown."""
        try:
            return self.table[codon][0]
        except KeyError:
            return 'X'

    def translate(self, seq, frame):
        """Translate the codons of seq in the given frame to an amino-acid string.

        seq must provide codons(frame); every codon it yields is looked up
        with aa(), so an unknown codon becomes 'X'.
        """
        return ''.join(self.aa(codon) for codon in seq.codons(frame))
# 11/9/2015 BCHB Edwards - Revisit the CodonTable module (slide 13)
class CodonTable(object):
    """Genetic code parsed from table text (abridged on this slide)."""

    # Table text used when no filename is given; derived classes set it.
    data = None

    def __init__(self, filename=None):
        """Parse the table from filename if given, else from the class's data."""
        if filename:
            # The context manager closes the file even if reading fails.
            with open(filename) as handle:
                data = handle.read()
            self.parse(data)
        else:
            self.parse(self.data)

    #... (the remaining methods are elided on this slide)


# Each table below has five 64-column rows: column i of Base1/Base2/Base3
# spells a codon, column i of AAs is its amino acid, and an 'M' in column i
# of Starts marks an initiation codon.
# NOTE(review): the runs of '-' in the Starts rows were lost in the slide
# text; they are restored here from NCBI translation tables 1 and 11 so
# every row is 64 columns wide. Confirm against the original slides.

class StandardCode(CodonTable):
    """The standard genetic code."""

    data = """
    AAs    = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
    Starts = ---M---------------M---------------M----------------------------
    Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
    Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
    Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
    """


class BacterialCode(CodonTable):
    """The bacterial genetic code: same amino acids, more initiation codons."""

    data = """
    AAs    = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
    Starts = ---M---------------M------------MMMM---------------M------------
    Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
    Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
    Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
    """
# 11/9/2015 BCHB Edwards - Add codons to the DNA class (slide 14)
class Seq(object):
    """Base class for named sequences.

    is_valid() reads self.valid_sym, which this class does not define;
    each derived class provides it as a class-level data member.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times sym occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True if every symbol of the sequence is in valid_sym."""
        return all(sym in self.valid_sym for sym in self.seq)


class DNA(Seq):
    """A DNA sequence over the alphabet A, C, G, T."""

    # Complement of each nucleotide.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    # Alphabet consulted by Seq.is_valid().
    valid_sym = 'ACGT'

    def reverseComplement(self):
        """Return the complement of the sequence, reversed."""
        return ''.join(map(self.comp.get, self.seq[::-1]))

    def codons(self, frame):
        """Return the sequence split into consecutive 3-symbol pieces.

        Args:
            frame: 1-based position of the first symbol of the first codon.

        Returns:
            A list of strings. The last one is shorter than 3 symbols when
            the remaining length is not a multiple of 3.

        Raises:
            ValueError: if frame is less than 1. Such a frame would give a
                negative start index and slice from the end of the sequence.
        """
        if frame < 1:
            raise ValueError("frame must be >= 1, got %r" % (frame,))
        start = frame - 1
        return [self.seq[i:i + 3] for i in range(start, len(self.seq), 3)]
# 11/9/2015 BCHB Edwards - Using the CodonTable module (slide 15)

# Import exactly the names used instead of wildcard imports.
from Sequence import DNA
from CodonTable import StandardCode

s1 = DNA('ACGTACGTACGTACGT', 'DNA1')
ct = StandardCode()
# Translate reading frame 2, i.e. starting at the second nucleotide.
print(ct.translate(s1, 2))