advanced python concepts: oop & inheritance
DESCRIPTION
Advanced Python Concepts: OOP & Inheritance. BCHB524 2013 Lecture 18. Last time: Object-oriented programming (OOP) enables us to describe, and program with, concepts. A class describes the behavior of the object. Data members (information storage) - PowerPoint PPT Presentation
TRANSCRIPT
11/4/2013 BCHB524 - 2013 - Edwards
Advanced Python Concepts:
OOP & Inheritance
BCHB524 2013
Lecture 18
11/4/2013 BCHB524 - 2013 - Edwards
Last time...
Object-oriented programming (OOP) enables us to describe, and program with, concepts.
A class describes the behavior of the object:
Data members (information storage) and methods (actions which manipulate the object).
Each instance of the class behaves as defined by the class.
Typically each instance has different values in the class's internal data members.
2
11/4/2013 BCHB524 - 2013 - Edwards
Complete DNASeq.py Module
class DNASeq:
    """A named DNA sequence with a few basic analysis helpers.

    Attributes:
        seq: The nucleotide string.
        name: A label for the sequence.
    """

    def __init__(self, seq="", name=""):
        self.seq = seq
        self.name = name

    def read(self, filename):
        """Replace ``seq`` with the contents of *filename*, whitespace removed."""
        # The context manager closes the file even if reading raises.
        with open(filename) as handle:
            self.seq = ''.join(handle.read().split())

    def reverse(self):
        """Return the sequence read back to front."""
        return self.seq[::-1]

    def complement(self):
        """Return the base-by-base complement of the sequence.

        Raises:
            TypeError: if ``seq`` contains a symbol other than A, C, G or T
                (the lookup yields None, which ``join`` rejects).
        """
        pairing = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
        return ''.join(map(pairing.get, self.seq))

    def reverseComplement(self):
        """Return the complement of the sequence, reversed."""
        return ''.join(reversed(self.complement()))

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, nuc):
        """Return how many times *nuc* occurs in the sequence."""
        return self.seq.count(nuc)

    def percentGC(self):
        """Return the percentage of the sequence that is C or G.

        An empty sequence (the constructor default) reports 0.0 instead of
        failing with a division by zero.
        """
        total = self.length()
        if total == 0:
            return 0.0
        gccount = self.freq('C') + self.freq('G')
        return 100 * float(gccount) / total
3
Describe class in a module, then access using an import statement
11/4/2013 BCHB524 - 2013 - Edwards
Complete DNASeq.py Module
# The module and class are both spelled "DNASeq"; "DNAseq" would fail to import.
from DNASeq import DNASeq

# Build a sequence directly from a string.
ds = DNASeq('ACGTACGTACGTACGT', 'My sequence')
# "%s" formatting prints the same space-separated text under Python 2 and 3.
print("%s %s %s" % (ds.complement(), ds.length(), ds.reverseComplement()))
print("%s %s %s %s" % (ds.freq('C'), ds.freq('G'), ds.length(),
                       ds.percentGC()))

# Start empty, then load the sequence from a file.
ds = DNASeq()
ds.read('anthrax_sasp.nuc')
print("%s %s %s" % (ds.complement(), ds.length(), ds.reverseComplement()))
print("%s %s %s %s" % (ds.freq('C'), ds.freq('G'), ds.length(),
                       ds.percentGC()))
4
11/4/2013 BCHB524 - 2013 - Edwards
Class Inheritance
Inheritance allows similar classes or concepts to share common data and methods
Classic example: DNA Sequence, Transcript, Protein. All contain a name and a sequence data member. All require length and is_valid methods. Otherwise, they differ in their specific details.
5
11/4/2013 BCHB524 - 2013 - Edwards
Diagram
Seq
seq, name
length(), freq(), is_valid()
Protein
mw
valid_symbol(), molWt()
DNA
comp
valid_symbol(), reverseComplement()
Base class (“Parent”)
Derived classes (“Children”)
6
11/4/2013 BCHB524 - 2013 - Edwards
Sequence objects: Sequence.py
class Seq:
    """Base class for a named biological sequence.

    ``is_valid`` calls ``self.valid_symbol(sym)``, which this class does not
    define; a derived class must supply it.

    Attributes:
        seq: The sequence string.
        name: A label for the sequence.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times *sym* occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True when every symbol passes ``valid_symbol``.

        An empty sequence is valid.
        """
        return all(self.valid_symbol(sym) for sym in self.seq)
class DNA(Seq):
    """DNA sequence: validates against A/C/G/T and can reverse-complement."""

    # Complementary base for each nucleotide.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    def valid_symbol(self, sym):
        """Return True if *sym* is one of A, C, G or T."""
        return sym in 'ACGT'

    def reverseComplement(self):
        """Return the complement of the reversed sequence.

        Raises:
            TypeError: if the sequence holds a symbol missing from ``comp``
                (the lookup yields None, which ``join`` rejects).
        """
        return ''.join(map(self.comp.get, self.seq[::-1]))
# slide 7
11/4/2013 BCHB524 - 2013 - Edwards
Sequence objects: Sequence.py
class Seq:
    """Base class for a named biological sequence.

    ``is_valid`` calls ``self.valid_symbol(sym)``, which this class does not
    define; a derived class must supply it.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times *sym* occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True when every symbol passes ``valid_symbol``."""
        return all(self.valid_symbol(sym) for sym in self.seq)


class Protein(Seq):
    """Protein sequence: validates residues and sums their weights."""

    # Weight contributed by each residue letter, as used by molWt().
    mw = {
        'A': 71.04, 'C': 103.01, 'D': 115.03, 'E': 129.04, 'F': 147.07,
        'G': 57.02, 'H': 137.06, 'I': 113.08, 'K': 128.09, 'L': 113.08,
        'M': 131.04, 'N': 114.04, 'P': 97.05, 'Q': 128.06, 'R': 156.10,
        'S': 87.03, 'T': 101.05, 'V': 99.07, 'W': 186.08, 'Y': 163.06,
    }

    def valid_symbol(self, sym):
        """Return True if *sym* is one of the twenty residue letters."""
        return sym in 'ACDEFGHIKLMNPQRSTVWY'

    def molWt(self):
        """Return the sum of the ``mw`` entries for every residue.

        Raises:
            TypeError: if the sequence holds a letter missing from ``mw``
                (the lookup yields None, which cannot be summed).
        """
        return sum(map(self.mw.get, self.seq))
8
11/4/2013 BCHB524 - 2013 - Edwards
Sequence objects
Using Sequence.py
# Import only the names this script uses instead of a star import.
from Sequence import DNA, Protein

s1 = DNA('ACGTACGTACGTACGT', 'DNA1')
if s1.is_valid():
    # "%s" formatting prints the same space-separated text under Python 2 and 3.
    print("%s %s %s" % (s1.reverseComplement(), s1.length(), s1.freq('A')))

s2 = Protein('ACDEFGHIKL', 'Prot1')
if s2.is_valid():
    print("%s %s %s" % (s2.molWt(), s2.length(), s2.freq('H')))
9
11/4/2013 BCHB524 - 2013 - Edwards
Diagram
Seq
seq, name
length(), is_valid()
Protein
mw
valid_symbol(), molWt()
DNA
comp
valid_symbol(), reverseComplement()
Abstract base class (“Parent”)
Derived classes (“Children”)
10
11/4/2013 BCHB524 - 2013 - Edwards
Base-class method using derived-class data member
class Seq:
    """Base class for a named biological sequence.

    ``is_valid`` reads ``self.valid_sym``, which this class does not define;
    each derived class supplies it as a class-level data member.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times *sym* occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True when every symbol occurs in ``valid_sym``."""
        return all(sym in self.valid_sym for sym in self.seq)


class DNA(Seq):
    """DNA sequence over the alphabet A, C, G, T."""

    # Complementary base for each nucleotide.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    # Alphabet consulted by the inherited is_valid().
    valid_sym = 'ACGT'

    def reverseComplement(self):
        """Return the complement of the reversed sequence.

        Raises:
            TypeError: if the sequence holds a symbol missing from ``comp``.
        """
        return ''.join(map(self.comp.get, self.seq[::-1]))


class Protein(Seq):
    """Protein sequence over the twenty residue letters."""

    # Weight contributed by each residue letter, as used by molWt().
    mw = {
        'A': 71.04, 'C': 103.01, 'D': 115.03, 'E': 129.04, 'F': 147.07,
        'G': 57.02, 'H': 137.06, 'I': 113.08, 'K': 128.09, 'L': 113.08,
        'M': 131.04, 'N': 114.04, 'P': 97.05, 'Q': 128.06, 'R': 156.10,
        'S': 87.03, 'T': 101.05, 'V': 99.07, 'W': 186.08, 'Y': 163.06,
    }
    # Alphabet consulted by the inherited is_valid().
    valid_sym = 'ACDEFGHIKLMNPQRSTVWY'

    def molWt(self):
        """Return the sum of the ``mw`` entries for every residue.

        Raises:
            TypeError: if the sequence holds a letter missing from ``mw``.
        """
        return sum(map(self.mw.get, self.seq))
11
11/4/2013 BCHB524 - 2013 - Edwards
Revisit the CodonTable module
class CodonTable:
    """Codon-to-amino-acid lookup built from a five-row text table.

    The table text holds rows of the form ``<key> = <value>`` with keys
    ``AAs``, ``Starts``, ``Base1``, ``Base2`` and ``Base3``; column *i* of the
    three base rows spells the codon whose amino acid is column *i* of ``AAs``.

    Attributes:
        table: dict mapping codon -> (amino acid, is-initiator flag).
    """

    # Table text used when no filename is given; subclasses override it.
    data = None

    def __init__(self, filename=None):
        """Load the table from *filename*, or from ``self.data`` if omitted."""
        if filename:
            # The context manager closes the file even if reading raises.
            with open(filename) as handle:
                data = handle.read()
            self.parse(data)
        else:
            self.parse(self.data)

    def parse(self, data):
        """Populate ``self.table`` from the table text *data*.

        Raises:
            KeyError: if one of the five required rows is missing.
            IndexError: if a base or ``Starts`` row is shorter than ``AAs``.
        """
        rows = {}
        for line in data.split('\n'):
            fields = line.split()
            # Rows look like "<key> = <value>"; shorter lines are skipped.
            if len(fields) >= 3:
                rows[fields[0]] = fields[2]

        b1 = rows['Base1']
        b2 = rows['Base2']
        b3 = rows['Base3']
        aa = rows['AAs']
        st = rows['Starts']

        self.table = {}
        for i, residue in enumerate(aa):
            codon = b1[i] + b2[i] + b3[i]
            # An 'M' in the Starts row flags the codon as an initiator.
            self.table[codon] = (residue, st[i] == 'M')

    def aa(self, codon):
        """Return the amino acid for *codon*, or 'X' if it is not in the table."""
        try:
            return self.table[codon][0]
        except KeyError:
            return 'X'

    def translate(self, seq, frame):
        """Translate *seq* in reading frame *frame*.

        *seq* must provide ``codons(frame)`` returning the codon strings.
        """
        return ''.join(self.aa(codon) for codon in seq.codons(frame))
12
11/4/2013 BCHB524 - 2013 - Edwards
Revisit the CodonTable module
class CodonTable:
    """Codon table whose text comes from a file or from the ``data`` member."""

    # Table text used when no filename is given; subclasses override it.
    data = None

    def __init__(self, filename=None):
        """Parse the table from *filename*, or from ``self.data`` if omitted."""
        if filename:
            # The context manager closes the file even if reading raises.
            with open(filename) as handle:
                data = handle.read()
            self.parse(data)
        else:
            self.parse(self.data)

    # ...
class StandardCode(CodonTable):
    """Codon table whose text is built in rather than read from a file."""

    # One "<key> = <value>" row per line: CodonTable.parse() splits the text
    # on newlines and looks each row up by its key.
    data = """
AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = ---M---------------M---------------M----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
class BacterialCode(CodonTable):
    """Codon table with built-in text; its Starts row flags more initiators."""

    # One "<key> = <value>" row per line: CodonTable.parse() splits the text
    # on newlines and looks each row up by its key.
    data = """
AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = ---M---------------M------------MMMM---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
# slide 13
11/4/2013 BCHB524 - 2013 - Edwards
# Add codons to the DNA class
class Seq:
    """Base class for a named biological sequence.

    ``is_valid`` reads ``self.valid_sym``, which this class does not define;
    each derived class supplies it as a class-level data member.
    """

    def __init__(self, seq, name):
        self.seq = seq
        self.name = name

    def length(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def freq(self, sym):
        """Return how many times *sym* occurs in the sequence."""
        return self.seq.count(sym)

    def is_valid(self):
        """Return True when every symbol occurs in ``valid_sym``."""
        return all(sym in self.valid_sym for sym in self.seq)


class DNA(Seq):
    """DNA sequence that can be reverse-complemented and split into codons."""

    # Complementary base for each nucleotide.
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    # Alphabet consulted by the inherited is_valid().
    valid_sym = 'ACGT'

    def reverseComplement(self):
        """Return the complement of the reversed sequence.

        Raises:
            TypeError: if the sequence holds a symbol missing from ``comp``.
        """
        return ''.join(map(self.comp.get, self.seq[::-1]))

    def codons(self, frame):
        """Return consecutive three-letter slices starting at position *frame*.

        *frame* is 1-based: frame 1 starts at the first symbol. The final
        slice may be shorter than three letters when the remaining length is
        not a multiple of three.
        """
        start = frame - 1
        return [self.seq[i:i + 3] for i in range(start, len(self.seq), 3)]
14
# One import per line, naming only what the script uses.
from Sequence import DNA
from CodonTable import StandardCode

s1 = DNA('ACGTACGTACGTACGT', 'DNA1')
ct = StandardCode()

# Translate s1 in reading frame 2; a single parenthesised argument prints
# identically under Python 2 and 3.
print(ct.translate(s1, 2))
11/4/2013 BCHB524 - 2013 - Edwards
Using the CodonTable module
15