import string
# One-letter residue codes mapped to the corresponding three-letter codes.
residue_name = {
    'a': 'ala', 'r': 'arg', 'n': 'asn', 'd': 'asp',
    'c': 'cys', 'q': 'gln', 'e': 'glu', 'g': 'gly',
    'h': 'his', 'i': 'ile', 'l': 'leu', 'k': 'lys',
    'm': 'met', 'f': 'phe', 'p': 'pro', 's': 'ser',
    't': 'thr', 'w': 'trp', 'y': 'tyr', 'v': 'val',
}

# Mass of each residue, keyed by its three-letter code.
residue_mass = {
    'ala': 71.0790184654,
    'arg': 157.196106437,
    'asn': 114.104059219,
    'asp': 114.080688839,
    'cys': 103.143406585,
    'gln': 128.131048075,
    'glu': 128.107677695,
    'gly': 57.0520296093,
    'his': 137.141527428,
    'ile': 113.159985034,
    'leu': 113.159985034,
    'lys': 129.182660137,
    'met': 131.197384297,
    'phe': 147.17714379,
    'pro': 97.1170442246,
    'ser': 87.0783231891,
    'thr': 101.105312045,
    'trp': 186.213916723,
    'tyr': 163.176448514,
    'val': 99.1329961777,
}

# Mass added to a peptide chain by its termini, on top of the residues.
terminus_mass = 18.0152566767
# The function that does the calculation
def totalMass(peptide_chain):
    """Return the total mass of a peptide chain.

    peptide_chain is any sequence of residue codes: a string of
    one-letter codes such as 'AEG', or a list of one- or three-letter
    codes such as ['ala', 'arg', 'gly', 'his'].  Codes are matched
    case-insensitively.

    The result is terminus_mass plus the residue_mass entry of every
    residue; an empty chain therefore yields terminus_mass alone.

    Raises KeyError if a code is not found in residue_name (one-letter
    codes) or residue_mass (all other codes).
    """
    mass = terminus_mass
    for residue in peptide_chain:
        # Use the str method rather than the deprecated string.lower()
        # function, which no longer exists in Python 3.
        code = residue.lower()
        if len(code) == 1:
            # Translate a one-letter code to its three-letter form.
            code = residue_name[code]
        mass = mass + residue_mass[code]
    return mass
Note the use of two dictionaries to store the information about residues.
Next, a simple application that uses this module:
from PeptideChain import totalMass
# A chain given as a string of one-letter codes.  The parenthesised
# single-argument form prints the same under Python 2 and is also valid
# on Python 3.
print(totalMass('AEG'))
# A chain given as a list of three-letter codes.
print(totalMass(['ala', 'arg', 'gly', 'his']))
The module MatrixIO.py:
from Scientific.IO.TextFile import TextFile
import string
import Numeric
def readMatrix(filename):
    """Read a whitespace-separated table of numbers from a text file.

    Every non-blank line of filename becomes one row, with each
    whitespace-separated field converted to a float.  The rows are
    returned as a Numeric array.
    """
    rows = []
    for line in TextFile(filename):
        columns = []
        for number in string.split(line):
            columns.append(string.atof(number))
        # A blank line produces no columns; appending it would give the
        # rows unequal lengths, so it is skipped.
        if columns:
            rows.append(columns)
    return Numeric.array(rows)
def writeMatrix(a, filename):
    """Write the rows of a to the text file filename.

    a is iterated row by row; each row is written on its own line, and
    every element is written as its repr() followed by one space.
    """
    # Named 'output' rather than 'file' to avoid shadowing the builtin.
    output = TextFile(filename, 'w')
    try:
        for row in a:
            for number in row:
                # repr() is the function form of the backtick syntax.
                output.write(repr(number) + ' ')
            output.write('\n')
    finally:
        # Close the file even if writing fails part-way through.
        output.close()
In the first function, the function string.atof is applied to all elements of a list. This is such a frequent operation that a special shorthand has been introduced: the function map(function, sequence) applies a function to each element of a sequence and returns a list of the results. This makes it possible to replace the lines
columns = []
for number in string.split(line):
columns.append(string.atof(number))
by the single line columns = map(string.atof, string.split(line)).
First the generation of the test data:
import MatrixIO
import Numeric
# The time axis and the constant values are the same for every file,
# so they are built once instead of on every pass through the loop.
time = Numeric.arrayrange(10.)
values = 4.*Numeric.ones(10)
for i in range(1, 51):
    parameters = (i, -3.5)
    # Row 0 holds the parameters; rows 1 to 10 hold (time, value) pairs.
    data = Numeric.zeros((11, 2), Numeric.Float)
    data[0] = parameters
    data[1:, 0] = time
    data[1:, 1] = values
    # repr() is the function form of the backtick syntax.
    MatrixIO.writeMatrix(data, "data" + repr(i))
Note how everything is put into one large array before output; this
trick allows the use of the matrix output function written in the last
exercise. The result of the analysis program must be a constant time
series whose value is twice the sum of the integers from 1 to 50 (each
file contributes its first parameter i times sqrt(4) = 2), i.e.
2*(50*51/2) = 50*51 = 2550.
And now the program that "analyses" the data files.
import MatrixIO
import Numeric
import Gnuplot
# Running sum over all files; becomes an array after the first addition.
total = 0.
for i in range(1, 51):
    # repr() is the function form of the backtick syntax.
    data = MatrixIO.readMatrix("data" + repr(i))
    # Row 0 holds the parameters; the remaining rows are (time, value).
    time = data[1:, 0]
    values = data[1:, 1]
    parameters = data[0]
    total = total + parameters[0]*Numeric.sqrt(values)
# Pair the time axis (taken from the last file read) with the summed
# series, one (time, total) point per row, and plot it.
plot_data = Numeric.transpose(Numeric.array([time, total]))
Gnuplot.plot(plot_data)