import string

# Translation table from one-letter codes to three-letter codes.
residue_name = {'a': 'ala', 'r': 'arg', 'n': 'asn', 'd': 'asp', 'c': 'cys',
                'q': 'gln', 'e': 'glu', 'g': 'gly', 'h': 'his', 'i': 'ile',
                'l': 'leu', 'k': 'lys', 'm': 'met', 'f': 'phe', 'p': 'pro',
                's': 'ser', 't': 'thr', 'w': 'trp', 'y': 'tyr', 'v': 'val'}

# Table of residue masses.
residue_mass = {'ala': 71.0790184654, 'arg': 157.196106437,
                'asn': 114.104059219, 'asp': 114.080688839,
                'cys': 103.143406585, 'gln': 128.131048075,
                'glu': 128.107677695, 'gly': 57.0520296093,
                'his': 137.141527428, 'ile': 113.159985034,
                'leu': 113.159985034, 'lys': 129.182660137,
                'met': 131.197384297, 'phe': 147.17714379,
                'pro': 97.1170442246, 'ser': 87.0783231891,
                'thr': 101.105312045, 'trp': 186.213916723,
                'tyr': 163.176448514, 'val': 99.1329961777}

# The additional mass of a peptide chain due to the termini.
terminus_mass = 18.0152566767


# The function that does the calculation
def totalMass(peptide_chain):
    """Return the total mass of a peptide chain.

    The result is terminus_mass plus the residue_mass entry of every
    residue in the chain.

    peptide_chain -- a sequence of residue codes. Each item is either a
        one-letter code (translated through residue_name) or a
        three-letter code (looked up directly in residue_mass); case is
        ignored. A plain string is iterated character by character, so
        'AEG' is read as three one-letter codes.

    Raises KeyError if a code is not found in the tables.
    """
    mass = terminus_mass
    for residue in peptide_chain:
        # The str method replaces the deprecated string.lower() function;
        # it gives the same result and exists in every Python version.
        residue = residue.lower()
        if len(residue) == 1:
            residue = residue_name[residue]
        mass = mass + residue_mass[residue]
    return mass


# Note the use of two dictionaries to store the information about residues.
Next, a simple application that uses this module:
from PeptideChain import totalMass

# A chain given as a string of one-letter residue codes.
# print is written in call form so the script runs unchanged on
# Python 2 and Python 3; with a single argument the output is the same.
print(totalMass('AEG'))

# A chain given as a list of three-letter residue codes.
print(totalMass(['ala', 'arg', 'gly', 'his']))
The module MatrixIO.py:
from Scientific.IO.TextFile import TextFile
import string
import Numeric


def readMatrix(filename):
    """Read a matrix from a text file and return it as a Numeric array.

    Every line of the file becomes one row; the whitespace-separated
    fields of the line are converted to floats and become its columns.

    filename -- name of the file to read (opened through TextFile).
    """
    rows = []
    for line in TextFile(filename):
        columns = []
        for number in string.split(line):
            columns.append(string.atof(number))
        rows.append(columns)
    return Numeric.array(rows)


def writeMatrix(a, filename):
    """Write the two-dimensional sequence a to a text file.

    Each row of a is written on its own line; every number is written
    as its repr() followed by a single space.

    a        -- sequence of rows, each row a sequence of numbers.
    filename -- name of the file to write (opened through TextFile in
                'w' mode).
    """
    # Named 'output' rather than 'file' to avoid shadowing the builtin.
    output = TextFile(filename, 'w')
    try:
        for line in a:
            for number in line:
                # repr() is the function form of the backtick syntax.
                output.write(repr(number) + ' ')
            output.write('\n')
    finally:
        # Close the file even if a write fails part-way through.
        output.close()
In the first function, the function string.atof is applied to all elements of a list. This is such a frequent operation that a special shorthand has been introduced: the function map(function, sequence) applies a function to each element of a sequence and returns a list of the results. This makes it possible to replace the lines
columns = []
for number in string.split(line):
    columns.append(string.atof(number))
by the single line columns = map(string.atof, string.split(line)).
First the generation of the test data:
import MatrixIO
import Numeric

# The time axis and the constant value column are the same for every
# file, so they are built once instead of on every pass of the loop.
time = Numeric.arrayrange(10.)
values = 4.*Numeric.ones(10)

# Write the fifty files "data1" ... "data50".
for i in range(1, 51):
    parameters = (i, -3.5)
    # Row 0 holds the two parameters; rows 1..10 hold (time, value) pairs.
    data = Numeric.zeros((11, 2), Numeric.Float)
    data[0] = parameters
    data[1:, 0] = time
    data[1:, 1] = values
    MatrixIO.writeMatrix(data, "data" + str(i))

# Note how everything is put into one large array before output; this
# trick allows the use of the matrix output function written in the last
# exercise. The result of the analysis program must be a constant time
# series whose value is twice the sum of the integers from 1 to 50,
# i.e. 50*51 = 2550.
And now the program that "analyses" the data files.
import MatrixIO
import Numeric
import Gnuplot

# Accumulate parameters[0] * sqrt(values) over the files
# "data1" ... "data50".
total = 0.
for i in range(1, 51):
    data = MatrixIO.readMatrix("data" + str(i))
    # Row 0 holds the parameters; the remaining rows hold the time
    # column (0) and the value column (1).
    time = data[1:, 0]
    values = data[1:, 1]
    parameters = data[0]
    total = total + parameters[0]*Numeric.sqrt(values)

# 'time' is the time column of the last file read; it is paired
# element by element with the accumulated totals for plotting.
plot_data = Numeric.transpose(Numeric.array([time, total]))
Gnuplot.plot(plot_data)