# Source code for perseuspy.dependent_peptides

"""
Dependent peptides can be extracted from the `allPeptides.txt` table
and are annotated using the `experimentalDesign.txt`.

This code forms the basis for the corresponding Perseus plugin PluginDependentPeptides.
"""
import pandas as pd
from perseuspy.io.perseus.matrix import read_perseus
pd.read_perseus = read_perseus
from perseuspy.io.maxquant import read_rawFilesTable
from perseuspy.parameters import fileParam, parse_parameters
import numpy as np

# Columns that together identify one dependent peptide; used as the
# pivot index and as the grouping keys throughout this module.
_index_columns = [
    'DP Proteins',
    'DP Base Sequence',
    'DP Cluster Index',
    'DP Modification',
]
# Columns requested from the table when it is read: the ratio values,
# the originating raw file, the localization string and the identifying
# columns listed above.
_cols = [
    'DP Ratio mod/base',
    'Raw file',
    'DP AA',
] + _index_columns
def read_dependent_peptides(filename):
    """
    Read the dependent peptides table and extract localization information.

    Rows without a 'DP Ratio mod/base' value are discarded. The remaining
    ratios are converted to float and pivoted so that every raw file becomes
    one column holding the median ratio per dependent peptide.

    :param filename: path to the 'allPeptides.txt' table.
    :returns dep, localization: the dependent peptide table, localization information.
    """
    ratio = 'DP Ratio mod/base'
    df = pd.read_perseus(filename, usecols=_cols).dropna(subset=[ratio])
    df[ratio] = df[ratio].astype(float)
    # The string alias selects the same group-wise median that pandas
    # substitutes for ``np.median``; recent pandas versions emit a
    # FutureWarning when the numpy callable is passed instead.
    dep = df.pivot_table(ratio, index=_index_columns,
                         columns='Raw file', aggfunc='median')
    localization = _count_localizations(df)
    return dep, localization
def _set_column_names(dep, exp):
    """
    Relabel the raw-file columns of the dependent peptides table as
    {experiment}_{fraction}.

    :param dep: dependent peptides table with one column per raw file.
    :param exp: experimental design table providing the 'Raw file',
        'Experiment' and 'Fraction' columns.
    :returns: a renamed copy of dep whose column index is named 'Column Name'.
    """
    labels = exp['Experiment'] + '_' + exp['Fraction'].astype(str)
    renamed = dep.rename(columns=dict(zip(exp['Raw file'], labels)))
    renamed.columns.name = 'Column Name'
    return renamed


# `defaultdict` is needed by `count`, defined right after this point.
from collections import defaultdict
def count(args):
    """
    Count how often each item occurs across a list of lists.

    >>> dict(count([['a', 'b'], ['a']]))
    {'a': 2, 'b': 1}

    :param args: iterable of iterables holding hashable items.
    :returns: defaultdict(int) mapping each item to its number of occurrences.
    """
    tally = defaultdict(int)
    for group in args:
        for element in group:
            tally[element] += 1
    return tally
def _count_localizations(df):
    """
    Determine the most likely localization for each dependent peptide.

    The ';'-separated 'DP AA' entries of every dependent peptide are split
    and tallied with `count`; `_frequent_localizations` then reduces the
    tallies of each peptide to its most frequent localization.

    :param df: allPeptides.txt table.
    :returns: result of applying `_frequent_localizations` to the tallies of
        each dependent peptide.
    """
    aa_entries = df.groupby(_index_columns)['DP AA']
    counts = aa_entries.apply(lambda aa: count(aa.str.split(';').values))
    # Level 4 is the one following the four `_index_columns` grouping
    # levels -- presumably the keys of the dict returned by `count`. It is
    # labelled 'DP AA' so that `reset_index` exposes it under that name.
    counts.index = counts.index.set_names('DP AA', level=4)
    counts.name = 'DP AA count'
    per_peptide = counts.reset_index().groupby(_index_columns)
    return per_peptide.apply(_frequent_localizations)


def _frequent_localizations(df):
    """
    Return the most frequent localization for one dependent peptide.

    When several localizations share the highest count, 'nterm' wins if it is
    among them, since n-terminal modifications are biologically more likely
    to occur; otherwise all tied localizations are reported, sorted and
    joined by ';'.

    :param df: table with the columns 'DP AA' and 'DP AA count'.
    :returns: Series with the entries 'DP AA max count' and 'DP AA'.
    """
    max_count = int(df['DP AA count'].max())
    tied = df.loc[df['DP AA count'] == max_count, 'DP AA']
    candidates = set(tied.unique())
    best = 'nterm' if 'nterm' in candidates else ';'.join(sorted(candidates))
    return pd.Series({'DP AA max count': max_count, 'DP AA': best})
def run_dependent_peptides_from_parameters(paramfile, outfile):
    """
    Build the dependent peptides table from the files named in a Perseus
    parameters file and write it to outfile.

    :param paramfile: Perseus parameters.xml; the FileParam entries named
        'allPeptides.txt' and 'Raw files table' are read from it.
    :param outfile: Path to the output file.
    """
    parameters = parse_parameters(paramfile)
    run_dependent_peptides(
        fileParam(parameters, 'allPeptides.txt'),
        fileParam(parameters, 'Raw files table'),
        outfile,
    )
def run_dependent_peptides(allPeptides_file, rawFilesTable_file, outfile):
    """
    Transform an allPeptides.txt table and a raw files table into the
    dependent peptides table and write it to outfile.

    The per-raw-file ratio columns are renamed via the raw files table and
    passed as main columns to the writer; the localization information is
    joined onto them.

    :param allPeptides_file: MaxQuant 'allPeptides.txt' output table.
    :param rawFilesTable_file: MaxQuant 'Raw files'-tab table.
    :param outfile: Path to the output file.
    """
    raw_dep, localization = read_dependent_peptides(allPeptides_file)
    design = read_rawFilesTable(rawFilesTable_file)
    named_dep = _set_column_names(raw_dep, design)
    # Captured before the join so that only the ratio columns count as
    # main columns.
    main_columns = list(named_dep.columns)
    table = named_dep.join(localization).reset_index()
    table.to_perseus(outfile, main_columns=main_columns)