Select Git revision
Mdp_toolbox.cpp
Mdp_toolbox.cpp 3.81 KiB
#include "Mdp_toolbox.hpp"
Mdp_toolbox::Mdp_toolbox(){}
Mdp_toolbox::Mdp_toolbox(std::string database_location, bool clean_ends, std::string clean_marked_motifs)
{
database = Database(database_location, clean_ends, clean_marked_motifs);
for(auto motif_1: database.get_motif_database())
for(auto motif_2: database.get_motif_database())
{
//compare different pairs, and only if not already calculated
Motif_pair motif_pair(motif_1, motif_2);
if(motif_1 != motif_2 && mdp_database.find(motif_pair) == mdp_database.end())
mdp_database[motif_pair]= motif_discriminating_positions(motif_pair);
}
}
int Mdp_toolbox::intersection(std::string a, std::string b){
int counter=0;
if(a.size()>10 && b.size()>10) return 20;
else if (a.size()>10 || b.size()>10) return std::min(a.size(),b.size());
forn(i,a.size())
if(b.find(a[i])!=std::string::npos)
counter++;
return counter;
}
int Mdp_toolbox::simple_motif_discriminating_positions(Motif_class motif_1, Motif_class motif_2){
if (motif_1.get_sequence().size() != motif_2.get_sequence().size())
throw std::invalid_argument( "Motifs need to have the same size." );
int counter = 0;
forn(i,motif_1.get_sequence().size())
if(intersection(motif_1.get_sequence()[i],motif_2.get_sequence()[i])>0)
counter++;
return motif_1.get_sequence().size()-counter;
}
int Mdp_toolbox::alignval(Motif_class motif_1, Motif_class motif_2)
{
if (motif_1.get_sequence().size() != motif_2.get_sequence().size())
throw std::invalid_argument( "Motifs need to have the same size." );
int motif_size = motif_1.get_sequence().size();
int alignval = 0;
int pa,pb;
forn(i,motif_size)
{
if(motif_1.get_sequence()[i].size()<=10) pa=1; else pa=0;
if(motif_2.get_sequence()[i].size()<=10) pb=1; else pb=0;
if(alignval < pa*pb) alignval = pa*pb;
}
return alignval;
}
int Mdp_toolbox::motif_discriminating_positions(Motif_pair motif_pair){
Motif_class long_motif = motif_pair.get_long_motif();
Motif_class short_motif = motif_pair.get_short_motif();
int length_difference = long_motif.get_sequence().size()-short_motif.get_sequence().size()+1;
int motif_discriminating_positions = 99;
forn(i,length_difference)
{
std::vector<std::string> sequence;
forn(j, short_motif.get_sequence().size())
sequence.push_back(long_motif.get_sequence()[i + j]);
Motif_class submotif(long_motif.get_name(), sequence);
int align_val = alignval(short_motif, submotif);
int partial_motif_discriminating_positions = simple_motif_discriminating_positions(short_motif, submotif);
if(partial_motif_discriminating_positions < motif_discriminating_positions && align_val > 0)
motif_discriminating_positions = partial_motif_discriminating_positions;
}
return motif_discriminating_positions;
}
std::vector<Motif_pair> Mdp_toolbox::get_pairs_k_discriminant_positions(int k)
{
std::vector<Motif_pair> pairs_k_discriminant_position;
for(std::pair<Motif_pair, int> element : mdp_database)
if(element.second == k)
pairs_k_discriminant_position.push_back(element.first);
return pairs_k_discriminant_position;
}
int Mdp_toolbox::get_mdp(Motif_pair motif_pair)
{
return mdp_database[motif_pair];
}
std::vector<Motif_class> Mdp_toolbox::get_k_discriminant_motifs(Motif_class motif, int k)
{
std::vector<Motif_class> k_discriminant_motif;
for(std::pair<Motif_pair, int> element : mdp_database)
{
Motif_pair motif_pair = element.first;
if(element.second == k)
{
if (motif_pair.get_short_motif() == motif)
k_discriminant_motif.push_back(motif_pair.get_long_motif());
else if(motif_pair.get_long_motif() == motif)
k_discriminant_motif.push_back(motif_pair.get_short_motif());
}
}
return k_discriminant_motif;
}
std::map<Motif_pair, int> Mdp_toolbox::get_mdp_database()
{
return mdp_database;
}