From 0a280237574259ba25509219a906256638c1c388 Mon Sep 17 00:00:00 2001 From: Petar Petrov Date: Fri, 9 Sep 2022 03:10:09 +0100 Subject: academic/vCAPS_coevolution: Added (Coevolution Analysis) Signed-off-by: Dave Woodfall --- academic/vCAPS_coevolution/01_caps_verbose.patch | 114 ++++++++++++++++++ .../vCAPS_coevolution/02_caps_sort_input.patch | 38 ++++++ academic/vCAPS_coevolution/README | 33 ++++++ academic/vCAPS_coevolution/References | 14 +++ academic/vCAPS_coevolution/slack-desc | 19 +++ .../vCAPS_coevolution/vCAPS_coevolution.SlackBuild | 129 +++++++++++++++++++++ academic/vCAPS_coevolution/vCAPS_coevolution.info | 10 ++ 7 files changed, 357 insertions(+) create mode 100644 academic/vCAPS_coevolution/01_caps_verbose.patch create mode 100644 academic/vCAPS_coevolution/02_caps_sort_input.patch create mode 100644 academic/vCAPS_coevolution/README create mode 100644 academic/vCAPS_coevolution/References create mode 100644 academic/vCAPS_coevolution/slack-desc create mode 100644 academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild create mode 100644 academic/vCAPS_coevolution/vCAPS_coevolution.info (limited to 'academic') diff --git a/academic/vCAPS_coevolution/01_caps_verbose.patch b/academic/vCAPS_coevolution/01_caps_verbose.patch new file mode 100644 index 0000000000..7f64d80f34 --- /dev/null +++ b/academic/vCAPS_coevolution/01_caps_verbose.patch @@ -0,0 +1,114 @@ +diff -pruN orig/caps.cpp new/caps.cpp +--- orig/caps.cpp 2012-12-15 17:13:23.000000000 +0200 ++++ new/caps.cpp 2020-09-09 23:07:46.080566000 +0300 +@@ -14,7 +14,7 @@ + #include + #include + #include +- ++#include + + + +@@ -69,6 +69,8 @@ + const gsl_rng_type * T; + gsl_rng *r; + ++vector totaltempnew; ++double alphathresh = 0; + int main(int argc, char *argv[]){ + + +@@ -543,16 +545,27 @@ int main(int argc, char *argv[]){ + + + print_splash(output); ++ OUTPUT << "\n\File1: " << files[i] << endl; + vec1.print_to_fasta(output.c_str()); ++ OUTPUT << "\n\nFile2: " << files[j] << endl; + vec2.print_to_fasta(output.c_str()); + int length1 = vec1.sequences[0].length(); + int length2 = vec2.sequences[0].length(); + ++ OUTPUT << "\n\nLength1: " << length1 << endl; ++ OUTPUT << "Length2: " << length2 << endl; + + + if(tree_in ==0){ + tree1 = create_input_tree(vec1.names, vec1.sequences); + tree2 = create_input_tree(vec2.names, vec2.sequences); ++ ++ // Output the CAPS generated trees to the .out file of each pair ++ string temptre1 = TreeTemplateTools::treeToParenthesis(*tree1, true); ++ string temptre2 = TreeTemplateTools::treeToParenthesis(*tree2, true); ++ OUTPUT << "\n" << endl; ++ OUTPUT << "CAPS generated tree 1: " << temptre1 << endl; ++ OUTPUT << "CAPS generated tree 2: " << temptre2 << endl; + }/*else if(tree_in ==1 && variable==1){ + + std::auto_ptr DS; +@@ -666,6 +679,7 @@ int main(int argc, char *argv[]){ + int value = floor(((totaltemp.size())*(1-(threshval))))+1; + + threshold = totaltemp[value]; ++ totaltempnew = totaltemp; + + + /*=======================================================*/ +@@ -870,6 +884,30 @@ int Chi_squared (int num_pairs, int num_ + + } /* ----- end of function Chi_squared ----- */ + ++/* ++ * === FUNCTION ====================================================================== ++ * Name: find_alpha ++ * Description: Find the index of an element in a vector totaltemp ++ * Help from: https://www.geeksforgeeks.org/how-to-find-index-of-a-given-element-in-a-vector-in-cpp/ ++ * https://stackoverflow.com/questions/8647635/elegant-way-to-find-closest-value-in-a-vector-from-above ++ * Author: Petar Petrov, University of Turku (Finland); pebope@utu.fi ++ * ===================================================================================== ++ */ ++double getIndex(std::vector const& v, double K) ++{ ++ auto const it = std::lower_bound(v.begin(), v.end(), fabs(K)); ++ //auto it = std::upper_bound(v.begin(), v.end(), fabs(K)); ++ ++ if (it != v.end()) { ++ int index = distance(v.begin(), it); ++ alphathresh = (((int)1+(double)v.size()-(int)index)/(double)v.size()); ++ return alphathresh; ++ //cerr << index << "\t" << alphathresh << endl; ++ } ++ else { ++ cerr << "ELEMENT NOT FOUND!" << endl; ++ } ++} + + + +@@ -890,9 +928,9 @@ int print_inter(vector& Correl1, + output << endl << endl; + + output << "Coevolving Pairs of amino acid sites\n"; +- output << "=============================================================================\n"; +- output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\n\n"; +- output << "=============================================================================\n"; ++ output << "================================================================================================================================\n"; ++ output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\tP-value1\tP-value2\tMean P-value\tCorrelation1\tCorrelation2\n\n"; ++ output << "================================================================================================================================\n"; + + //double mean = average_vec(Correl); + //double SD = SD_vf(Correl, mean); +@@ -951,9 +989,11 @@ int print_inter(vector& Correl1, + + // } + ++ double Alpha1 = getIndex(totaltempnew, Correl1[cor]); ++ double Alpha2 = getIndex(totaltempnew, Correl2[cor]); + //if(bootval>=bootcut && re1<=8 && re2<=8 ){ + if(bootval>=bootcut){ +- output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << endl; ++ output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << "\t" << Alpha1 << "\t" << Alpha2 << "\t" << (Alpha1+Alpha2)/2 << "\t" << Correl1[cor] << "\t" << Correl2[cor] << endl; + signif.push_back(((Correl1[cor]+Correl2[cor])/2)); + ++pairs; + vector tem; diff --git a/academic/vCAPS_coevolution/02_caps_sort_input.patch b/academic/vCAPS_coevolution/02_caps_sort_input.patch new file mode 100644 index 0000000000..917bca7cab --- /dev/null +++ b/academic/vCAPS_coevolution/02_caps_sort_input.patch @@ -0,0 +1,38 @@ +diff -pruN old/caps.cpp new/caps.cpp +--- old/caps.cpp 2022-01-20 09:30:24.409886691 +0200 ++++ new/caps.cpp 2022-01-20 09:39:10.669845265 +0200 +@@ -1,4 +1,5 @@ + #include ++#include + #include + #include"BCFasta.h" + #include"file_manip.h" +@@ -15,7 +16,7 @@ + #include + #include + #include +- ++#include + + + #include +@@ -69,6 +70,10 @@ + const gsl_rng_type * T; + gsl_rng *r; + ++// make sure filenames are sorted! ++// https://stackoverflow.com/a/34757557 ++bool compareFunction (std::string a, std::string b) {return a totaltempnew; + double alphathresh = 0; + int main(int argc, char *argv[]){ +@@ -189,6 +194,8 @@ int main(int argc, char *argv[]){ + + vector files; + files = Folder_to_vector(mystring.c_str()); ++ // make sure filenames are sorted! ++ std::sort(files.begin(),files.end(),compareFunction); + + Fasta_vector file; + file.ref_num=0; diff --git a/academic/vCAPS_coevolution/README b/academic/vCAPS_coevolution/README new file mode 100644 index 0000000000..b8b4a71a4b --- /dev/null +++ b/academic/vCAPS_coevolution/README @@ -0,0 +1,33 @@ +vCAPS: (verbose) Coevolution Analysis using Protein Sequences + +CAPS is aimed at measuring the coevolution between amino acid sites +belonging to the same protein (intra-molecular coevolution) or to two +functionally or physically interacting proteins (inter-molecular +coevolution). The Software implements an improved method to detect +intra-molecular coevolution as published in Genetics (Fares and Travers, +2006) and also inter-protein coevolution analysis. The improved scoring +of amino acid sites is obtained by maximum likelihood ancestral state +reconstruction along with simulations to assess significance. + +This is a modified version of "CAPS_coevolution", used in the AutoCoEv +pipeline: +https://github.com/mattilalab/autocoev + +It applies two _unofficial_ patches: +- 01_caps_verbose: makes the program output its generated trees, as well + as the p-value for each correlated amino acid pair +- 02_caps_sort_input: introduce a function to sort input file names + +The produced executable is called "vCAPS" and can be installed along +"caps" from CAPS_coevolution. Building CAPS from source requires the +Bio++ 1.9 suite libraries, but make sure the current versions of the +bppsuite (and its dependencies) are NOT installed at build time. + +Citing CAPS: +CAPS: coevolution analysis using protein sequences. Fares MA, McNally D. +Bioinformatics. 2006 Nov 15;22(22):2821-2. PMID: 17005535 + +The mathematical model has been described separately: +A novel method for detecting intramolecular coevolution: adding a +further dimension to selective constraints analyses. Fares MA, Travers +SA. Genetics. 2006 May;173(1):9-23. PMID: 16547113 diff --git a/academic/vCAPS_coevolution/References b/academic/vCAPS_coevolution/References new file mode 100644 index 0000000000..e8236cd282 --- /dev/null +++ b/academic/vCAPS_coevolution/References @@ -0,0 +1,14 @@ +If you use CAPS in your research, please include the following citations: + +CAPS: coevolution analysis using protein sequences. +Fares MA, McNally D. +Bioinformatics. 2006 Nov 15;22(22):2821-2. +PMID: 17005535 +https://www.ncbi.nlm.nih.gov/pubmed/17005535 + +The mathematical model has been described separately: +A novel method for detecting intramolecular coevolution: adding a further dimension to selective constraints analyses. +Fares MA, Travers SA. +Genetics. 2006 May;173(1):9-23. +PMID: 16547113 +https://www.ncbi.nlm.nih.gov/pubmed/16547113 diff --git a/academic/vCAPS_coevolution/slack-desc b/academic/vCAPS_coevolution/slack-desc new file mode 100644 index 0000000000..5cd78f0770 --- /dev/null +++ b/academic/vCAPS_coevolution/slack-desc @@ -0,0 +1,19 @@ +# HOW TO EDIT THIS FILE: +# The "handy ruler" below makes it easier to edit a package description. +# Line up the first '|' above the ':' following the base package name, and +# the '|' on the right side marks the last column you can put a character in. +# You must make exactly 11 lines for the formatting to be correct. It's also +# customary to leave one space after the ':' except on otherwise blank lines. + + |-----handy-ruler------------------------------------------------------| +vCAPS_coevolution: vCAPS_coevolution (Coevolution Analysis using Protein Sequences) +vCAPS_coevolution: +vCAPS_coevolution: CAPS is aimed at measuring the coevolution between amino acid +vCAPS_coevolution: sites belonging to the same protein (intra-molecular coevolution) +vCAPS_coevolution: or to two functionally or physically interacting proteins (inter- +vCAPS_coevolution: molecular coevolution). In addition, a test which assesses +vCAPS_coevolution: whether two proteins are interacting is implemented. +vCAPS_coevolution: +vCAPS_coevolution: Home: http://bioinf.gen.tcd.ie/~faresm/software/software.html +vCAPS_coevolution: +vCAPS_coevolution: diff --git a/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild b/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild new file mode 100644 index 0000000000..f00aea698e --- /dev/null +++ b/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild @@ -0,0 +1,129 @@ +#!/bin/bash + +# Slackware build script for vCAPS_coevolution + +# Copyright 2020-2022 Petar Petrov slackalaxy@gmail.com +# All rights reserved. +# +# Redistribution and use of this script, with or without modification, is +# permitted provided that the following conditions are met: +# +# 1. Redistributions of this script must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +cd $(dirname $0) ; CWD=$(pwd) + +PRGNAM=vCAPS_coevolution +VERSION=${VERSION:-2.0_2UN} +BUILD=${BUILD:-1} +TAG=${TAG:-_SBo} +PKGTYPE=${PKGTYPE:-tgz} + +SRCNAM=caps +SRCVER=2.0 +BINNAM=vCAPS + +if [ -z "$ARCH" ]; then + case "$( uname -m )" in + i?86) ARCH=i586 ;; + arm*) ARCH=arm ;; + *) ARCH=$( uname -m ) ;; + esac +fi + +if [ ! -z "${PRINT_PACKAGE_NAME}" ]; then + echo "$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE" + exit 0 +fi + +TMP=${TMP:-/tmp/SBo} +PKG=$TMP/package-$PRGNAM +OUTPUT=${OUTPUT:-/tmp} + +if [ "$ARCH" = "i586" ]; then + SLKCFLAGS="-O2 -march=i586 -mtune=i686" + LIBDIRSUFFIX="" +elif [ "$ARCH" = "i686" ]; then + SLKCFLAGS="-O2 -march=i686 -mtune=i686" + LIBDIRSUFFIX="" +elif [ "$ARCH" = "x86_64" ]; then + SLKCFLAGS="-O2 -fPIC" + LIBDIRSUFFIX="64" +else + SLKCFLAGS="-O2" + LIBDIRSUFFIX="" +fi + +LIBDIRPATH="-Wl,-rpath,/usr/lib${LIBDIRSUFFIX}/Bpp1.9" + +set -e + +rm -rf $PKG +mkdir -p $TMP $PKG $OUTPUT +cd $TMP +rm -rf ${SRCNAM}${SRCVER}_src +unzip $CWD/${SRCNAM}2_src.zip +cd ${SRCNAM}${SRCVER}_src +chown -R root:root . +find -L . \ + \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \ + -o -perm 511 \) -exec chmod 755 {} \; -o \ + \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \ + -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \; + +# This is needed for gcc in Slackware 14.2 +sed -i "s:CC=g++ -g:CC=g++-5 -g -std=c++11:" Makefile + +# Use our CFLAGS and the custom (legacy) lib path +sed -i "s:CFLAGS=:CFLAGS=$SLKCFLAGS $LIBDIRPATH:" Makefile + +# Find the legacy bpp libraries +sed -i "s:-lbpp-phyl:-L/usr/lib${LIBDIRSUFFIX}/Bpp1.9 -lbpp-phyl:g" Makefile +sed -i "s:-lbpp-numcalc:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-numcalc:g" Makefile +sed -i "s:-lbpp-utils:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-utils:g" Makefile +sed -i "s:-lbpp-seq:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-seq:g" Makefile + +# Rename the produced executable +sed -i "s:-o caps:-o $BINNAM:" Makefile + +# Use our patches +patch -p1 -i $CWD/01_caps_verbose.patch +patch -p1 -i $CWD/02_caps_sort_input.patch + +# we already specified g++-5 above, so no need for this +#source /etc/profile.d/gcc5.sh + +make all + +# Install the binary produced from our patched source, as "vCAPS" +install -D -m755 $BINNAM $PKG/usr/bin/$BINNAM + +find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \ + | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true + +mkdir -p $PKG/usr/share/$PRGNAM +cp -a sample structures trees TLR1.fa.out $PKG/usr/share/$PRGNAM + +mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION +cp -a \ + caps_manual.pdf \ + $PKG/usr/doc/$PRGNAM-$VERSION +cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild +cat $CWD/References > $PKG/usr/doc/$PRGNAM-$VERSION/References + +mkdir -p $PKG/install +cat $CWD/slack-desc > $PKG/install/slack-desc + +cd $PKG +/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE diff --git a/academic/vCAPS_coevolution/vCAPS_coevolution.info b/academic/vCAPS_coevolution/vCAPS_coevolution.info new file mode 100644 index 0000000000..48545099b1 --- /dev/null +++ b/academic/vCAPS_coevolution/vCAPS_coevolution.info @@ -0,0 +1,10 @@ +PRGNAM="vCAPS_coevolution" +VERSION="2.0_2UN" +HOMEPAGE="http://bioinf.gen.tcd.ie/caps/home.html" +DOWNLOAD="https://raw.githubusercontent.com/slackalaxy/sources/main/caps2_src.zip" +MD5SUM="0914007c32ed22a9cb8a47b55cd18a39" +DOWNLOAD_x86_64="" +MD5SUM_x86_64="" +REQUIRES="bpp1.9-phyl" +MAINTAINER="Petar Petrov" +EMAIL="slackalaxy@gmail.com" -- cgit v1.2.3