


/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/


// ./tests/peptidoms/catch2-only-peptidoms [zea4291] -s


//    msconvert
//    /gorgone/pappso/formation/Janvier2014/TD/mzXML/20120906_balliau_extract_1_A01_urnb-1.mzXML
//    --filter "index 4291" --mgf

#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/matchers/catch_matchers_vector.hpp>

#include <QString>
#include "../common.h"
#include "tests/tests-config.h"
#include <pappsomspp/core/amino_acid/aastringcodemassmatching.h>
#include <pappsomspp/core/peptide/peptideproformaparser.h>
#include <pappsomspp/core/fasta/fastareader.h>
#include <pappsomspp/core/fasta/fastafileindexer.h>
#include <pappsomspp/core/massspectrum/massspectrum.h>
#include <pappsomspp/core/processing/filters/filterresample.h>
#include <pappsomspp/core/processing/filters/filterpass.h>
#include <pappsomspp/core/processing/filters/filterchargedeconvolution.h>
#include <pappsomspp/core/processing/specpeptidoms/correctiontree.h>
#include <pappsomspp/core/processing/specpeptidoms/locationsaver.h>
#include <pappsomspp/core/processing/specpeptidoms/semiglobalalignment.h>
#include <pappsomspp/core/processing/specpeptidoms/spomsspectrum.h>
#include <pappsomspp/core/processing/specpeptidoms/types.h>
#include <pappsomspp/core/protein/protein.h>
#include <pappsomspp/core/msfile/msfileaccessor.h>


TEST_CASE("test for peptidoms alignment.", "[zea4291]")
{
  // Regression test for the SpecPeptidOMS alignment on the MGF extract of
  // spectrum index 4291 (see the msconvert command at the top of this file).
  //
  // Context recorded when the test was written: the spoms output ("current
  // version") reported GRMZM2G326111_P01 for this spectrum. The fragments
  // below are kept as they were pasted.
  /*
  "accession": "GRMZM2G326111_P01",
  "positions": [
  "eval":{
    "matcher": {
        "score": 107855
    },
    "peptidoms": {
        "bracket": "[G][N]GTGGESIYGEK",
        "nam": 0,
        "score": 69,
        "spc": 11
    }

  {"proforma":"PAGRIVMELYANEVPKTAENFRALCTGEKGVGKSGKPLHYKGSTFHRVIPEFMCQGGDFTRGNGTGGESIYGEKFPDEKFVR",
   "protein_list": [{"accession": "GRMZM2G326111_P01", "positions": [15]}],
   "eval": {"matcher": {"score": 107855}}
  }
  */
  // ... instead of QQVM[MOD:00425]VGYSDSGK from GRMZM2G083841_P01:
  /*
  {"id": {"index": 3624,
          "native_id": "controllerType=0 controllerNumber=1 scan=3625"}
  }
  [
  {"proforma":"MATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQVAKRYGVKLTLFH",
   "protein_list": [{"accession": "GRMZM2G083841_P01", "positions": [535]}],
   "eval": {"matcher": {"score": 415549}}
  }
  */

  // Set the debugging message formatting pattern.
  qSetMessagePattern(QString("%{file}@%{line}, %{function}(): %{message}"));

  // ---- Shared fixture ------------------------------------------------------
  // Catch2 runs the test case body from the top once per leaf SECTION, so
  // every section below works on its own freshly built copy of these objects.

  pappso::QualifiedMassSpectrum spectrum_simple = readQualifiedMassSpectrumMgf(
    QString(CMAKE_SOURCE_DIR)
      .append("/tests/data/scans/20120906_balliau_extract_1_A01_urnb-1_4291.mgf"));

  // Amino acid code with modification MOD:00397 registered on residue 'C'.
  pappso::AaCode aa_code;
  aa_code.addAaModification('C', pappso::AaModification::getInstance("MOD:00397"));

  // 0.02 Dalton precision, used by the deconvolution filter, the experimental
  // spectrum and the aligner.
  pappso::PrecisionPtr precision_ptr = pappso::PrecisionFactory::getDaltonInstance(0.02);

  // Spectrum pre-processing, applied in place and in this order.
  const auto mass_spectrum_sp = spectrum_simple.getMassSpectrumSPtr();
  pappso::FilterChargeDeconvolution(precision_ptr).filter(*mass_spectrum_sp);
  pappso::FilterResampleKeepGreater(150).filter(*mass_spectrum_sp);
  pappso::FilterGreatestY(120).filter(*mass_spectrum_sp);

  pappso::specpeptidoms::SpOMSSpectrum experimental_spectrum(
    spectrum_simple, precision_ptr, aa_code);

  pappso::specpeptidoms::ScoreValues score_values;

  pappso::specpeptidoms::SemiGlobalAlignment semi_global_alignment(
    score_values, precision_ptr, aa_code);


  // NOTE: despite its name, this section only runs the fast alignment and
  // checks that it saves no candidate location for GRMZM2G326111_P01.
  SECTION("..:: Check precise alignment for GRMZM2G326111_P01 on spectrum 4291 ::..", "[zea4291]")
  {
    pappso::specpeptidoms::SpOMSProtein protein(
      "GRMZM2G326111_P01",
      "PAGRIVMELYANEVPKTAENFRALCTGEKGVGKSGKPLHYKGSTFHRVIPEFMCQGGDFTRGNGTGGESIYGEKFPDEKFVR",
      aa_code);

    qDebug();
    semi_global_alignment.fastAlign(experimental_spectrum, &protein); // 1st alignment

    qDebug();
    // Retrieve the sub-sequences of interest for a more precise alignment.
    const std::vector<pappso::specpeptidoms::Location> locations =
      semi_global_alignment.getLocationSaver().getLocations();

    qDebug();
    REQUIRE(locations.size() == 0);
  }


  // Full workflow on GRMZM2G083841_P01: fast alignment, precise alignment of
  // the single saved location, then post-processing of the mass shift.
  SECTION("..:: Check precise alignment for GRMZM2G083841_P01 on spectrum 4291 ::..", "[zea4291]")
  {
    pappso::specpeptidoms::SpOMSProtein protein(
      "GRMZM2G083841_P01",
      "MATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQV"
      "AKRYGVKLTLFH",
      aa_code);

    qDebug();
    semi_global_alignment.fastAlign(experimental_spectrum, &protein); // 1st alignment

    qDebug();
    // Retrieve the sub-sequences of interest for a more precise alignment.
    const std::vector<pappso::specpeptidoms::Location> locations =
      semi_global_alignment.getLocationSaver().getLocations();

    qDebug();
    REQUIRE(locations.size() == 1);


    for(const auto &loc : locations)
      {
        semi_global_alignment.preciseAlign(experimental_spectrum,
                                           &protein,
                                           loc.beginning,
                                           loc.length); // 2nd alignment

        // Retrieve the best alignment. Taken by value on purpose: the
        // post-processing step below overwrites the one stored in the aligner.
        pappso::specpeptidoms::Alignment best_alignment =
          semi_global_alignment.getBestAlignment();

        REQUIRE(best_alignment.getNonAlignedMass() == Catch::Approx(371.17253308780885845));
        REQUIRE(best_alignment.getPeptideString(protein.getSequence()).toStdString() ==
                "MVGYSDSGK");
        REQUIRE(best_alignment.m_peptideModel.toInterpretation().toStdString() ==
                "[371.173]MVGYSDSGK");

        REQUIRE(best_alignment.m_peptideModel.toProForma().toStdString() ==
                "[+371.1725]-MVGYSDSGK");
        REQUIRE(best_alignment.SPC == 8);
        REQUIRE_THAT(
          best_alignment.peaks,
          Catch::Matchers::Approx(std::vector<std::size_t>({0, 24, 52, 64, 74, 78, 80, 93})));
        REQUIRE_THAT(best_alignment.shifts,
                     Catch::Matchers::Approx(std::vector<double>({})).margin(0.001));
        REQUIRE(best_alignment.score == 44);
        REQUIRE(best_alignment.beginning == 62);
        REQUIRE(best_alignment.end == 71);
        // [beginning, end) must delimit the aligned peptide in the protein.
        REQUIRE(protein.getSequence()
                  .mid(best_alignment.beginning, best_alignment.end - best_alignment.beginning)
                  .toStdString() == "MVGYSDSGK");
        REQUIRE(best_alignment.begin_shift == 0);
        REQUIRE(best_alignment.end_shift == Catch::Approx(371.17253308780885845));

        // If there are potential parent mass errors (a terminal shift or any
        // internal shift), a post-processing step is performed. This spectrum
        // is expected to need it, so the condition is asserted and the
        // post-processing below runs unconditionally (REQUIRE aborts the test
        // case on failure).
        const bool has_potential_mass_error = best_alignment.begin_shift > 0 ||
                                              best_alignment.end_shift > 0 ||
                                              best_alignment.shifts.size() > 0;
        REQUIRE(has_potential_mass_error);

        std::vector<double> potential_mass_errors =
          semi_global_alignment.getPotentialMassErrors(
            aa_code, best_alignment, protein.getSequence());

        REQUIRE_THAT(potential_mass_errors,
                     Catch::Matchers::Approx(std::vector<double>({371.17253308780885845,
                                                                  272.1041191732798552,
                                                                  144.04554166663186265,
                                                                  15.9869641599838701})));
        semi_global_alignment.postProcessingAlign(
          experimental_spectrum, &protein, loc.beginning, loc.length, potential_mass_errors);

        // Warning: the post-processing alignment overwrites the best alignment
        // stored in SemiGlobalAlignment.
        pappso::specpeptidoms::Alignment best_post_processed_alignment =
          semi_global_alignment.getBestAlignment();

        REQUIRE_THAT(best_post_processed_alignment.shifts,
                     Catch::Matchers::Approx(std::vector<double>({})));
        REQUIRE(best_post_processed_alignment.m_peptideModel.toInterpretation().toStdString() ==
                "[Q][Q]VMVGYSDSGK");

        // The ProForma string produced by the model must be parseable and
        // give the same peptide mass as the model itself.
        pappso::PeptideSp peptide_sp = pappso::PeptideProFormaParser::parseString(
          best_post_processed_alignment.m_peptideModel.toProForma());

        REQUIRE(peptide_sp->getMass() == Catch::Approx(1297.59725144226672455));

        REQUIRE(best_post_processed_alignment.m_peptideModel.getMass() ==
                Catch::Approx(1297.59725144226672455));
        REQUIRE(spectrum_simple.getPrecursorMass() == Catch::Approx(1313.5888406209298864));
        REQUIRE(peptide_sp->getMass() - spectrum_simple.getPrecursorMass() ==
                Catch::Approx(-15.9915891786));

        REQUIRE(spectrum_simple.getPrecursorMass() ==
                Catch::Approx(best_post_processed_alignment.m_peptideModel.getPrecursorMass()));
        // REQUIRE(experimental_spectrum.getMassList().back() == 0);

        REQUIRE(best_post_processed_alignment.m_peptideModel.toProForma().toStdString() ==
                "[+15.9916]?QQVMVGYSDSGK");

        REQUIRE(best_post_processed_alignment.getNonAlignedMass() == Catch::Approx(0));

        // After post-processing no potential mass error may remain.
        potential_mass_errors = semi_global_alignment.getPotentialMassErrors(
          aa_code, best_post_processed_alignment, protein.getSequence());

        REQUIRE_THAT(potential_mass_errors, Catch::Matchers::Approx(std::vector<double>({})));
      }
  }
}
