/* BEGIN software license
 *
 * MsXpertSuite - mass spectrometry software suite
 * -----------------------------------------------
 * Copyright(C) 2009,...,2018 Filippo Rusconi
 *
 * http://www.msxpertsuite.org
 *
 * This file is part of the MsXpertSuite project.
 *
 * The MsXpertSuite project is the successor of the massXpert project. This
 * project now includes various independent modules:
 *
 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * END software license
 */


/////////////////////// Local includes
#include "CleaveRule.hpp"
#include "PolChemDef.hpp"


namespace MsXpS
{

namespace libXpertMass
{


/*!
\class MsXpS::libXpertMass::CleaveRule
\inmodule libXpertMass
\ingroup PolChemDefAqueousChemicalReactions
\inheaderfile CleaveRule.hpp

\brief The CleaveRule class provides a model for specifying aqueous cleavage
rules for refining cleavage specifications (\l CleaveSpec) of \l{Polymer}
\l{Sequence}s.

Cleavage rules help refine the description of the chemical reaction that is the
basis of a cleavage (either enzymatic or chemical).

While a number of cleavage agents (like a number of enzymes) do not make
unexpected reactions upon the cleavage (enzymes usually hydrolyze their
substrates), there are chemical agents that while cleaving their
polymer sequence substrate chemically modify the ends of the generated
oligomers. One notorious example is the case of cyanogen bromide, that cleaves
proteins right of methionyl residues. Upon such cleavage, the monomer at the
right side of the generated oligomer (methionyl residue) gets modified
according to this actionformula: "-CH2S+O". This reaction is modelled using a
CleaveRule.

\sa CleaveMotif, CleaveSpec
*/


/*!
\variable MsXpS::libXpertMass::CleaveRule::m_leftCode

\brief The \l Monomer code at the left of the cleavage site.
*/

/*!
\variable MsXpS::libXpertMass::CleaveRule::m_leftFormula

\brief The \l Formula to be applied onto the left monomer code.
*/

/*!
\variable MsXpS::libXpertMass::CleaveRule::m_rightCode

\brief The \l Monomer code at the right of the cleavage site.
*/

/*!
\variable MsXpS::libXpertMass::CleaveRule::m_rightFormula

\brief The \l Formula to be applied onto the right monomer code.
*/

/*!
\brief Constructs a CleaveRule instance

\list
\li \a pol_chem_def_csp: Polymer chemistry definition. Cannot be nullptr.
\li \a name: the name.
\li \a leftCode: The \l Monomer code at the left of the cleavage site.
\li \a leftFormula: The \l Formula to be applied onto the left monomer code.
\li \a rightCode: The \l Monomer code at the right of the cleavage site.
\li \a rightFormula: The \l Formula to be applied onto the right monomer code.
\endlist
*/
CleaveRule::CleaveRule(PolChemDefCstSPtr pol_chem_def_csp,
                       QString name,
                       QString leftCode,
                       QString leftFormula,
                       QString rightCode,
                       QString rightFormula)
  : PolChemDefEntity(pol_chem_def_csp, name),
    m_leftCode(leftCode),
    m_leftFormula(leftFormula),
    m_rightCode(rightCode),
    m_rightFormula(rightFormula)
{
}

/*!
\brief Constructs a CleaveRule instance as a copy of \a other.
 */
CleaveRule::CleaveRule(const CleaveRule &other)
  : PolChemDefEntity(other),
    m_leftCode(other.m_leftCode),
    m_leftFormula(other.m_leftFormula),
    m_rightCode(other.m_rightCode),
    m_rightFormula(other.m_rightFormula)
{
}

/*!
\brief Destructs this CleaveRule instance
*/
// No resource is released by hand here: member destructors do all the work.
CleaveRule::~CleaveRule() = default;

/*!
\brief Assigns \a other to this CleaveRule instance.

Returns a reference to this CleaveRule instance.
*/
CleaveRule &
CleaveRule::operator=(const CleaveRule &other)
{
  // Self-assignment leaves this instance untouched.
  if(this != &other)
    {
      // Let the base class copy its own part first.
      PolChemDefEntity::operator=(other);

      // Left end of the cleavage site.
      this->m_leftCode    = other.m_leftCode;
      this->m_leftFormula = other.m_leftFormula;

      // Right end of the cleavage site.
      this->m_rightCode    = other.m_rightCode;
      this->m_rightFormula = other.m_rightFormula;
    }

  return *this;
}

/*!
\brief Sets the left \a code.
*/
void
CleaveRule::setLeftCode(const QString &code)
{
  m_leftCode = code;
}

/*!
\brief Returns the left code.
 */
const QString &
CleaveRule::leftCode()
{
  // Read-only reference to the stored left-side monomer code.
  return this->m_leftCode;
}

/*!
\brief Sets the right \a code.
*/
void
CleaveRule::setRightCode(const QString &code)
{
  m_rightCode = code;
}

/*!
\brief Returns the right code.
 */
const QString &
CleaveRule::rightCode()
{
  // Read-only reference to the stored right-side monomer code.
  return this->m_rightCode;
}

/*!
\brief Sets the left \a formula.
*/
void
CleaveRule::setLeftFormula(const Formula &formula)
{
  m_leftFormula = formula;
}

/*!
\brief Returns the left formula.
*/
const Formula &
CleaveRule::leftFormula()
{
  // Read-only reference to the stored left-side formula.
  return this->m_leftFormula;
}

/*!
\brief Sets the right \a formula.
*/
void
CleaveRule::setRightFormula(const Formula &formula)
{
  m_rightFormula = formula;
}

/*!
\brief Returns the right formula.
*/
const Formula &
CleaveRule::rightFormula()
{
  // Read-only reference to the stored right-side formula.
  return this->m_rightFormula;
}

/*!
\brief Searches for a CleaveRule instance by \a name in \a cleave_rule_list.

If the instance is found, and \a other is non-nullptr, it is copied to \a other.

Returns the index of the found CleaveRule instance in \a cleave_rule_list or -1
if the cleavage rule was not found.
*/
int
CleaveRule::isNameInList(const QString &name,
                         const QList<CleaveRule *> &cleave_rule_list,
                         CleaveRule *other)
{
  CleaveRule *cleaveRule = 0;

  if(name.isEmpty())
    return -1;

  for(int iter = 0; iter < cleave_rule_list.size(); ++iter)
    {
      cleaveRule = cleave_rule_list.at(iter);
      Q_ASSERT(cleaveRule);

      if(cleaveRule->m_name == name)
        {
          if(other)
            *other = *cleaveRule;

          return iter;
        }
    }

  return -1;
}

/*!
\brief Validates this CleaveRule instance.

Validation entails the following:

\list
\li If the left monomer code is not empty, it must be known to the polymer
chemistry definition. In that case, the left formula must not be empty and
needs to validate successfully.
\li The same logic is applied to the monomer at the right hand side of the
cleavage site.
\endlist

Returns true if the validation is successful, false otherwise.
 */
bool
CleaveRule::validate()
{
  IsotopicDataCstSPtr isotopic_data_csp =
    mcsp_polChemDef->getIsotopicDataCstSPtr();

  const QList<Monomer *> &monomerRefList = mcsp_polChemDef->monomerList();

  if(!m_leftCode.isEmpty())
    {
      if(Monomer::isCodeInList(m_leftCode, monomerRefList) == -1)
        return false;

      if(m_leftFormula.toString().isEmpty())
        return false;

      if(!m_leftFormula.validate(isotopic_data_csp))
        return false;
    }

  if(!m_rightCode.isEmpty())
    {
      if(Monomer::isCodeInList(m_rightCode, monomerRefList) == -1)
        return false;

      if(m_rightFormula.toString().isEmpty())
        return false;

      qDebug() << "Validating right end cleave rule formula:" << m_rightFormula.toString();

      if(!m_rightFormula.validate(isotopic_data_csp))
        return false;
    }

  return true;
}

/*!
\brief Parses the CleaveRule XML \a element using a \a{version}ed function.

Upon parsing of the \a element, its data are validated and set to this
CleaveRule instance, thus essentially initializing it.

Returns true if parsing and validation were successful, false otherwise.
*/
bool
CleaveRule::renderXmlClrElement(const QDomElement &element, int version)
{
  QDomElement child;

  bool leftCodeSet     = false;
  bool leftFormulaSet  = false;
  bool rightCodeSet    = false;
  bool rightFormulaSet = false;

  /* The xml node we are in is structured this way:
   *
   *  <clr>
   *     <name>Homeseryl</name>
   *     <le-mnm-code>M</le-mnm-code>
   *     <le-formula>-C1H2S1+O1</le-formula>
   *     <re-mnm-code>M</re-mnm-code>
   *     <re-formula>-C1H2S1+O1</re-formula>
   *  </clr>
   *
   * And the element parameter points to the
   *
   * <clr> element tag:
   *  ^
   *  |
   *  +----- here we are right now.
   *
   * Which means that xml_node->name == "clr" and that
   * we'll have to go one step down to the first child of the
   * current node in order to get to the <code> element.
   *
   * Note that the DTD stipulates that there can be no or one at most
   * of each left end and/or right end set of data. So be careful
   * with the assertions !
   * This is the DTD material:
   * <!ELEMENT clr((le-mnm-code,le-formula)?,
   *(re-mnm-code,re-formula)?)>
   */

  if(element.tagName() != "clr")
    return false;

  child = element.firstChildElement();

  if(version == 1)
    {
      // no-op

      version = 1;
    }

  if(child.tagName() != "name")
    return false;

  m_name = child.text();

  child = child.nextSiblingElement();

  while(!child.isNull())
    {
      // OK, apparently there is a child element, so let's try to see
      // what's going on. It can either be "le-mnm-code" or "re-mnm-code".

      if(child.tagName() == "le-mnm-code")
        {
          m_leftCode  = child.text();
          leftCodeSet = true;
        }
      else if(child.tagName() == "le-formula")
        {
          m_leftFormula.setFormula(child.text());
          leftFormulaSet = true;
        }
      else if(child.tagName() == "re-mnm-code")
        {
          m_rightCode  = child.text();
          rightCodeSet = true;
        }
      else if(child.tagName() == "re-formula")
        {
          m_rightFormula.setFormula(child.text());
          rightFormulaSet = true;
        }

      child = child.nextSiblingElement();
    }

  // OK, we just finished parsing this <clr> element. Check what we
  // got.

  if(leftCodeSet)
    {
      if(!leftFormulaSet)
        return false;
    }

  if(rightCodeSet)
    {
      if(!rightFormulaSet)
        return false;
    }

  // It cannot be that no single code could be set.
  if(!leftCodeSet && !rightCodeSet)
    return false;

  if(!validate())
    return false;

  return true;
}

/*!
\brief Formats a string representing this CleaveRule instance suitable to use
as an XML element.

The typical cleavage rule element that is generated in this function looks like
this:

  \code
  <clr>
    <name>Homoseryl</name>
    <re-mnm-code>M</re-mnm-code>
    <re-formula>-CH2S+O</re-formula>
  </clr>
  \endcode

The formatting of the XML element takes into account \a offset and \a
indent by prepending the string with \a offset * \a indent character substring.

\a indent defaults to two spaces.

Returns a dynamically allocated string that needs to be freed after use.
*/
QString *
CleaveRule::formatXmlClrElement(int offset, const QString &indent)
{
  // Builds the <clr> XML element for this cleavage rule:
  //
  //   <clr>
  //     <name>...</name>
  //     <le-mnm-code>...</le-mnm-code>   (only if the left code is set)
  //     <le-formula>...</le-formula>
  //     <re-mnm-code>...</re-mnm-code>   (only if the right code is set)
  //     <re-formula>...</re-formula>
  //   </clr>
  //
  // offset: number of times indent is repeated in front of the <clr> tags;
  //         the child elements get one more repetition.
  // indent: the string used for one indentation level.
  //
  // Returns a QString allocated with new; the caller owns it and must delete
  // it.

  // QString::repeated() returns an empty string for a count smaller than 1,
  // so a zero or negative offset yields no leading text.
  const QString outerLead = indent.repeated(offset);
  const QString innerLead = indent.repeated(offset + 1);

  // Assemble the text in a local value and allocate only when returning.
  QString xml;

  xml += QString("%1<clr>\n").arg(outerLead);

  // Text contents are escaped so that characters such as '&' or '<' cannot
  // produce malformed XML. The multi-argument arg() substitutes all the
  // markers in one pass, so a '%N' sequence inside an argument is never
  // taken for a place marker.
  xml +=
    QString("%1<name>%2</name>\n").arg(innerLead, m_name.toHtmlEscaped());

  if(!m_leftCode.isEmpty())
    {
      // A code is expected to always come with its formula.
      Q_ASSERT(!m_leftFormula.toString().isEmpty());

      xml += QString("%1<le-mnm-code>%2</le-mnm-code>\n")
               .arg(innerLead, m_leftCode.toHtmlEscaped());

      xml += QString("%1<le-formula>%2</le-formula>\n")
               .arg(innerLead, m_leftFormula.toString().toHtmlEscaped());
    }

  if(!m_rightCode.isEmpty())
    {
      Q_ASSERT(!m_rightFormula.toString().isEmpty());

      xml += QString("%1<re-mnm-code>%2</re-mnm-code>\n")
               .arg(innerLead, m_rightCode.toHtmlEscaped());

      xml += QString("%1<re-formula>%2</re-formula>\n")
               .arg(innerLead, m_rightFormula.toString().toHtmlEscaped());
    }

  // The closing tag sits at the same level as the opening one.
  xml += QString("%1</clr>\n").arg(outerLead);

  return new QString(xml);
}

} // namespace libXpertMass

} // namespace MsXpS
