///////
   //    HtmlParser.cc
   //    HtmlParser Class definitions
   //
   //    Class for parsing HTML documents
   //
   //    Copyright (c) 1999-2004 Comune di Prato - Prato - Italy
   //    Some Portions Copyright (c) 2008 Devise.IT srl <http://www.devise.it/>
   //    Author: Gabriele Bartolini - Prato - Italy <angusgb@users.sourceforge.net>
   //
   //    For copyright details, see the file COPYING in your distribution
   //    or the GNU General Public License version 2 or later 
   //    <http://www.gnu.org/copyleft/gpl.html>
   //
   //    $Id: HtmlParser.cc,v 1.87 2008-12-23 09:52:11 angusgb Exp $
   //
   //    G.Bartolini
   //    started: 30.03.2000
///////

#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */

#ifdef HAVE_STD
#include <sstream>
#ifdef HAVE_NAMESPACES
using namespace std;
#endif
#else
#include <sstream.h>
#endif /* HAVE_STD */

#include <ctype.h>   // for isspace()

#include "Scheduler.h"
#include "HtmlParser.h"
#include "HtSGMLCodec.h"
#include "Configuration.h" // for META attributes parsing

// Static variables initialization
// Debug verbosity shared by all HtmlParser instances (0 = silent).
int HtmlParser::debug = 0;

// This defines the maximum number of characters allowed in an HTML tag
// between the starting '<' and the closing '>'.
#define MAX_TAG_SIZE 4096

// Location in the document: bit flags combined in the 'location' member
#define TAGhead     0x0001 // The <HEAD> tag is open
#define TAGtitle    0x0002 // The <TITLE> tag is open
#define TAGlink     0x0004 // The <A> tag is open
#define TAGscript   0x0008 // Set while a <SCRIPT> tag is open
#define TAGhx       0x0020 // Current Tag: <Hx>
#define TAGrefresh  0x0800 // Current Tag: <meta> with refresh

// Accessibility info (ACHECK - accessibility check) for documents:
// bit flags tested against the 'doc_acheck' member
#define ACHECKDOCtitle   0x0001 // The document title is present

// Accessibility info (ACHECK - accessibility check) for tags:
// bit flags collected per tag while its attributes are parsed
#define ACHECKTAGalt   0x0001 // The ALTernative text has been specified
#define ACHECKTAGinputimg 0x0002 // The INPUT is an image

// ALT text checks: bit flags tested against the value returned by CheckAlt()
#define ALTempty		0x0001	// Empty ALT
#define ALTsameasfile	0x0002	// Same name as file
#define ALTlong			0x0004	// ALT too long

// Pre-processor alias: forwards x to the encode() method of the
// HtSGMLCodec singleton
#define encodeSGML(x) (HtSGMLCodec::instance()->encode(x))

//*****************************************************************************
// void HtmlParser::encodeURL(std::string &str, const std::string& reserved_chars)
//   Convert a normal string to a URL 'safe' string, in place.
//
//   A character is copied unchanged only when it is an ASCII letter, an
//   ASCII digit, or one of the characters listed in 'reserved_chars'.
//   Every other octet (including an embedded NUL) is escaped as '%'
//   followed by the 2 upper-case hex digits representing the octet.
//
//   str            - string to encode; replaced by its encoded form
//   reserved_chars - extra characters that must be left unescaped
//
void HtmlParser::encodeURL(std::string &str, const std::string& reserved_chars)
{
    static const char digits[] = "0123456789ABCDEF";

#ifdef HTCHECK_DEBUG
	std::cout << "Encoding URL: " << str << " - using : " << reserved_chars << std::endl;
#endif

    std::string encoded;
    encoded.reserve(str.length());   // the result is never shorter than the input

    for (std::string::const_iterator p(str.begin()); p != str.end(); ++p)
    {
        // <ctype.h> classifiers require a value representable as unsigned char
        const unsigned char octet = static_cast<unsigned char>(*p);

        // std::string::find() is used instead of strchr(): strchr() also
        // matches the terminating NUL of its haystack, which would let an
        // embedded '\0' octet slip through unescaped.
        const bool is_safe = isascii(octet)
            && (isalnum(octet) || reserved_chars.find(*p) != std::string::npos);

        if (is_safe)
            encoded.push_back(*p);
        else
        {
            encoded.push_back('%');
            encoded.push_back(digits[(octet >> 4) & 0x0f]);
            encoded.push_back(digits[octet & 0x0f]);
        }
    }

    str.swap(encoded);
}

// Default constructor.
// Puts every listed member in its "no document parsed yet" state and makes
// sure the static element/attribute lookup maps are initialised.
HtmlParser::HtmlParser()
    : CurrentScheduler(0),
      BaseUrl(0),
      Charset(),
      DocType(),
      LinkDescription(),
      Description(),
      Keywords(),
      DocLanguage(),
#ifdef HTDIG_NOTIFICATION
      HtDigEmail(),
      HtDigEmailSubject(),
      HtDigNotificationDate(),
#endif
      CurrentHx(0),
      PreviousHx(0),
      HxStep(0),
      CurrentAltText(),
      CurrentResourceRef(),
      AltAttrPosition(0),
      ignore(false),
      memo(true),
      location(0),
      doc_acheck(0),
      store_statement(true),
      CurrentTag(HtmlStatement::Tag_Unknown)
{
    // Static lookup tables used to recognise elements and attributes
    HtmlStatement::initElementsMap();
    HtmlAttribute::initAttributesMap();
}

// Destructor.
// Deletes the base URL only when it is a distinct object from the
// scheduler's current URL.
HtmlParser::~HtmlParser()
{
   // Nothing was ever assigned: nothing to release
   if (!BaseUrl)
      return;

   // Base Url different from CurrentUrl. So delete it.
   if (BaseUrl != CurrentScheduler->CurrentUrl)
      delete BaseUrl;
}


// Function-call operator: makes HtmlParser usable as a function object.
// It is used by the Scheduler object in order to parse a document
// (previously retrieved).
//
// The document contents are copied and scanned once, front to back:
//   - "<!-- ... -->" comments and "<![CDATA[ ... ]]>" blocks are skipped;
//   - any other "<! ... >" declaration is checked for a DOCTYPE;
//   - every "< ... >" tag is copied into 'text' and handed to ParseTag();
//   - plain text is appended to the title or to the current link
//     description, depending on the flags set in 'location'.
// After the scan the collected title, charset, doctype, description and
// keywords are stored into the current URL and, when the
// "accessibility_checks" option is enabled, the document level
// accessibility checks are inserted.
//
// Returns HtmlParser_OK on success. Returns HtmlParser_StatementFailed or
// HtmlParser_AttributeFailed when ParseTag() reports a failed insert, and
// HtmlParser_AccessibilityCheckFailed when a document level accessibility
// check cannot be inserted.

HtmlParser::HtmlParser_Codes HtmlParser::operator() (Scheduler &scheduler)
{

   // Initialization
   CurrentScheduler = &scheduler;
   location = 0;
   ignore = false;
   memo = true;
   doc_acheck = 0;

   // HTML Title of the document
   std::string Title;

   // Set debug Level
   SetDebugLevel(CurrentScheduler->GetDebugLevel());

   // Contents of the document - Copy
   std::string Contents(CurrentScheduler->CurrentResponse->GetContents().get());

   // position is set to the beginning of the retrieved document contents
   position = const_cast <HTCHECK_CHAR*> (Contents.c_str());

   // Initialize the tag position index
   TagPosition = 0;
   LastLinkTagPosition = 0;

   // Initialize the row number
   row = 1;

   // Initialize the pointer to the beginning of the line
   linebeginning = position;

   // Initialize the charset string
   Charset.clear();

   // Initialize the doctype string
   DocType.clear();

   // Initialize the description string
   Description.clear();

   // Initialize the keywords string
   Keywords.clear();

   // Initialize the document language string
   DocLanguage.clear();

#ifdef HTDIG_NOTIFICATION
    // Initialise ht://Dig notification variables
    HtDigEmail.clear();
    HtDigEmailSubject.clear();
    HtDigNotificationDate.clear();
#endif

   // Initialise the current and previous header information
   PreviousHx = 0;
   CurrentHx = 0;
   HxStep = 0;

   // Initialize the current ALT text
   CurrentAltText.clear();

   // Initialize the resource reference
   CurrentResourceRef.clear();

   // Attribute position for ALT (inside the tag)
   AltAttrPosition = 0;

   // Assign the base URL used for resolving relative paths
   BaseUrl = CurrentScheduler->CurrentUrl;

   // Let's start parsing the HTML document, from the beginning
   while (*position)
   {

      // Let's check for a comment or a possible DTD declaration

      if (strncmp((char *)position, "<!", 2) == 0)
      {
         position +=2;
         if (strncmp((char *)position, "--", 2) == 0)
         {
            position += 2;
            // Yes ... it is a comment - Go to its end
            do    // Loop until we find a '>' preceded by 2 '-' at least
            {

               int cons_dashes = 0; // Counter for consecutive dashes
               for (ppos = position; *ppos && (cons_dashes < 2); ++ppos)
               {
                  if (*ppos == '-')
                     ++cons_dashes;
                  else
                  {
                     cons_dashes = 0;
                     if (*ppos == (HTCHECK_CHAR) 10)
                        newRow();
                  }
               }

               if (cons_dashes < 2)
               {
                  // Unterminated comment: truncate the (copied) contents
                  // here so that the main loop stops as well
                  *position ='\0';
                  break;
               }
               else
               {

                  // Here we are after a '--'
                  position = ppos;

                  // Skip extra dashes after a badly formed comment
                  while (*position == '-')
                     ++position;

                  // Skip whitespace
                  while (isspace(*position))
                  {
                     if (*position == (HTCHECK_CHAR) 10)
                        newRow();

                     ++position;
                  }
               }
            } while (*position && *position != '>');

            if (*position == '>')
               ++position; // End of comment
         }
         else if (strncmp((char *)position, "[CDATA[", 7) == 0)
         {
            position += 7;
            // Yes ... it is a CDATA block - Go to its end
            do    // Loop until we find a '>' preceded by ']]' at least
            {

               int cons_brackets = 0; // Counter for consecutive square close brackets
               for (ppos = position; *ppos && (cons_brackets < 2); ++ppos)
               {
                  if (*ppos == ']')
                     ++cons_brackets;
                  else
                  {
                     cons_brackets = 0;
                     if (*ppos == (HTCHECK_CHAR) 10)
                        newRow();
                  }
               }

               if (cons_brackets < 2)
               {
                  // Unterminated CDATA block: truncate the (copied)
                  // contents here so that the main loop stops as well
                  *position ='\0';
                  break;
               }
               else
               {

                  // Here we are after a ']]'
                  position = ppos;

                  // Skip extra brackets after a badly formed CDATA end
                  while (*position == ']')
                     ++position;

                  // Skip whitespace
                  while (isspace(*position))
                  {
                     if (*position == (HTCHECK_CHAR) 10)
                        newRow();

                     ++position;
                  }
               }
            } while (*position && *position != '>');

            if (*position == '>')
               ++position; // End of CDATA block
         }
         else
         {
            // It's not a comment declaration but could be a DTD declaration.
            // The declaration is copied into 'text' (newlines dropped).
            // The copy is capped at MAX_TAG_SIZE characters: this assumes
            // 'text' holds at least MAX_TAG_SIZE + 1 characters, which the
            // regular tag path below already relies on. Anything beyond
            // the cap is skipped, not stored.
            HTCHECK_CHAR * const ptext_limit = text + MAX_TAG_SIZE;

            for (ptext = text; *position && *position != '>'; ++position)
            {
               if (*position == (HTCHECK_CHAR) 10)
                  newRow();
               else if (ptext < ptext_limit)
                  *ptext++ = *position;
            }
            *ptext = '\0';

            if (!mystrncasecmp((const char *)text, "doctype", 7))
            {
               for (ptext = text + 7; *ptext && isspace(*ptext); ++ptext); // Skip any whitespace
               DocType = (const char *) ptext; // Assign the DocType to the parser variable
            }

            if (position && *position)
               ++position; // Found the end. Let's skip the char
         }

         continue;

      }

      if (*position =='<')
      {
         ++position; // skip the initial '<'

         // Now ... something strange may appear. Let's think of
         // a malformed HTML document, in which the writer puts
         // a '<' symbol instead of a '&lt;' sgml entity.
         // Let's try to catch it, even if it is very difficult;

         // Do we have a valid character after the '<'?
         while (isspace(*position))
         {
            if (*position == (HTCHECK_CHAR) 10)
               newRow();

            ++position;
         }

         // Maybe it wasn't a valid tag
         // If we are here we may assume we have a valid character,
         // after '<', so an alpha char, or a '/' for closing tags.
         // But we can also have something like:
         // <B.%2  -- Don't ask me why, but somebody got it!!!

         // Another check to perform is if we find a not alphabetic
         // character before a space or a closing tag.

         bool not_good = false;
         for (ppos = position; !not_good && *ppos && !isspace(*ppos)
            && *ppos != '>'; ++ppos)
         {
            if (!isalnum(*ppos) && *ppos!='/')
               not_good = true;
         }

         // We found a not valid character before a space! Skip this tag.
         if (not_good)
            continue;

         // Start of a tag. Let's search for the closing '>'
         // But we can also have it after the previous loop
         if (*ppos && *ppos != '>')
            ppos = (HTCHECK_CHAR *) strchr((char *)position, '>');

         if (ppos)
         {

            // Another trick to catch a malformed tag declaration
            // that is to say a missing '&lt;', let's check if
            // the tag size is bigger than a fixed size (MAX_TAG_SIZE)

            if ((int) (ppos - position) > MAX_TAG_SIZE)
               continue;

            // Set the column of the statement
            col = position - linebeginning;

            // Temporary bookmark for the end of the tag. It points either
            // to the closing '>' or, when the document ends inside the
            // tag name, to the string terminator.
            HTCHECK_CHAR* pend = ppos;

            // Skip any white space at the end
            for (--ppos; *ppos && isspace(*ppos); --ppos);

            // Found. Let's copy it, by skipping '<' and '>'
            ptext=text;

            // copy the characters from the source to the destination
            while (position <= ppos)
               *ptext++ = *position++;

            *ptext='\0';   // close the string

            // Skip the closing '>'. When the document ended before any
            // '>' was found, stay on the terminator: stepping over it
            // would make the main loop read past the end of the contents.
            position = (*pend ? pend + 1 : pend);

            ++TagPosition;

            // Let's parse the tag by using the member attribute 'text'
            // and then check the status reported by the parser
            switch(ParseTag())
            {
               case HtmlParser_NullTag:
                  if (debug > 1)
                     cout << "Warning! Empty (NULL) tag: "
                        << htmlstatement << " - " << text << endl;
                  break;

               case HtmlParser_TagNotStored:
                  if (debug > 3)
                     cout << "Tag not stored: "
                        << htmlstatement << " - " << text << endl;
                  break;

               case HtmlParser_MalformedTag:
                  if (debug > 0)
                     cout << "Warning! Malformed tag: "
                     << htmlstatement << " - " << text << endl;
                  break;

               case HtmlParser_StatementFailed:
                  if (debug > 0)
                     cout << "Error! Insert of HTML statement failed: "
                        << htmlstatement << " - " << text << endl;
                  return HtmlParser_StatementFailed;

               case HtmlParser_AttributeFailed:
                  if (debug > 0)
                     cout << "Error! Insert of HTML attribute failed: "
                        << htmlattribute << " - " << text << endl;
                  return HtmlParser_AttributeFailed;

               case HtmlParser_LinkFailed:
                  if (debug > 0)
                     cout << "Error! Insert of this link failed: "
                        << link << " - " << text << endl;
                  // The return is unconditional (it is not part of the
                  // debug 'if' above).
                  // NOTE(review): a failed link insert is reported as
                  // HtmlParser_AttributeFailed; HtmlParser_LinkFailed looks
                  // like the intended code - confirm against the callers
                  // before changing it.
                  return HtmlParser_AttributeFailed;

               case HtmlParser_OK:  // Do nothing
               default:  // Any other code returned by ParseTag() is ignored
                  break;
            }

         }
         else
         {
            while (*position)
               ++position;  // reach the end (no more tags)
         }
      }
      else
      {
         // We are in the title. Let's store it
         if (location & TAGtitle)
            Title.push_back(*position);
         else if (location & TAGlink)
         {
            // Inside a link: collect its description, collapsing every
            // run of whitespace into a single blank
            if (isspace(*position))
            {
               if (LinkDescription.length() > 0 && !isspace(LinkDescription[LinkDescription.length() -1]))
                  LinkDescription.push_back(' ');
            }
            else
               LinkDescription.push_back(*position);
         }

         // If it is a newline we increment the row number
         if (*position == (HTCHECK_CHAR) 10)
            newRow();

         ++position;
      }

   }

   // Store the document level information collected during the scan
   CurrentScheduler->CurrentUrl->SetTitle(encodeSGML(Title));
   CurrentScheduler->CurrentUrl->SetCharset(Charset);
   CurrentScheduler->CurrentUrl->SetDocType(DocType);
   CurrentScheduler->CurrentUrl->SetDescription(Description);
   CurrentScheduler->CurrentUrl->SetKeywords(Keywords);
#ifdef HTDIG_NOTIFICATION
    CurrentScheduler->CurrentUrl->SetHtDigEmail(HtDigEmail);
    CurrentScheduler->CurrentUrl->SetHtDigEmailSubject(HtDigEmailSubject);
    if (HtDigNotificationDate.length() &&
        parseDate(HtDigNotificationDate))
            CurrentScheduler->CurrentUrl->SetHtDigNotificationDate(HtDigNotificationDate);
#endif

   // If Accessibility Checks are not enabled we exit
   if (!CurrentScheduler->Config->Boolean("accessibility_checks"))
      return HtmlParser_OK;

   // //////////////////////////////////////////////////////
   // Begin of accessibility checks (document level)
   // //////////////////////////////////////////////////////
   // Missing TITLE (Open Accessibility Check: Code 50)
   if (!(doc_acheck & ACHECKDOCtitle))
   {
      // The accessibility check needs to be inserted
      if (!InsertAccessibilityCheck(CurrentScheduler->CurrentUrl->GetID(), 0, 0, 50))
         return HtmlParser_AccessibilityCheckFailed; // Failed
   }
   else
   {
      // We have a title
      unsigned counter =  CountSGMLStringLength ( CurrentScheduler->CurrentUrl->GetTitle().c_str() );

      if (!counter)
      {
         // Empty title (code 51): the accessibility check needs to be inserted
         if (!InsertAccessibilityCheck(CurrentScheduler->CurrentUrl->GetID(), 0, 0, 51))
            return HtmlParser_AccessibilityCheckFailed; // Failed
      }
      else if (counter >= 150)
      {
         // Title of 150 characters or more (code 52): the accessibility
         // check needs to be inserted
         if (!InsertAccessibilityCheck(CurrentScheduler->CurrentUrl->GetID(), 0, 0, 52))
            return HtmlParser_AccessibilityCheckFailed; // Failed
      }

   }

   // Document language
   if (DocLanguage.length())
   {
      // Check for a valid value
   }
   else
   {
      // No language found (code 48): the accessibility check needs to be inserted
      if (!InsertAccessibilityCheck(CurrentScheduler->CurrentUrl->GetID(), 0, 0, 48))
         return HtmlParser_AccessibilityCheckFailed; // Failed
   }

   // //////////////////////////////////////////////////////
   // End of accessibility checks (document level)
   // //////////////////////////////////////////////////////
   return HtmlParser_OK;

}


// Parses the tag currently held in the 'text' member (the characters found
// between '<' and '>', modified in place while it is split into pieces).
//
// The tag name is separated from its attributes and checked with
// CheckTag(); each attribute (with its optional content) is then isolated,
// searched for links with FindLink() and, when required, stored in the
// database together with the tag itself. When the "accessibility_checks"
// option is enabled the tag level accessibility checks are inserted too.
//
// Returns:
//   HtmlParser_NullTag                  - empty tag, nothing to parse
//   HtmlParser_TagNotStored             - the tag must not be stored
//   HtmlParser_MalformedTag             - a malformed attribute was found
//   HtmlParser_StatementFailed          - insert of the tag (or of the
//                                         link description) failed
//   HtmlParser_AttributeFailed          - insert of an attribute failed
//   HtmlParser_LinkFailed               - FindLink() failed to insert a link
//   HtmlParser_AccessibilityCheckFailed - insert of a check failed
//   HtmlParser_OK                       - otherwise
HtmlParser::HtmlParser_Codes HtmlParser::ParseTag ()
{

   bool has_attributes = false;
   bool tag_stored = false;
   bool malformed_tag = false;
   int tag_acheck(0);
   CurrentHx = 0;
   // Reset all the not important tag info from the location
   location &= ~(TAGhx | TAGrefresh);
   CurrentTag = HtmlStatement::Tag_Unknown;

   // Initialize alternative text and resource reference strings
   CurrentAltText.clear();
   CurrentResourceRef.clear();
   AltAttrPosition = 0;

   // Temporary pointer ('register' dropped: the specifier is no longer
   // valid in current C++ and was only a hint)
   HTCHECK_CHAR *ptmp;

   // Statement
   HTCHECK_CHAR *Statement = text;

   // Skip initial spaces
   while (*Statement && isspace(*Statement))
   {
      if (*Statement == (HTCHECK_CHAR) 10)
         newRow();
      ++Statement;
   }

   if (!*Statement)
      return HtmlParser_NullTag;   // Empty

   // Reset htmlstatement variable
   htmlstatement.Reset();

   // Set the IDUrl for the HtmlStatement object
   htmlstatement.SetIDUrl(CurrentScheduler->CurrentSchedule.GetIDSchedule());

   // Set the whole statement
   htmlstatement.SetStatement(Statement);

   // Set the tag position
   htmlstatement.SetTagPosition(TagPosition);

   // Set the row number
   htmlstatement.SetRow(row);

   // Set the col number
   htmlstatement.SetCol(col);

   // Set the tag position of the last link (open link - 'A' element)
   htmlstatement.SetLinkTagPosition(LastLinkTagPosition);

   // Check if we have an empty tag (the statement ends with '/').
   // Statement is not empty here, so strlen() - 1 is a valid index.
   if (Statement[strlen(Statement) - 1] == '/')
      htmlstatement.empty();

   ptmp=Statement;   // Stores the beginning of the tag

   while (*Statement && !isspace(*Statement))
      ++Statement;

   if (ptmp==Statement) // No tag !!!
      return HtmlParser_NullTag;

   if (*Statement)
   {
      if (*Statement == (HTCHECK_CHAR) 10)
         newRow();

      // Check for a tag with attributes: close the tag name string
      *Statement='\0';

      if (debug>5)
         cout << "Tag found: " << ptmp << endl;

      // go on
      ++Statement;

      // Skip everything but alphabetic chars after the tag
      while (*Statement && !isalpha(*Statement))
      {
         if (*Statement == (HTCHECK_CHAR) 10)
            newRow();
         ++Statement;
      }

      if (*Statement)
         has_attributes = true; // The current tag has attributes

   }

   htmlstatement.SetTag(ptmp);

   // We got the TAG info we need
   int old_location = location;
   if (! CheckTag(htmlstatement))
      memo=false;    // Not store it
   else memo=true;

   // Should we insert a link description for the previous 'A' element?
   // Yes, when the link was open before this tag and is closed now.
   if (CurrentScheduler->Config->Boolean("store_link_info")
      && !(location & TAGlink) && (old_location & TAGlink) && LinkDescription.length() > 0)
   {
      if (!CurrentScheduler->GetDB()->InsertHtmlStatementLinkDescription(htmlstatement.GetIDUrl(),
            LastLinkTagPosition, encodeSGML(LinkDescription)))
         return HtmlParser_StatementFailed; // Failed
      LastLinkTagPosition = 0; // erase the position of the last tag with a link
   }

   if (ignore)
   {
      if (! (location & TAGscript))
      {
         // We just found a closing </SCRIPT> tag
         ignore = false;
         memo = true;
      }
      else memo = false;
   }
   else
   {
      if (location & TAGscript)    // We found a <SCRIPT> tag. We ignore the following tags
         ignore = true;
   }

   // We don't have to store it
   if (!memo)
      return HtmlParser_TagNotStored;

   if (has_attributes)
   {
      // Let's look for attributes
      // Starting point: Statement now points to the first attribute

      unsigned int AttrPosition = 0;

      while (*Statement)   // Until we reach the end look for attributes
      {
         ptmp = Statement;

         // Look for an attribute definition
         // Goes on until we reach:
         // 1) the end or until a whitespace not followed by '=' (empty attribute)
         // 2) a '=': the attribute has a content which may contain SGML entities too

         while (*Statement && !isspace(*Statement) && *Statement!='=')
            ++Statement;

         while (*Statement && isspace(*Statement))
         {
            if (*Statement == (HTCHECK_CHAR) 10)
               newRow();

            *Statement++='\0'; // Close the attribute string
         }

         if (ptmp == Statement) // No attribute !!!
         {
            // Hey guys, if statement is not empty, this may
            // represent a malformed tag. Let's show it!
            if (*Statement)
               malformed_tag = true;

            // Terminate here: this ends the attributes loop
            *Statement='\0';
            continue;
         }

         // Reset htmlattribute variable
         htmlattribute.Reset();

         // Set the IDUrl for the HtmlAttribute object
         htmlattribute.SetIDUrl(htmlstatement.GetIDUrl());

         // Set the tag position
         htmlattribute.SetTagPosition(TagPosition);

         // Set the attribute position
         htmlattribute.SetAttrPosition(++AttrPosition);

         bool has_content = false;
         // Store attribute is set according to the 'store_only_links' value
         store_statement = !CurrentScheduler->Config->Boolean("store_only_links");

         if (*Statement && *Statement == '=')
         {
            has_content = true;  // Attribute has a content
            *Statement++='\0';
         }

         htmlattribute.SetAttribute((char *)ptmp);

         if (has_content)
         {
            // The content can be written inside '"' or not.
            // If yes we search for next '"', else for the first space.

            while(*Statement && (isspace(*Statement) || *Statement=='='))
            {
               if (*Statement == (HTCHECK_CHAR) 10)
                  newRow();
               ++Statement;   // Skip spaces after '=' or multiple '='
            }

            if (*Statement)
            {

               // Not empty content
               if (*Statement == '"' || *Statement == '\'')
               {

                  char qm=*Statement;  // Store the quotation mark
                  ++Statement;         // Skip quotation mark (' or ")

                  ptmp=Statement;

                  // Look for a closing quotation mark
                  Statement = (HTCHECK_CHAR *) strchr ((char *)ptmp, qm);

                  if (Statement)
                  {
                     // Found.
                     *Statement = '\0';
                     ++Statement;
                  }
                  else
                  {
                     // Not found the closing quotation mark
                     // Everything is content
                     Statement=ptmp;
                     while (*Statement)
                     {
                        if (*Statement == (HTCHECK_CHAR) 10)
                           newRow();
                        ++Statement; // reach the end
                     }
                  }

                  // Set content
                  htmlattribute.SetContent((char *)ptmp);

               }
               else
               {
                  // Content outside a quotation mark
                  ptmp=Statement;

                  // Content is considered until a whitespace or the end
                  // is reached.

                  while (*Statement && !isspace(*Statement))
                     ++Statement;

                  if (*Statement)
                  {
                     if (*Statement == (HTCHECK_CHAR) 10)
                        newRow();
                     *Statement='\0';
                     ++Statement;
                  }

                  htmlattribute.SetContent((char *)ptmp);

               }

            }

            // We got a HTML attribute with a content.
            // Let's find a Link

            switch(FindLink())
            {
               case HtmlParser_LinkFailed:   // insert of the link failed
                  return HtmlParser_LinkFailed;

               case HtmlParser_NormalLink:   // it has a link
               case HtmlParser_DirectLink:   // ditto
               case HtmlParser_Anchor: // we must store it
                  store_statement = true;   // the attribute contains a link
                  break;

               case HtmlParser_NoLink: // No Link. Do nothing
               default:
                  break;
            }

            // Accessibility checks
            if (CurrentScheduler->Config->Boolean("accessibility_checks"))
            {
               if (CurrentTag == HtmlStatement::Tag_IMG ||
                  CurrentTag == HtmlStatement::Tag_INPUT)
               {
                  store_statement = true;
                  // We are inside an IMG (or INPUT) tag
                  if (htmlattribute.GetAttributeLabel() == HtmlAttribute::Attr_ALT)
                  {
                     // ALT specified
                     tag_acheck |= ACHECKTAGalt;
                     CurrentAltText = htmlattribute.GetContent();
                     AltAttrPosition = htmlattribute.GetAttrPosition();
                  }
                  if (CurrentTag == HtmlStatement::Tag_INPUT &&
                     htmlattribute.GetAttributeLabel() == HtmlAttribute::Attr_TYPE
                     && !mystrncasecmp(htmlattribute.GetContent().c_str(), "image", 5) )
                  {
                     // INPUT image specified
                     tag_acheck |= ACHECKTAGinputimg;
                  }
               }
            }
         }

         // The attribute is stored if store attribute is set to true

         if (store_statement)
         {
            // The tag also has to be inserted
            if (!tag_stored)
            {
               // Database Insertion of the HtmlStatement object

               // Check if it fails
               if (!CurrentScheduler->GetDB()->Insert(htmlstatement))
                  return HtmlParser_StatementFailed; // Failed

               tag_stored = true;

            }

            // Database Insertion of the HtmlAttribute object
            if (!CurrentScheduler->GetDB()->Insert(htmlattribute))
               return HtmlParser_AttributeFailed; // Failed

         }

         while (*Statement && isspace(*Statement))
         {
            if (*Statement == (HTCHECK_CHAR) 10)
               newRow();
            ++Statement;   // goes on ...
         }
      }
   }
   else
   {
      // Tag with No attributes.
      // NOTE(review): store_statement is not recomputed in this function
      // for such tags, so the value tested here is the one left by an
      // earlier tag unless CheckTag() sets it - confirm.
      if (store_statement)
      {
         // The tag also has to be inserted
         if (!CurrentScheduler->GetDB()->Insert(htmlstatement))
            return HtmlParser_StatementFailed; // Failed
      }

   }

   if (malformed_tag)
      return HtmlParser_MalformedTag;
   else if (store_statement && // Accessibility checks
      CurrentScheduler->Config->Boolean("accessibility_checks"))
   {
      // Accessibility checks
      if (CurrentTag == HtmlStatement::Tag_IMG)
      {
         // Missing ALT (Open Accessibility Check: Code 1)
         if (!(tag_acheck & ACHECKTAGalt))
         {
            // The accessibility check needs to be inserted
            if (!InsertAccessibilityCheck(
               CurrentScheduler->CurrentUrl->GetID(), TagPosition, 0, 1))
                  return HtmlParser_AccessibilityCheckFailed; // Failed
         }
         else
         {
            unsigned altcheck = CheckAlt();
            // OAC #2
            if (altcheck & ALTsameasfile)
            {
               // The accessibility check needs to be inserted
               if (!InsertAccessibilityCheck(
                  CurrentScheduler->CurrentUrl->GetID(), TagPosition,
                  AltAttrPosition, 2))
                     return HtmlParser_AccessibilityCheckFailed; // Failed
            }

            if (altcheck & ALTlong) // OAC #3
            {
               // The accessibility check needs to be inserted
               if (!InsertAccessibilityCheck(
                  CurrentScheduler->CurrentUrl->GetID(), TagPosition,
                  AltAttrPosition, 3))
                     return HtmlParser_AccessibilityCheckFailed; // Failed
            }

            // Empty ALT if image is used as an anchor - OAC #7
            if ((altcheck & ALTempty) && (location & TAGlink))
            {
               // The accessibility check needs to be inserted
               if (!InsertAccessibilityCheck(
                  CurrentScheduler->CurrentUrl->GetID(), TagPosition,
                  AltAttrPosition, 7))
                     return HtmlParser_AccessibilityCheckFailed; // Failed
            }
         }
      }
      else if (location & TAGhx)
      {
         if (HxStep > 1)
         {
            // Wrong header nesting (a header level has been skipped)
            // OAC #37, 38, 39, 40, 41
            if (!InsertAccessibilityCheck(
               CurrentScheduler->CurrentUrl->GetID(), TagPosition, 0, (35+CurrentHx)))
                  return HtmlParser_AccessibilityCheckFailed; // Failed
         }
      }
      else if (CurrentTag == HtmlStatement::Tag_B)
      {
         // B element should not be used (OAC #116)
         if (!InsertAccessibilityCheck(
            CurrentScheduler->CurrentUrl->GetID(), TagPosition, 0, 116))
               return HtmlParser_AccessibilityCheckFailed; // Failed
      }
      else if (CurrentTag == HtmlStatement::Tag_I)
      {
         // I element should not be used (OAC #117)
         if (!InsertAccessibilityCheck(
            CurrentScheduler->CurrentUrl->GetID(), TagPosition, 0, 117))
               return HtmlParser_AccessibilityCheckFailed; // Failed
      }
      else if (CurrentTag == HtmlStatement::Tag_BLINK)
      {
         // BLINK element should not be used (OAC #27)
         if (!InsertAccessibilityCheck(
            CurrentScheduler->CurrentUrl->GetID(), TagPosition, 0, 27))
               return HtmlParser_AccessibilityCheckFailed; // Failed
      }
      else if (CurrentTag == HtmlStatement::Tag_MARQUEE)
      {
         // MARQUEE element should not be used (OAC #69)
         if (!InsertAccessibilityCheck(
            CurrentScheduler->CurrentUrl->GetID(), TagPosition, 0, 69))
               return HtmlParser_AccessibilityCheckFailed; // Failed
      }
      else if (location & TAGrefresh)
      {
         unsigned acheckcode(72); // default -- refresh
         // Different destination URL ... it is a redirect
         if (link.GetIDUrlSrc() != link.GetIDUrlDest())
            acheckcode = 71;

         // Auto-redirect (OAC #71) or auto-refresh (OAC #72) should not be used
         if (!InsertAccessibilityCheck(
            CurrentScheduler->CurrentUrl->GetID(), TagPosition, 0, acheckcode))
               return HtmlParser_AccessibilityCheckFailed; // Failed
      }
      else if (CurrentTag == HtmlStatement::Tag_INPUT)
      {
         // Checks performed only on INPUT elements of type image
         if (tag_acheck & ACHECKTAGinputimg)
         {
            // Missing ALT for input images (OAC #58)
            if (!(tag_acheck & ACHECKTAGalt))
            {
               if (!InsertAccessibilityCheck(
                  CurrentScheduler->CurrentUrl->GetID(), TagPosition, 0, 58))
                     return HtmlParser_AccessibilityCheckFailed; // Failed
            }
            else
            {
               unsigned altcheck = CheckAlt();
               // OAC #61
               if (altcheck & ALTsameasfile)
               {
                  // The accessibility check needs to be inserted
                  if (!InsertAccessibilityCheck(
                     CurrentScheduler->CurrentUrl->GetID(), TagPosition,
                     AltAttrPosition, 61))
                        return HtmlParser_AccessibilityCheckFailed; // Failed
               }

               if (altcheck & ALTlong) // OAC #60
               {
                  // The accessibility check needs to be inserted
                  if (!InsertAccessibilityCheck(
                     CurrentScheduler->CurrentUrl->GetID(), TagPosition,
                     AltAttrPosition, 60))
                        return HtmlParser_AccessibilityCheckFailed; // Failed
               }
               // This branch used to repeat the ALTlong test above and could
               // never be taken. ALTempty is the only ALT flag not handled
               // for input images.
               // NOTE(review): confirm that OAC #59 is the empty ALT check.
               else if (altcheck & ALTempty) // OAC #59
               {
                  // The accessibility check needs to be inserted
                  if (!InsertAccessibilityCheck(
                     CurrentScheduler->CurrentUrl->GetID(), TagPosition,
                     AltAttrPosition, 59))
                        return HtmlParser_AccessibilityCheckFailed; // Failed
               }
            }
         }
      }
   }

   return HtmlParser_OK;

}



// Examines the current tag/attribute pair (htmlstatement / htmlattribute)
// and works out whether it carries a link, an anchor or some document
// metadata (META refresh / content-type / content-language / description /
// keywords, HTML lang, BASE href).
//
// When a link is found (is_a_link > 0) the destination URL is built
// relative to BaseUrl, handed to the scheduler and a 'link' record is
// written into the database.
//
// Return value:
//    HtmlParser_NoLink      - nothing to store
//    HtmlParser_NormalLink  - a normal link has been stored
//    HtmlParser_DirectLink  - a direct link has been stored
//    HtmlParser_Anchor      - an anchor (id / A name) has been found
//    HtmlParser_LinkFailed  - the link record could not be inserted

HtmlParser::HtmlParser_Codes HtmlParser::FindLink ()
{

   const HtmlStatement::ElementLabel Tag (htmlstatement.GetElementLabel());
   const HtmlAttribute::AttributeLabel Attribute (htmlattribute.GetAttributeLabel());

   // Values: 0 - No Link ; 1 - Normal Link ; 2 - Direct Link
   //        -1 - Anchor (no link)
   int is_a_link(0);

   //std::cout << "TAG name: " << htmlstatement.GetTag() << " - label: " << Tag
   //<< " / Attribute name: " << htmlattribute.GetAttribute() << " - label: " << Attribute << std::endl;

   std::string Content(htmlattribute.GetContent());

   ///////
      //    'A href'
   ///////

   if (Tag == HtmlStatement::Tag_A && Attribute == HtmlAttribute::Attr_HREF) // A href
   {
      is_a_link = 1;
      location |= TAGlink;
      LastLinkTagPosition = TagPosition; // set the tag position with the last link
      LinkDescription.clear(); // first erase the description
   }

   ///////
      //    Any 'id' attribute or "A name" could be suitable for anchors settings
   ///////
   else if (Attribute == HtmlAttribute::Attr_ID || // Any id attribute
      (Tag == HtmlStatement::Tag_A && Attribute == HtmlAttribute::Attr_NAME)) // A name
   {
      // It's an anchor. Let's pass its content through encodeSGML()
      htmlattribute.SetContent(encodeSGML(htmlattribute.GetContent()));
      // And let's store it always ... even if it's not a link
      is_a_link = -1;   // Special case - not to be stored in the link table
   }

   ///////
      //    'META' tag
   ///////

   else if (Tag == HtmlStatement::Tag_META)
   {

      if (Attribute == HtmlAttribute::Attr_CONTENT) // Here it's the info
      {
         // The whole statement is re-parsed as a list of name=value pairs
         Configuration attrs;

         attrs.NameValueSeparators("=");
         attrs.Add(htmlstatement.GetStatement().c_str());

         if (!attrs["http-equiv"].empty())
         {
            if (! mystrcasecmp(attrs["http-equiv"], "refresh"))
            {

               location |= TAGrefresh;

               std::string tmp (htmlattribute.GetContent());
               const HTCHECK_CHAR* q = mystrcasestr(tmp.c_str(), "url=");

               if (q)
               {
                  // Found a Meta 'refresh' directive
                  if (debug > 4)
                     cout << " META refresh found. " << endl;

                  q+=3; // skipping "URL"

                  // And any junk space between 'URL' and '=' and after
                  while (*q && ((*q == '=')
                     || isspace(static_cast<unsigned char>(*q))))
                  {
                     if (*q == (HTCHECK_CHAR) 10)
                        newRow();
                     ++q;
                  }

                  // Look for the end of the URL. The buffer is only read:
                  // writing a terminator through c_str() is undefined
                  // behaviour and may touch a buffer shared with other
                  // strings.
                  const HTCHECK_CHAR* url_end(q);
                  while (*url_end && (*url_end != ';') && (*url_end != '"') &&
                     !isspace(static_cast<unsigned char>(*url_end)))
                        ++url_end;

                  is_a_link = 1;

                  Content.assign(q, url_end);

               }
            }
            else if (! mystrcasecmp(attrs["http-equiv"], "content-type"))
            {
               std::string tmp (htmlattribute.GetContent());
               const HTCHECK_CHAR* q = mystrcasestr(tmp.c_str(), "charset=");

               if (q)
               {
                  // Found a Meta 'content-type' directive

                  if (debug > 4)
                     cout << " META content-type found. " << endl;

                  q+=7; // skipping "charset"

                  // And any junk space between 'charset' and '=' and after
                  while (*q && ((*q == '=')
                     || isspace(static_cast<unsigned char>(*q))))
                  {
                     if (*q == (HTCHECK_CHAR) 10)
                        newRow();
                     ++q;
                  }

                  // The charset name ends at the first space (read-only scan)
                  const HTCHECK_CHAR* charset_end(q);
                  while (*charset_end
                     && !isspace(static_cast<unsigned char>(*charset_end)))
                        ++charset_end;

                  const std::string charset_value(q, charset_end);

                  Charset = charset_value.c_str(); // Set the Charset
               }
            }
            else if (! mystrcasecmp(attrs["http-equiv"], "content-language"))
            {
               CurrentScheduler->CurrentUrl->SetContentLanguage( htmlattribute.GetContent() );
            }
         }
         // No 'http-equiv': the 'name' attribute drives the behaviour
         else if (! mystrcasecmp(attrs["name"], "description"))
            Description = htmlattribute.GetContent(); // Set the description
         else if (! mystrcasecmp(attrs["name"], "keywords"))
            Keywords = htmlattribute.GetContent(); // Set the keywords
#ifdef HTDIG_NOTIFICATION
         else if (! mystrcasecmp(attrs["name"], "htdig-email"))
            HtDigEmail = htmlattribute.GetContent(); // Set the email
         else if (! mystrcasecmp(attrs["name"], "htdig-email-subject"))
            HtDigEmailSubject =
               htmlattribute.GetContent(); // Set the subject
         else if (! mystrcasecmp(attrs["name"], "htdig-notification-date"))
            HtDigNotificationDate =
               htmlattribute.GetContent(); // Set the date of notification
#endif
      }
   }

   ///////
      //    'HTML' tag
   ///////
   else if (Tag == HtmlStatement::Tag_HTML)
   {
      // Set the document language
      if (Attribute == HtmlAttribute::Attr_LANG // lang
        || Attribute == HtmlAttribute::Attr_XML_LANG) // xml:lang
                DocLanguage = htmlattribute.GetContent();
   }

   ///////
      //    'FRAME' tag
   ///////
   else if (Tag == HtmlStatement::Tag_FRAME)
   {
      if (Attribute == HtmlAttribute::Attr_SRC) // FRAME src
         is_a_link = 1;
   }

   ///////
      //    'EMBED' tag
   ///////
   else if (Tag == HtmlStatement::Tag_EMBED)
   {
      if (Attribute == HtmlAttribute::Attr_SRC) // EMBED src
         is_a_link = 2; // Direct Link
   }

   ///////
      //    'OBJECT' tag
   ///////
   else if (Tag == HtmlStatement::Tag_OBJECT)
   {
      if (Attribute == HtmlAttribute::Attr_SRC) // OBJECT src
         is_a_link = 2; // Direct Link
      else if (Attribute == HtmlAttribute::Attr_DATA) // OBJECT data
         is_a_link = 2; // Direct Link
   }

   ///////
      //    'IMG' tag
   ///////
   else if (Tag == HtmlStatement::Tag_IMG)
   {
      CurrentTag = Tag; // within an image
      if (Attribute == HtmlAttribute::Attr_SRC) // IMG src
      {
         CurrentResourceRef = Content;
         is_a_link = 2; // Direct Link
      }
      else if (Attribute == HtmlAttribute::Attr_LOWSRC) // IMG lowsrc
         is_a_link = 2; // Direct Link
   }

   ///////
      //    'AREA' tag
   ///////
   else if (Tag == HtmlStatement::Tag_AREA)
   {
      if (Attribute == HtmlAttribute::Attr_HREF) // AREA href
         is_a_link = 1;
   }

   ///////
      //    'LINK' tag
   ///////
   else if (Tag == HtmlStatement::Tag_LINK)
   {
      if (Attribute == HtmlAttribute::Attr_HREF) // LINK href
         is_a_link = 1;
   }
   ///////
      //    'INPUT' tag
   ///////
   else if (Tag == HtmlStatement::Tag_INPUT)
   {
      if (! htmlstatement.isClosingTag())
      {
         CurrentTag = Tag;
         if (Attribute == HtmlAttribute::Attr_SRC) // INPUT src
         {
            CurrentResourceRef = Content;
            is_a_link = 2; // Direct Link
         }
      }
   }
   ///////
      //    'BASE' tag (Ugly command!)  ;-)
   ///////
   else if (Tag == HtmlStatement::Tag_BASE)
   {
      if (Attribute == HtmlAttribute::Attr_HREF) // BASE href
      {
         // Let's define a new BASE Url, used for resolving
         // relative URIs. I don't know who can use this, but HTML 4.0
         // enables it.

         if (BaseUrl != CurrentScheduler->CurrentUrl)
            delete BaseUrl; // Base Url different from CurrentUrl. So delete it.

         BaseUrl = new _Url (encodeSGML(Content), *(CurrentScheduler->CurrentUrl));

         if (BaseUrl)
         {
            if (debug > 0)
               cout << " New Base Url for relative URIs: "
                  << BaseUrl->get() << endl;
         }
         else BaseUrl = CurrentScheduler->CurrentUrl;

      }
   }

   ///////
      //    Let's store any other 'href' attribute
   ///////
   else if (Attribute == HtmlAttribute::Attr_HREF)
      is_a_link = 1;

   ///////
      //    Let's store any other 'src' attribute
   ///////
   else if (Attribute == HtmlAttribute::Attr_SRC)
      is_a_link = 1;

   ///////
      //    Let's store any 'background' attribute (BODY, TABLE, etc ...)
   ///////
   else if (Attribute == HtmlAttribute::Attr_BACKGROUND)
      is_a_link = 2; // Direct Link


   // Let's store the links

   if (is_a_link > 0)
   {

      std::string DecodedContent(encodeSGML(Content));

      bool bad_encoded = false;

      // The URL encoding check is skipped for 'javascript:' pseudo-URLs
      if (mystrncasecmp("javascript:", DecodedContent.c_str(), 11))
      {
#ifdef HTCHECK_DEBUG
         std::cout << "SGML decoding: " << DecodedContent << std::endl;
#endif
         std::string UrlDecodedContent(DecodedContent);

#ifdef HTCHECK_DEBUG
         std::cout << "SGML decoded: " << UrlDecodedContent << std::endl;
#endif
         static const std::string reserved_chars(((*CurrentScheduler->Config)["url_reserved_chars"]).get());
         encodeURL(UrlDecodedContent, reserved_chars);  // Encoded URL (URL)

         // Let's check whether the URL is not well encoded:
         // encodeURL() had to change something
         if (DecodedContent.compare(UrlDecodedContent))
         {
            if (debug > 0)
            {
               cout << " ! URL not perfectly encoded: " << Content << " rather than "
               << UrlDecodedContent << endl;
            }

            bad_encoded = true;  // Bad encoding of the URL
         }
      }

      _Url *DestUrl = new _Url (DecodedContent,
            *BaseUrl);

      if (DestUrl)
      {

         unsigned int IDUrlDest; // Valid referenced Url

         CurrentScheduler->AddUrl(DestUrl->get().get(), IDUrlDest);

         if (debug > 3)
            cout << htmlattribute.GetContent() << " -> "
               << DestUrl->get() << endl;

         link.Reset();     // reset the previous link object

         // Set the source Url ID
         link.SetIDUrlSrc(CurrentScheduler->CurrentUrl->GetID());

         // Set the dest Url ID
         link.SetIDUrlDest(IDUrlDest);

         // Set the tag position
         link.SetTagPosition(htmlstatement.GetTagPosition());

         // Set the attribute position
         link.SetAttrPosition(htmlattribute.GetAttrPosition());

         if (bad_encoded)
            link.SetLinkResult("BadEncoded");

         // Set the anchor field, if a '#' is present in the
         // HTML attribute's content
         const std::string::size_type position(htmlattribute.GetContent().rfind('#'));

         if (position != std::string::npos)
         {
            // Everything after the last '#'
            const std::string from (htmlattribute.GetContent().c_str() + (position + 1));

            // There's an anchor
            link.SetAnchor(encodeSGML(from));
         }

         // Set the Link Type
         switch(is_a_link)
         {
            case 1:
               link.SetLinkType("Normal");
               break;
            case 2:
               link.SetLinkType("Direct");
               break;
         }

         // Let's check whether it regards a 'file://' call
         // which is certainly broken, or an e-mail address

         if (CurrentScheduler->CurrentLinkSchedule.GetStatus()
            == SchedulerEntry::Url_FileProtocol)
         {
            // Hey, there's a 'file://' call, it's an error!

            link.SetLinkResult("Broken");
            if (debug > 2)
               cout << " 'file:/' link, error!" << endl;
         }
         else if (CurrentScheduler->CurrentLinkSchedule.GetStatus()
            == SchedulerEntry::Url_Malformed)
         {
            // Hey, there's a malformed URL, it's an error!

            link.SetLinkResult("Broken");
            if (debug > 2)
               cout << " link to a malformed URL, error!" << endl;
         }
         else if (CurrentScheduler->CurrentLinkSchedule.GetStatus()
            == SchedulerEntry::Url_EMail)
         {
            // There's an e-mail address!
            link.SetLinkResult("EMail");
            if (debug > 2)
               cout << " e-mail address!" << endl;
         }
         else if (CurrentScheduler->CurrentLinkSchedule.GetStatus()
            == SchedulerEntry::Url_Javascript)
         {
            // There's a Javascript inserted through the pseudo-protocol
            // that is to say 'javascript:'
            link.SetLinkResult("Javascript");
            if (debug > 2)
               cout << " link to Javascript URL "
                  << "(through the 'javascript:' pseudo-protocol)!" << endl;
         }

         // Update the Domain information for the link
         switch(CurrentScheduler->CurrentLinkSchedule.GetDomain())
         {
            case SchedulerEntry::Url_External:
               link.SetLinkDomain(Link::Link_External);
               break;
            case SchedulerEntry::Url_Internal:
               if (CurrentScheduler->CurrentLinkSchedule.GetIDServer()
                  == CurrentScheduler->CurrentUrl->GetIDServer())
                  link.SetLinkDomain(Link::Link_SameServer);
               else
                  link.SetLinkDomain(Link::Link_Internal);
               break;
            case SchedulerEntry::Url_Unknown:
               link.SetLinkDomain(Link::Link_Unknown);
               break;
         }

         // Write the link object
         if (!CurrentScheduler->GetDB()->Insert(link))
         {
            // Release the destination URL on the failure path too,
            // otherwise it would be leaked by the early return
            delete DestUrl;
            return HtmlParser_LinkFailed;
         }

      }

      delete DestUrl;

   }

   //cout << "TAG: " << Tag << " - LOCATION POST: " << location << endl;
   switch (is_a_link)
   {
      case 0:
         return HtmlParser_NoLink;
      case 1:
         return HtmlParser_NormalLink;
      case 2:
         return HtmlParser_DirectLink;
      case -1:
         return HtmlParser_Anchor;
   }

   // We should not get up to here, anyway this avoids warning messages
   return HtmlParser_NoLink;

}


int HtmlParser::CheckTag(const HtmlStatement& tag)
{

   // More controls in order to decide which tags to store
   if (debug > 5)
      cout << "Checking tag: " << tag.GetTag() << endl;

   const HtmlStatement::ElementLabel label(tag.GetElementLabel());
   
   ///////
      //    'HEAD' tag
   ///////
   if (label == HtmlStatement::Tag_HEAD)
   {
        if (! tag.isClosingTag()) {
            location |= TAGhead;
	    CurrentTag = label;
        }
        else {
            location &= ~TAGhead;
	}
   }
   ///////
      //    'SCRIPT' tag
   ///////
   else if (label == HtmlStatement::Tag_SCRIPT)
   {
        if (! tag.isClosingTag()) {
            location |= TAGscript;
        }
        else {
            location &= ~TAGscript;
	}
   }
   ///////
      //    'TITLE' tag
   ///////
   else if (label == HtmlStatement::Tag_TITLE)
   {
        if (location & TAGhead)
	{
	    if (! tag.isClosingTag()) {
                location |= TAGtitle;
		doc_acheck |= ACHECKDOCtitle;
	    }
	    else {
                location &= ~TAGtitle;
	    }
	}
   }
   ///////
      //    'A' tag
   ///////
   else if (label == HtmlStatement::Tag_A)
   {
	if (! tag.isClosingTag()) {
            location &= ~TAGlink;
	}
   }

   ////////////////////////////////////////////////////////////
   ////////////////////////////////////////////////////////////
   // Accessibility Checks
   ////////////////////////////////////////////////////////////
   ////////////////////////////////////////////////////////////
   if (!CurrentScheduler->Config->Boolean("accessibility_checks"))
		return 1;

   ///////
      //    'Hx' tag
   ///////
   if (label >= HtmlStatement::Tag_H1 && label <= HtmlStatement::Tag_H6)
   {
	if (! tag.isClosingTag()) {
	    location |= TAGhx;
	    CurrentHx = (label - HtmlStatement::Tag_H1 + 1);

	    if ((HxStep = (CurrentHx - PreviousHx)) > 1)
		store_statement = true;

	    PreviousHx = CurrentHx;
	}
	else {
	    location &= ~TAGhx;
	}
    }
   ///////
      //    'B' tag
   ///////
   else if (label == HtmlStatement::Tag_B)
   {
	if (! tag.isClosingTag()) {
	    CurrentTag = label;
	}
   }
   ///////
      //    'I' tag
   ///////
   else if (label == HtmlStatement::Tag_I)
   {
	if (! tag.isClosingTag()) {
	    CurrentTag = label;
	}
   }
   ///////
      //    'BLINK' tag
   ///////
   else if (label == HtmlStatement::Tag_BLINK)
   {
	if (! tag.isClosingTag()) {
	    CurrentTag = label;
	}
   }
   ///////
      //    'MARQUEE' tag
   ///////
   else if (label == HtmlStatement::Tag_MARQUEE)
   {
	if (! tag.isClosingTag()) {
	    CurrentTag = label;
	}
   }
   
   return 1;

}

// Insert an accessibility check record into the database
bool HtmlParser::InsertAccessibilityCheck(unsigned int idurl, unsigned int tagposition,
         unsigned int attrposition, unsigned int code)
{
	// Accessibility Check object
	AccessibilityCheck accessibilitycheck;

	// Set the parameters
    accessibilitycheck.SetIDCheck(AccessibilityCheck::GetLastID() +1);
	accessibilitycheck.SetIDUrl(idurl);
	accessibilitycheck.SetTagPosition(tagposition);
	accessibilitycheck.SetAttrPosition(attrposition);
	accessibilitycheck.SetCode(code);
	// Updates the check ID (counter)
	AccessibilityCheck::SetLastID(accessibilitycheck.GetIDCheck());

	// The accessibility check needs to be inserted
	return CurrentScheduler->GetDB()->Insert(accessibilitycheck);
}

// Returns the length of an SGML string, not counting leading spaces and
// counting every run of consecutive spaces as a single character.
// A null pointer gives 0.
unsigned HtmlParser::CountSGMLStringLength(const char* str)
{
	if (!str)
		return 0;

	unsigned counter(0);
	bool previous_was_space(false);

	for (const char* p = str; *p; ++p)
	{
		// The cast is required: isspace() is undefined for negative
		// values, that is for non-ASCII bytes when 'char' is signed
		const bool is_space(isspace(static_cast<unsigned char>(*p)) != 0);

		// Ignore consecutive and initial spaces
		if (is_space && (!counter || previous_was_space))
		{
			previous_was_space = true;
			continue;
		}

		previous_was_space = is_space;
		++counter;
	}

	return counter;
}

// Checks the current ALT text (CurrentAltText) against the current
// resource reference (CurrentResourceRef) and returns a bit mask:
//    ALTempty      - the ALT text is empty or made of blanks only
//    ALTsameasfile - the ALT text is equal (case insensitive) to the
//                    file name of the resource
//    ALTlong       - the ALT text counts 150 characters or more
unsigned HtmlParser::CheckAlt()
{
	static const char* const blanks = "\n \r\t";
	unsigned rv(0);

	////////////////////////////////////
	// ALT Text
	////////////////////////////////////
	// Remove leading and trailing spaces
	const std::string::size_type alt_first_valid(CurrentAltText.find_first_not_of(blanks));
	const std::string::size_type alt_last_valid(CurrentAltText.find_last_not_of(blanks));

	if (alt_first_valid == std::string::npos || alt_last_valid == std::string::npos)
	{
		// Nothing but blanks
		rv |= ALTempty;
		return rv;
	}

	const std::string clean_alt(CurrentAltText.substr(alt_first_valid,
		alt_last_valid - alt_first_valid + 1));

	////////////////////////////////////
	// Link
	////////////////////////////////////
	// Remove leading and trailing spaces
	const std::string::size_type first_valid(CurrentResourceRef.find_first_not_of(blanks));
	const std::string::size_type last_valid(CurrentResourceRef.find_last_not_of(blanks));

	if (first_valid != std::string::npos && last_valid != std::string::npos)
	{
		const std::string clean_resource_ref(CurrentResourceRef.substr(first_valid,
			last_valid - first_valid + 1));

		// Get the file name: what follows the last slash or, when the
		// reference has no slash at all (e.g. "logo.gif"), the whole reference
		const std::string::size_type last_slash(clean_resource_ref.find_last_of('/'));
		const std::string file_name(last_slash == std::string::npos ?
			clean_resource_ref : clean_resource_ref.substr(last_slash + 1));

		// Compares the file name and the alt text
		if (file_name.length() == clean_alt.length())
		{
			const std::string::size_type l(clean_alt.length());
			bool identical(true);

			// Lowercase comparison of the file name and the ALT text.
			// tolower() needs a value representable as unsigned char.
			for (std::string::size_type j(0); identical && j < l; ++j)
			{
				if (tolower(static_cast<unsigned char>(file_name[j]))
					!= tolower(static_cast<unsigned char>(clean_alt[j])))
						identical = false;
			}

			if (identical)
				rv |= ALTsameasfile;
		}
	}

	// Controls the length of the text
	// Encode the ALT and count its length
	const unsigned counter(CountSGMLStringLength(encodeSGML(clean_alt).c_str()));

	// ALT of 150 characters or more
	if (counter >= 150)
		rv |= ALTlong;

	return rv;
}

#ifdef HTDIG_NOTIFICATION

// Properly set the htDig notification date
// Disclaimer: the logic behind this function has been taken from
// the ht://Dig code. The actual code has been slightly modified.
// However, without their work it would have taken much more
// time to develop it. Thanks guys. :)
//
// The string is expected to hold three numbers separated by spaces or
// punctuation. Three interpretations are tried in turn: year-month-day
// (ISO 8601), month-day-year (American) and finally a guess based on the
// size of the numbers. As soon as one of them passes testDate() the
// notification date is set through setHtDigNotificationDate().
//
// Returns true if a valid date has been found and set, false otherwise.
bool HtmlParser::parseDate(const std::string& date)
{
    std::string scandate (date);
    int dd(-1), mm(-1), yy(-1), t(0);

    // Convert punctuation into spaces for sscanf. The whole string must be
    // visited, otherwise '-' separators are read as minus signs.
    for (std::string::iterator s(scandate.begin()); s != scandate.end(); ++s) {
        if (ispunct(static_cast<unsigned char>(*s)))
            *s = ' ';
    }

    //////////////////////////////////////
    // Try with the ISO 8601 standard
    //////////////////////////////////////
    sscanf(scandate.c_str(), "%d%d%d", &yy, &mm, &dd);

    // Test the date
    if (testDate(dd, mm, yy))
    {
        setHtDigNotificationDate(dd, mm, yy);
        return true;
    }

    //////////////////////////////////////
    // Try with the American format
    //////////////////////////////////////

    // Forget the previous attempt: when fewer than three numbers are
    // present sscanf leaves the remaining variables untouched
    dd = mm = yy = -1;

    sscanf(scandate.c_str(), "%d%d%d", &mm, &dd, &yy);
    if (mm > 31 && dd <= 12 && yy <= 31)
    {
        // probably got yyyy-mm-dd instead of mm/dd/yy
        t = mm; mm = dd; dd = yy; yy = t;
    }

    // Test the date
    if (testDate(dd, mm, yy))
    {
        setHtDigNotificationDate(dd, mm, yy);
        return true;
    }

    //////////////////////////////////////
    // No luck - let's try and guess it
    //////////////////////////////////////

    // OK, we took our best guess at the order the y, m & d should be.
    // Now let's see if we guessed wrong, and fix it.  This won't work
    // for ambiguous dates (e.g. 01/02/03), which must be given in the
    // expected format.

    // Code from ht://Dig 3.1
    if (dd > 31 && yy <= 31)
    {
        t = yy; yy = dd; dd = t;
    }
    if (mm > 31 && yy <= 31)
    {
        t = yy; yy = mm; mm = t;
    }
    if (mm > 12 && dd <= 12)
    {
        t = dd; dd = mm; mm = t;
    }

    // Test the date
    if (testDate(dd, mm, yy))
    {
        setHtDigNotificationDate(dd, mm, yy);
        return true;
    }

    return false;
}


// Range check of a date: the year must not be negative, the month must
// lie between 1 and 12 and the day between 1 and 31. The length of the
// specific month is not taken into account.
bool HtmlParser::testDate(const int dd, const int mm, const int yy) const
{
    const bool year_ok(yy >= 0);
    const bool month_ok(mm >= 1 && mm <= 12);
    const bool day_ok(dd >= 1 && dd <= 31);

    return year_ok && month_ok && day_ok;
}

// Stores the notification date into HtDigNotificationDate, written as
// year-month-day with '-' as separator (numbers are not zero padded)
void HtmlParser::setHtDigNotificationDate(const int dd, const int mm, const int yy)
{
    std::ostringstream formatted;

    formatted << yy;
    formatted << '-' << mm;
    formatted << '-' << dd;

    HtDigNotificationDate = formatted.str();
}

#endif
