/*

  HL7 Parser Prototype

*/

#include <cstdlib>
#include <iostream>
#include "globaldefines.h"
#include "hl7parse.h"
#include "stringutils.h"
#include "utils.h"

#pragma warn -8060

//=============================================================================
//
// HL7Value
//

//
// HL7Value::findInSubComponents0
//
// Searches for the named component amongst subcomponents of this value.
// This protected version returns NULL on failure. It is for use only in
// HL7Segment::findInValueTree.
//
HL7Value *HL7Value::findInSubComponents0(const char *token)
{
   // A value with no expected subcomponents has no chains to search.
   if(numExpectedSubComponents <= 0)
      return NULL;

   // Select the hash bucket for this name, then walk its chain looking for
   // a case-insensitive match on the field name.
   const unsigned int bucket = D_HashTableKey(token) % NUMSUBCOMPCHAINS;

   for(HL7Value *node = subCompChains[bucket]; node; node = node->next)
   {
      if(!stricmp(node->fieldname, token))
         return node;
   }

   return NULL; // no subcomponent by that name
}

//
// HL7Value::findInSubComponents
//
// Searches for the named component amongst subcomponents of this value.
//
HL7Value *HL7Value::findInSubComponents(const char *token)
{
   // Shared placeholder handed back whenever the lookup fails, so callers
   // always receive a non-NULL pointer.
   static HL7Value dummyValue;

   // The protected variant performs the same bucket walk and yields NULL
   // when there are no subcomponents or no name matches.
   HL7Value *found = findInSubComponents0(token);

   return (found && found->fieldname) ? found : &dummyValue;
}

//
// HL7Value Destructor
//
// jhaley 20121019: Because properly freeing the single largest set of data
// structures in your program is usually a good idea...
//
HL7Value::~HL7Value()
{
   // Release the next link of the repeat chain, if any. That value's own
   // destructor releases the link after it, and so on down the chain.
   // (Applying delete to a NULL pointer is a no-op, so no test is needed.)
   delete repeatValues;
   repeatValues = NULL;

   // Release the subcomponent array; each element's destructor takes care
   // of its own subcomponents in turn.
   delete [] subComponents;
   subComponents = NULL;
}

//=============================================================================
//
// HL7Segment - Abstract Segment Base Class
//

// RTTI/Factory Construction Proxy
HL7Segment::Type HL7Segment::StaticType("HL7", NULL); // intentionally non-matching.

// Hash table chain head pointers, indexed by
// D_HashTableKeyCase(name) % NUMTYPECHAINS (see FindType and addType).
// Being a static array, every head starts out NULL.
HL7Segment::Type *HL7Segment::Type::segmentTypes[NUMTYPECHAINS];

//
// HL7Segment::Type::FindType
//
// Find the proxy class instance representing a particular type of HL7 message
// by the HL7 message type (ie., "MSH")
//
HL7Segment::Type *HL7Segment::Type::FindType(const char *pName)
{
   // Both the hash and the comparison are case-sensitive.
   const unsigned int bucket = D_HashTableKeyCase(pName) % NUMTYPECHAINS;

   for(Type *entry = segmentTypes[bucket]; entry; entry = entry->next)
   {
      if(strcmp(entry->name, pName) == 0)
         return entry; // exact name match
   }

   return NULL; // no registered type by that name
}

//
// HL7Segment::Type::addType
//
// Called by HL7Segment's constructor; adds the instance to this class's static
// hash table for runtime lookups.
// 
void HL7Segment::Type::addType()
{
   unsigned int hashcode;

   // Add it to the hash table; order is unimportant.
   hashcode = D_HashTableKeyCase(name) % NUMTYPECHAINS;
   this->next = segmentTypes[hashcode];
   segmentTypes[hashcode] = this;
}

//
// HL7Segment::SegmentForSegmentName
//
// Static HL7 message factory.
// Instantiate the proper type of segment object given an HL7 message name.
//
HL7Segment *HL7Segment::SegmentForSegmentName(const char *name)
{
   // Look up the registered proxy for this segment name; an unknown name
   // produces no segment at all.
   Type *segType = Type::FindType(name);
   if(!segType)
      return NULL;

   // Let the proxy create the concrete segment, then build its value tree
   // before handing it to the caller.
   HL7Segment *segment = segType->newSegment();
   segment->constructValueTree();

   return segment;
}

//
// HL7Segment::constructValueTreeRecursive
//
// Walks down the HL7FieldInfo structure, recursing whenever an entry in the
// array indicates it has multiple expected sub-components.
//
void HL7Segment::constructValueTreeRecursive(const HL7FieldInfo *info,
                                             int numComponents,
                                             HL7Value **root)
{
   HL7Value *rover;
   *root = new HL7Value [numComponents];

   rover = *root;

   for(int i = 0; i < numComponents; i++)
   {
      const HL7FieldInfo *current = &info[i];

      rover->level = current->level;
      rover->fieldname = current->fieldname;
      rover->numExpectedSubComponents = current->numExpectedSubComponents;
      rover->value = "";

      if(current->numExpectedSubComponents > 0)
      {
         constructValueTreeRecursive(current->subComponents,
                                     current->numExpectedSubComponents,
                                     &rover->subComponents);

         // construct subcomponent hash table
         memset(rover->subCompChains, 0, NUMSUBCOMPCHAINS*sizeof(HL7Value *));
         for(int sc = 0; sc < rover->numExpectedSubComponents-1; sc++)
         {
            unsigned int key =
               D_HashTableKey(rover->subComponents[sc].fieldname) % NUMSUBCOMPCHAINS;
            rover->subComponents[sc].next = rover->subCompChains[key];
            rover->subCompChains[key] = &(rover->subComponents[sc]);
         }
      }
      else
         rover->subComponents = NULL;

      // step to next at this level   
      ++rover;
   }
}

//
// HL7Segment::constructValueTree
//
// Kicks off the recursive value tree building process executed by the above
// routine with information about the segment's field info.
//
void HL7Segment::constructValueTree()
{
   constructValueTreeRecursive(getFieldInfo(), getNumFields(), &valueTree);

   // construct top level hash table
   memset(valueChains, 0, NUMSUBCOMPCHAINS * sizeof(HL7Value *));
   for(int i = 0; i < getNumFields() - 1; i++)
   {
      unsigned int key = D_HashTableKey(valueTree[i].fieldname) % NUMSUBCOMPCHAINS;
      valueTree[i].next = valueChains[key];
      valueChains[key] = &valueTree[i];
   }
}

//
// HL7Segment::destroyValueTree
//
// Recursively deletes all the HL7Value instances.
//
void HL7Segment::destroyValueTree(HL7Value *root)
{
   // Delete the HL7 value tree root components.
   // Freeing of subcomponents and repeat values is the responsibility of
   // HL7Value's destructor (as of 20121019; this plugs a massive memory leak).
   // root must be the start of an array allocated with new [] (as done by
   // constructValueTreeRecursive); passing NULL is harmless.
   delete [] root;
}

//
// HL7Segment Destructor
//
// Free the valueTree.
//
HL7Segment::~HL7Segment()
{
   // Nothing to release if the value tree was never built.
   if(!valueTree)
      return;

   destroyValueTree(valueTree);
   valueTree = NULL;
}

//
// HL7Segment::findInValueTree
//
// Send in a string consisting of hierarchically related field names separated
// by "." characters, ex: "phone_number.compound" - starting from the root
// value array of the segment, the names will be matched recursively and then
// the resulting value (if such exists) will be returned.
//
HL7Value *HL7Segment::findInValueTree(const char *fieldPath)
{
   // Placeholder returned whenever the path cannot be resolved, so callers
   // always get a non-NULL pointer back.
   static HL7Value dummyValue;

   // A NULL path cannot name anything.
   if(!fieldPath)
      return &dummyValue;

   // strtok modifies its input, so tokenize a private copy.
   // NB: strtok keeps hidden global state, so this routine is not reentrant.
   char *tempbuf = strdup(fieldPath);
   if(!tempbuf)
      return &dummyValue; // out of memory; treat as not found

   HL7Value *ret   = &dummyValue;
   char     *rover = strtok(tempbuf, "."); // get first token

   // An empty path, or one made only of '.' characters, yields no token at
   // all; hashing or comparing a NULL name must be avoided in that case.
   if(rover)
   {
      // find the first specified field name in the segment array
      unsigned int key = D_HashTableKey(rover) % NUMSUBCOMPCHAINS;
      HL7Value *cur = valueChains[key];
      while(cur && stricmp(cur->fieldname, rover))
         cur = cur->next;

      if(cur && cur->fieldname)
      {
         // continue to tokenize the string, descending at each match
         while((rover = strtok(NULL, ".")))
         {
            cur = cur->findInSubComponents0(rover); // need NULL on failure here...
            if(!cur)
               break; // not found.
         }

         ret = cur;
      }
   }

   // Map every failure mode onto the placeholder.
   if(!ret || !ret->fieldname)
      ret = &dummyValue;

   free(tempbuf);

   return ret;
}

//
// HL7Segment::valueForIndex
//
// Given one of the enumeration values defined for expected fields in HL7Segment
// descendant classes, this will return the value at that index.
//   ex: value = mshSegment->valueForIndex(MSHSegment::MSH_SPECIALCHARS);
//
HL7Value *HL7Segment::valueForIndex(int i)
{
   // Placeholder returned for out-of-range indices in RANGECHECK builds.
   static HL7Value dummyValue;

#ifdef RANGECHECK
   if(i < 0 || i >= getNumFields())
      return &dummyValue;
#endif

   // Without RANGECHECK the index is trusted as-is.
   return &valueTree[i];
}

//=============================================================================
//
// Tokenizer
//

// Token Names - For debug output.

// Printable names for token types, TOKEN_NUMTOKENS entries in all.
// NOTE(review): the order presumably mirrors the token type enumeration
// declared in the header, so that a token type can index this array - confirm
// whenever a token type is added or reordered.
const char *const HL7Tokenizer::TokenNames[TOKEN_NUMTOKENS] =
{
   "TOKEN_UNKNOWN",    
   "TOKEN_FIELDVALUE", 
   "TOKEN_PIPE",       
   "TOKEN_HAT",        
   "TOKEN_TILDE",      
   "TOKEN_BACKSLASH",  
   "TOKEN_AMPERSAND",  
   "TOKEN_EOL",        
   "TOKEN_EOS"         
};

// State Handlers

//
// HL7Tokenizer::doStateScan
//
// Scan for the start of a new token.
//
void HL7Tokenizer::doStateScan()
{
   // Classify the character at the current position. Single-character
   // delimiters form a complete token by themselves (state goes straight to
   // TSTATE_DONE); anything else opens a field value.
   switch(input[textPos])
   {
   case '|':
      tokenType = TOKEN_PIPE;
      tokenText = "|";
      state = TSTATE_DONE;
      break;
   case '^':
      tokenType = TOKEN_HAT;
      tokenText = "^";
      state = TSTATE_DONE;
      break;
   case '~':
      tokenType = TOKEN_TILDE;
      tokenText = "~";
      state = TSTATE_DONE;
      break;
   case '&':
      tokenType = TOKEN_AMPERSAND;
      tokenText = "&";
      state = TSTATE_DONE;
      break;
   case '\\': // Escape character
      if(!disableEscapes)
      {
         // An escape sequence opens a field value. No text is added yet;
         // the escape handler contributes the decoded character(s).
         tokenType = TOKEN_FIELDVALUE;
         escapedState = state; // remember we came from TSTATE_SCAN
         state = TSTATE_ESCAPE;
      }
      else
      {
         // With escapes disabled the backslash is a token of its own.
         tokenType = TOKEN_BACKSLASH;
         tokenText = "\\";
         state = TSTATE_DONE;
      }
      break;
   case 0x0D: // End of line
      tokenType = TOKEN_EOL;
      tokenText = static_cast<char>(0x0D);
      state = TSTATE_DONE;
      break;
   case '\0': // End of string
      // Stay on the terminator so that repeated calls keep reporting EOS
      // instead of walking off the end of the input.
      doNotAdvance = true;
      tokenType = TOKEN_EOS;
      tokenText = "";
      state = TSTATE_DONE;
      break;
   default: // Anything else starts a field value.
      tokenType = TOKEN_FIELDVALUE;
      tokenText = input[textPos];
      state = TSTATE_INFIELD;
      break;
   }
}

//
// HL7Tokenizer::doStateInField
//
// The tokenizer is inside a field value. Read until a special delimiter,
// end-of-line, or end-of-string. Escaped character sequences may be
// encountered.
//
void HL7Tokenizer::doStateInField()
{
   const char ch = input[textPos];

   if(ch == '\\') // Escape character
   {
      if(disableEscapes)
         tokenText += ch; // interpret literally if escapes disabled
      else
      {
         // Remember where to resume once the escape has been consumed.
         escapedState = state;
         state = TSTATE_ESCAPE;
      }
   }
   else if(ch == '|' || ch == '^' || ch == '~' || ch == '&' ||
           ch == 0x0D || ch == '\0')
   {
      // A delimiter, end-of-line or end-of-string ends the field value. The
      // terminating character is left in place for the next token.
      doNotAdvance = true;
      state = TSTATE_DONE;
   }
   else
      tokenText += ch; // ordinary character: part of the current token
}

//
// HL7Tokenizer::doStateEscape
//
// Reading inside an escape. The sequence should be terminated with another
// backslash character.
//
void HL7Tokenizer::doStateEscape()
{
   switch(input[textPos])
   {
   case '\\': // End of escape sequence
      // Where we go next depends on the state from whence we came
      switch(escapedState)
      {
      case TSTATE_SCAN:
         state = TSTATE_INFIELD; // we are now in a field
         break;
      case TSTATE_INFIELD:
         state = TSTATE_INFIELD; // remain in same state we came from.
         break;
      default: // something is wack... go back to scanning.
         state = TSTATE_SCAN;
         break;
      }
      break;
   case 'E':
      tokenText += '\\';
      break;
   case 'F':
      tokenText += '|';
      break;
   case 'H':
      // Highlight: not handled.
      break;
   case 'N':
      // Normal: not handled.
      break;
   case 'R':
      tokenText += '~';
      break;
   case 'S':
      tokenText += '^';
      break;
   case 'T':
      tokenText += '&';
      break;
   case 'C': // Hex char escapes
   case 'M':
   case 'X':
   case 'Z':
      hexNum = "";
      state = TSTATE_ESCHEX;
      break;
   }
}

//
// HL7Tokenizer::doStateEscHex
//
// Reading an escape that consists of a hexadecimal character value.
//
void HL7Tokenizer::doStateEscHex()
{
   if((input[textPos] >= '0' && input[textPos] <= '9') ||
      (input[textPos] >= 'A' && input[textPos] <= 'F') ||
      (input[textPos] >= 'a' && input[textPos] <= 'f'))
   {
      hexNum += input[textPos];
   }
   else if(input[textPos] == '\\')
   {
      tokenText += static_cast<char>(strtol(hexNum.c_str(), NULL, 16));

      // Where we go next depends on the state from whence we came
      switch(escapedState)
      {
      case TSTATE_SCAN:
         state = TSTATE_INFIELD; // we are now in a field
         break;
      case TSTATE_INFIELD:
         state = TSTATE_INFIELD; // remain in same state we came from.
         break;
      default: // something is wack... go back to scanning.
         state = TSTATE_SCAN;
         break;
      }
   }
   else // else: the pooch is screwed... what to do?
   {
      state = TSTATE_SCAN; // try going back to scanning...
      DEBUGOUT(dbg_parser, 
         "Parser: WARNING: breaking out of TSTATE_ESCHEX due to a malformatted escape!");
   }
}

//
// Tokenizer FSA state table
//
// The state value in HL7Tokenizer indexes into this array of method pointers.
// The loop in HL7Tokenizer::nextToken below will dispatch to the proper handler
// using the value of the state variable.
//
// Entries must stay in the same order as the TSTATE_* values named beside
// them. There is no entry for TSTATE_DONE: nextToken stops looping as soon as
// a handler sets that state.
void (HL7Tokenizer::* HL7Tokenizer::States[])() =
{
   &HL7Tokenizer::doStateScan,    // TSTATE_SCAN
   &HL7Tokenizer::doStateInField, // TSTATE_INFIELD
   &HL7Tokenizer::doStateEscape,  // TSTATE_ESCAPE
   &HL7Tokenizer::doStateEscHex,  // TSTATE_ESCHEX
};

//
// HL7Tokenizer::nextToken
//
// Call to get the next token from an HL7 message.
//
// Pre-conditions:
// * Set input first.
//
// Post-conditions:
// * tokenType is the type of token retrieved.
// * tokenText contains the contents of that token.
//
void HL7Tokenizer::nextToken()
{
   // With no input at all, report end-of-string immediately.
   if(!input)
   {
      tokenType = TOKEN_EOS;
      tokenText = "";
      return;
   }

   // Reset the automaton for a fresh token.
   tokenType    = TOKEN_UNKNOWN;
   tokenText    = "";
   hexNum       = "";
   doNotAdvance = false;
   state        = TSTATE_SCAN;

   do
   {
      // Run the handler for the current state on the current character.
      (this->*States[state])();

      // Step to the next character unless the handler asked to stay put.
      if(!doNotAdvance)
         ++textPos;

      // The stay-put request lasts for one step only, except at
      // end-of-string, where it is kept.
      if(input[textPos] != '\0')
         doNotAdvance = false;
   }
   while(state != TSTATE_DONE);
}

//=============================================================================
//
// Parser Error Codes
//

// Error codes stored in HL7Parser::errorCode. The values double as indices
// into errorMessages below, so the two lists must be kept in the same order.
enum
{
   HL7_ERR_NOERROR,        // 0 is not an error.
   HL7_ERR_UNKNOWNSEGMENT, // An unknown segment was encountered.
   HL7_ERR_UNEXPTOKEN,     // An unexpected token was in the input stream.

   HL7_ERR_NUMERRORS       // Must be last.
};

// Human-readable text for each error code above; returned by
// HL7Parser::getErrorMessage.
static const char *const errorMessages[HL7_ERR_NUMERRORS] =
{
   "No error occurred",
   "An unknown HL7 segment type was encountered",
   "An unexpected token was encountered",
};

//==============================================================================
//
// HL7Parser
//

//
// HL7Parser::setCurField
//
// When setting a field, the tree walk should automatically recurse to the
// deepest level possible, without moving forward.
//
void HL7Parser::setCurField(HL7Value *newField)
{
   curField = newField;

   // Level of the field we were asked to move to, before any descent.
   const int startLevel = curField->level;

   DEBUGOUT(dbg_parser, "Parser: setCurField: setting to " << curField->fieldname);

   // Descend to the first leaf, remembering every value passed through so
   // that the pop routines can find the way back up.
   for(; curField->numExpectedSubComponents > 0; curField = curField->subComponents)
      pstack.push(curField);

   // When a top-level field is entered outside of a repeat, snapshot the
   // stack and position so a later '~' can rewind to this point.
   if(startLevel == 1 && !repeating)
   {
      backupStack = pstack;
      backupValue = curField;
   }

   DEBUGOUT(dbg_parser, "Parser: setCurField: recursed to " << curField->fieldname);
}

//
// HL7Parser::popStack
//
// Just move up the stack one level. Do not step to the next field.
//
void HL7Parser::popStack()
{
   DEBUGOUT(dbg_parser, "popStack");

   if(!pstack.empty())
   {
      HL7Value *nextField = pstack.top();
      pstack.pop();

      setCurField(nextField);
   }
}

//
// HL7Parser::popAndStep
//
// Move up the stack one level, and then step to the next field.
//
void HL7Parser::popAndStep()
{
   DEBUGOUT(dbg_parser, "popAndStep");

   if(!pstack.empty())
   {
      HL7Value *nextField = pstack.top();
      pstack.pop();

      setCurField(nextField + 1);
   }
}

//
// HL7Parser::popTwoAndStep
//
// Pop up two levels, and then step to the next field.
//
void HL7Parser::popTwoAndStep()
{
   DEBUGOUT(dbg_parser, "popTwoAndStep");

   // Two entries are required: the one to discard and the one to step from.
   // Popping or reading the top of an empty stack is undefined behaviour, so
   // do nothing in that case, as popStack and popAndStep already do.
   if(pstack.size() < 2)
   {
      DEBUGOUT(dbg_parser, "popTwoAndStep: fewer than two entries on the stack");
      return;
   }

   // Discard the innermost parent...
   pstack.pop();

   // ...and step on from the one above it.
   HL7Value *nextField = pstack.top();
   DEBUGOUT(dbg_parser, "popTwoAndStep: nextField is " << nextField->fieldname);
   pstack.pop();

   setCurField(nextField + 1);
}

//
// HL7Parser::determineExpectedDelimiter
//
// Figures out what kind of delimiter we would expect to occur in the token
// stream next based on the current position inside the HL7Value tree.
//
char HL7Parser::determineExpectedDelimiter(bool &needToPop)
{
   char expectedDelim = '?'; // if you see this, something is very wrong.
   HL7Value *next = curField + 1;

   if(curField->level == 1 && next->level == 1)
   {
      expectedDelim = '|';
   }
   else if(curField->level == 2 && next->fieldname == NULL)
   {
      expectedDelim = '|';
      needToPop = true; // need to pop now.
   }
   else if((curField->level == 2 && next->level == 2))
   {
      expectedDelim = '^';
   }
   else if(curField->level == 3 && next->fieldname == NULL)
   {
      expectedDelim = '^';
      needToPop = true; // need to pop now.
   }
   else if(curField->level == 3 && next->level == 3)
   {
      expectedDelim = '&';
   }

   DEBUGOUT(dbg_parser, "determineExpectedDelimiter: expecting " << expectedDelim);

   return expectedDelim;
}

//
// HL7Parser::stepToNext
//
// Step to the next field at the same level.
//
void HL7Parser::stepToNext()
{
   DEBUGOUT(dbg_parser, "stepToNext");
   // Advance to the value that follows curField in its array; setCurField
   // then descends into that value's subcomponents, if it has any.
   setCurField(curField + 1);
}

//
// HL7Parser::badDelim
//
// If we hit an extra delimiter in the stream, then there are more subfields in
// the record than the specification being implemented indicates should exist.
// In that case, we have to enter the "spin" state, which cycles continuously
// until the type of delimiter we DID expect to find is encountered, as that
// signals the actual end of the current level of tokens.
//
void HL7Parser::badDelim(char expected, char found)
{
   DEBUGOUT(dbg_parser, "badDelim");

   // Record which delimiter should have appeared and which one did...
   spinFoundDelim    = found;
   spinExpectedDelim = expected;

   // ...and raise the flag the state handlers test in order to enter the
   // spin state.
   delimiterError = true;
}

//
// HL7Parser::nextField
//
// We need to go as deep into the HL7Value tree as immediately possible.
//
// Note that the presence of a stack in this state machine suggests that HL7
// is actually a context-free language and not simply regular. Unusually,
// it seems you can accept or reject it just fine using a FSA - you just can't
// easily extract the proper hierarchical relationship between the fields if
// you don't use a PDA-like stack mechanism.
//
void HL7Parser::nextField(char delimiter)
{
   DEBUGOUT(dbg_parser, "nextField");

   // just starting? Begin at the first value of the segment.
   if(!curField)
   {
      setCurField(curSegment->valueTree);
      return;
   }

   // Compare the delimiter that was read against the one the current tree
   // position calls for. Combinations not listed below leave the position
   // unchanged.
   bool needToPop = false;
   const char expectedDelim = determineExpectedDelimiter(needToPop);

   if(expectedDelim == '|')
   {
      if(delimiter == '&' || delimiter == '^')
         badDelim('|', delimiter); // unexpected subfields
      else if(delimiter == '|')
      {
         if(needToPop)
            popAndStep(); // pop and then step
         else
            stepToNext(); // step to next at same level
      }
   }
   else if(expectedDelim == '^')
   {
      if(delimiter == '&')
         badDelim('^', '&'); // unexpected subfields
      else if(delimiter == '^')
      {
         if(needToPop)
            popAndStep(); // pop and then step
         else
            stepToNext(); // step to next at same level
      }
      else if(delimiter == '|')
         popAndStep(); // end of subrecords
   }
   else if(expectedDelim == '&')
   {
      if(delimiter == '&')
         stepToNext();    // step to next, same level
      else if(delimiter == '^')
         popAndStep();    // end subrecords
      else if(delimiter == '|')
         popTwoAndStep(); // end subrecordsx2
   }
}

/*
  HL7 Grammar (Pseudo-BNF)
  -----------------------------------------------------------------------------
     <message> := <segmentlist><EOS>
     <segmentlist> := <segment><segmentlist> | <segment>
     <segment> := <header><delimiter><fieldlist><EOL>
     <header> := string value
     <delimiter> := | & ~ ^
     <fieldlist> := <field><delimiter><fieldlist> | <field>
     <field> := string value

  HL7 STATE TRANSITION DIAGRAM
  -----------------------------------------------------------------------------
     EXPECTSEGMENTNAME:
        "MSH"      : MSHSPECIALCHARS
        FIELDVALUE : EXPECTDELIMITER
        EOS        : DONE (Return whole message)
        *          : ERROR
     MSHSPECIALCHARS:
        PIPE       : MSHSPECIALCHARS
        HAT        : MSHSPECIALCHARS
        TILDE      : MSHSPECIALCHARS
        BACKSLASH  : MSHSPECIALCHARS
        AMPERSAND  : EXPECTDELIMITER
        *          : ERROR
     EXPECTDELIMITER:
        PIPE       : EXPECTVALUEORDELIMITER
        HAT        : EXPECTVALUEORDELIMITER
        AMPERSAND  : EXPECTVALUEORDELIMITER
        TILDE      : EXPECTVALUEORDELIMITER
        EOL        : EXPECTSEGMENTNAME (Start new segment)
        EOS        : DONE (Return whole message)
        *          : ERROR
     EXPECTVALUEORDELIMITER:
        FIELDVALUE : EXPECTDELIMITER
        PIPE       : EXPECTVALUEORDELIMITER
        HAT        : EXPECTVALUEORDELIMITER
        AMPERSAND  : EXPECTVALUEORDELIMITER
        TILDE      : EXPECTVALUEORDELIMITER
        EOL        : EXPECTSEGMENTNAME (Start new segment)
        EOS        : DONE (Return whole message)

  NB: Transitions to and from STATE_SPIN depend on the state of the parser
  stack, making this state machine a pushdown automaton when extracting data in
  a hierarchical fashion - see information at the following URL:
  
  http://en.wikipedia.org/wiki/Pushdown_automaton

*/

// Parser state handlers

//
// HL7Parser::setNewSegment
//
// Clear out the per-segment parsing state at the start of a new segment.
//
void HL7Parser::setNewSegment(HL7Segment *newSegment)
{
   // No field is current until nextField is called for the new segment.
   curField = NULL;

   // Drop whatever the previous segment left on the stack.
   for(; !pstack.empty(); pstack.pop())
      ;

   // Values are written into this segment from now on.
   curSegment = newSegment;
}

//
// HL7Parser::doStateExpectSegmentName
//
// Initial state. Returned to anytime a TOKEN_EOL is found.
//
void HL7Parser::doStateExpectSegmentName()
{
   tokenizer.nextToken();
   const string &text = tokenizer.getTokenText();

   DEBUGOUT(dbg_parser, "doStateExpectSegmentName: " << text);

   // Only a field value can name a segment. End-of-string here finishes the
   // message cleanly; every other token is an error.
   switch(tokenizer.getTokenType())
   {
   case HL7Tokenizer::TOKEN_FIELDVALUE:
      break; // handled below
   case HL7Tokenizer::TOKEN_EOS:
      state = STATE_DONE; // finished successfully.
      return;
   default:
      state     = STATE_STOP; // error: unexpected token
      errorCode = HL7_ERR_UNEXPTOKEN;
      return;
   }

   // Instantiate a segment object for this name.
   HL7Segment *newSegment = HL7Segment::SegmentForSegmentName(text.c_str());
   if(!newSegment)
   {
      state     = STATE_STOP; // error: unknown segment type
      errorCode = HL7_ERR_UNKNOWNSEGMENT;
      return;
   }

   setNewSegment(newSegment);
   segments->push_back(curSegment);

   if(text != "MSH")
   {
      state = STATE_EXPECTDELIMITER;
      return;
   }

   // MSH starts with the encoding characters, which have to be read
   // literally: escapes are switched off until they have been consumed.
   state = STATE_MSHSPECIALCHARS; // hack for MSH encoding characters
   nextField('|');                // prepare the first field for input
   tokenizer.setDisableEscapes(true);
}

//
// HL7Parser::doStateMSHSpecialChars
//
// Get past the special little parser nightmare that is the beginning of a MSH
// segment. This semi-qualifies as a hack.
//
void HL7Parser::doStateMSHSpecialChars()
{
   tokenizer.nextToken();

   DEBUGOUT(dbg_parser, "doStateMSHSpecialChars: " << tokenizer.getTokenText());

   // The delimiter tokens read here are accumulated as literal text in
   // curValue. The ampersand is treated as the final one: the accumulated
   // text is stored into the current field and normal parsing resumes.
   switch(tokenizer.getTokenType())
   {
   case HL7Tokenizer::TOKEN_PIPE:
   case HL7Tokenizer::TOKEN_HAT:
   case HL7Tokenizer::TOKEN_TILDE:
   case HL7Tokenizer::TOKEN_BACKSLASH:
      curValue += tokenizer.getTokenText();
      break; // continue in same state
   case HL7Tokenizer::TOKEN_AMPERSAND:
      curValue += tokenizer.getTokenText();
      curField->value = curValue;
      // Start from scratch for the next MSH segment. Otherwise a second MSH
      // in the same input would have the previous one's characters prepended
      // to its own.
      curValue = "";
      state = STATE_EXPECTDELIMITER;
      tokenizer.setDisableEscapes(false); // escapes are meaningful again
      break;
   default:
      state     = STATE_STOP; // error: unexpected token
      errorCode = HL7_ERR_UNEXPTOKEN;
      break;
   }
}

//
// HL7Parser::doStateExpectDelimiter
//
// We should see a delimiter. A field value is NOT valid in this position.
//
void HL7Parser::doStateExpectDelimiter()
{
   tokenizer.nextToken();

   DEBUGOUT(dbg_parser, "doStateExpectDelimiter: " << tokenizer.getTokenText());

   switch(tokenizer.getTokenType())
   {
   case HL7Tokenizer::TOKEN_PIPE:
      if(repeating) // if we were repeating, we're not any more.
      {
         repeating = false;
         repeatCount = 0;
      }
      // fall through.
   case HL7Tokenizer::TOKEN_HAT:
   case HL7Tokenizer::TOKEN_AMPERSAND:
      // Move through the value tree according to the delimiter. If it was
      // not the one the tree position called for, skip ahead in the spin
      // state.
      nextField(tokenizer.getTokenText()[0]);
      if(delimiterError)
         state = STATE_SPIN;
      else
         state = STATE_EXPECTVALUEORDELIMITER;
      break;
   case HL7Tokenizer::TOKEN_TILDE:
      // A repeat can only rewind to a position that has been recorded. With
      // none recorded, the tilde is an unexpected token rather than a reason
      // to dereference a NULL field pointer.
      if(!backupValue)
      {
         state     = STATE_STOP; // error: unexpected token
         errorCode = HL7_ERR_UNEXPTOKEN;
         break;
      }
      // We are going to repeat the top-level section through which we just
      // traversed. Count the repetition (as the other tilde handlers do, so
      // that repeated values carry distinct repeatCount numbers), then
      // restore the saved stack and field pointer...
      ++repeatCount;
      pstack   = backupStack;
      curField = backupValue;
      DEBUGOUT(dbg_parser, "Repeating field, rewound to " << curField->fieldname);
      repeating = true;
      state = STATE_EXPECTVALUEORDELIMITER;
      break;
   case HL7Tokenizer::TOKEN_EOL: // end of segment
      state = STATE_EXPECTSEGMENTNAME;
      break;
   case HL7Tokenizer::TOKEN_EOS: // end of message
      state = STATE_DONE;
      break;
   default:
      state     = STATE_STOP; // error: unexpected token
      errorCode = HL7_ERR_UNEXPTOKEN;
      break;
   }
}

//
// HL7Parser::doStateExpectValueOrDelimiter
//
// We expect to either see a field value, or another consecutive delimiter in
// the event of an empty field value.
//
void HL7Parser::doStateExpectValueOrDelimiter()
{
   tokenizer.nextToken();

   DEBUGOUT(dbg_parser, "doStateExpectValueOrDelimiter: " << tokenizer.getTokenText());

   // Token types without a case below are consumed and ignored; the parser
   // simply stays in this state.
   switch(tokenizer.getTokenType())
   {
   case HL7Tokenizer::TOKEN_FIELDVALUE:
      if(repeating)
      {
         // Inside a repeat the original value is kept; the new text goes
         // into a fresh HL7Value linked at the head of the field's repeat
         // chain (so the chain holds the most recent repeat first). The
         // chain is freed by HL7Value's destructor.
         HL7Value *newValue = new HL7Value;

         newValue->repeatValues = curField->repeatValues;
         curField->repeatValues = newValue;

         newValue->repeatCount = repeatCount;
         newValue->value = tokenizer.getTokenText();
      }
      else
         curField->value = tokenizer.getTokenText();
      state = STATE_EXPECTDELIMITER;
      break;
   case HL7Tokenizer::TOKEN_PIPE:
      if(repeating) // if we were repeating, we're not anymore.
      {
         repeating = false;
         repeatCount = 0;
      }
      // fall through.
   case HL7Tokenizer::TOKEN_HAT:
   case HL7Tokenizer::TOKEN_AMPERSAND:
      // Empty value: move on in the tree and stay in this state, unless the
      // delimiter was not the expected one.
      nextField(tokenizer.getTokenText()[0]);
      if(delimiterError)
         state = STATE_SPIN;
      break;
   case HL7Tokenizer::TOKEN_TILDE:
      // We are going to repeat the top-level section through which we just
      // traversed. First, restore the saved stack and field pointer...
      ++repeatCount;
      pstack   = backupStack;
      curField = backupValue;
      DEBUGOUT(dbg_parser, "Repeating field, rewound to " << curField->fieldname);
      repeating = true;
      // stay in same state.
      break;
   case HL7Tokenizer::TOKEN_EOL: // end of segment
      state = STATE_EXPECTSEGMENTNAME;
      break;
   case HL7Tokenizer::TOKEN_EOS: // end of message
      state = STATE_DONE;
      break;
   }
}

//
// HL7Parser::doStateSpin
//
// There are unexpected subfields in the token stream. We need to jump past
// them by scanning forward until we find the delimiter we were previously
// expecting to see - then we can step/pop the value tree and continue like
// nothing ever happened...
//
void HL7Parser::doStateSpin()
{
   tokenizer.nextToken();

   DEBUGOUT(dbg_parser, "doStateSpin: " << tokenizer.getTokenText());

   delimiterError = false; // clear the error flag.

   // spinExpectedDelim was recorded by badDelim. Tokens are discarded until
   // one arrives that closes the level we were skipping.
   switch(tokenizer.getTokenType())
   {
   case HL7Tokenizer::TOKEN_PIPE:
      // A pipe ends the skipped run whether a '|' or a '^' was expected.
      if(spinExpectedDelim == '^' || spinExpectedDelim == '|')
      {
         nextField('|');
         state = STATE_EXPECTVALUEORDELIMITER;
      }
      break; // otherwise, continue scanning
   case HL7Tokenizer::TOKEN_HAT:
      // A hat only ends the run when a hat was what we expected.
      if(spinExpectedDelim == '^')
      {
         nextField('^');
         state = STATE_EXPECTVALUEORDELIMITER;
      }
      break; // otherwise, continue scanning
   case HL7Tokenizer::TOKEN_EOL: // oh thank god! let's get out of here.
      state = STATE_EXPECTSEGMENTNAME;
      break;
   case HL7Tokenizer::TOKEN_EOS: // hooray! Out without a doubt.
      state = STATE_DONE;
      break;
   case HL7Tokenizer::TOKEN_TILDE:
      // We are going to repeat the top-level section through which we just
      // traversed. First, restore the saved stack and field pointer...
      ++repeatCount;
      pstack   = backupStack;
      curField = backupValue;
      repeating = true;
      state = STATE_EXPECTVALUEORDELIMITER;
      break;
   default:
      break; // anything else: continue spinning
   }
}

// Parser state table
// Indexed by the parser's state variable (see HL7Parser::parse). Entries must
// stay in the same order as the STATE_* values named beside them. STATE_DONE
// and STATE_STOP have no handler here.
void (HL7Parser::* HL7Parser::States[])() =
{
   &HL7Parser::doStateExpectSegmentName,      // STATE_EXPECTSEGMENTNAME
   &HL7Parser::doStateMSHSpecialChars,        // STATE_MSHSPECIALCHARS
   &HL7Parser::doStateExpectDelimiter,        // STATE_EXPECTDELIMITER
   &HL7Parser::doStateExpectValueOrDelimiter, // STATE_EXPECTVALUEORDELIMITER
   &HL7Parser::doStateSpin,                   // STATE_SPIN
};

//
// HL7Parser::parse
//
// Takes a full HL7 message made up of an arbitrary sequence of segments and
// transforms it into a series of HL7Segment descendant object instances
// containing trees of HL7Value instances, which contain both the field name and
// its value (if any) that was discovered in the HL7 segment.
//
// After this call, check the error state to see if parsing was fully successful
// or not. Even if it failed, there may be valid data inside the value tree(s).
//
// You own the SegmentVector upon return. Free it when you're done using
// HL7Parser::DestroySegments, unless you really like memory leaks ;)
//
HL7Parser::SegmentVector *HL7Parser::parse()
{
   // Segments created during this run are collected here; ownership passes
   // to the caller on return.
   segments = new SegmentVector;

   // Run state handlers until one of the terminal states is reached. The
   // test comes before the dispatch because the state table has no entries
   // for STATE_DONE / STATE_STOP: calling parse() again after a finished run
   // (without reloading input) must not index the table with either of them.
   while(state != STATE_DONE && state != STATE_STOP)
      (this->*States[state])();

   // did we stop because of an error? then flag it.
   if(state == STATE_STOP)
   {
      errorState = true;
      DEBUGOUT(dbg_parser, "An error occurred during parsing");
   }

   SegmentVector *retval = segments;
   segments = NULL; // we don't own it any more!

   return retval;
}

//
// HL7Parser::loadFile
//
// Load an input file for parsing.
//
bool HL7Parser::loadFile(const char *filename)
{
   // Release any previously loaded text.
   // NOTE(review): this assumes LoadTextFile allocates with new [] - confirm.
   if(input)
      delete [] input;
      
   input = LoadTextFile(filename);

   // Give the tokenizer the new pointer even when loading failed. Otherwise
   // it would keep pointing at the buffer that was just deleted. A NULL
   // input makes the tokenizer report end-of-string.
   tokenizer.setInput(input);
   if(!input)
      return false;

   // reset all internal state
   tokenizer.setDisableEscapes(false);
   state          = STATE_EXPECTSEGMENTNAME;
   segments       = NULL;
   curSegment     = NULL;
   curValue       = "";
   curField       = NULL;
   delimiterError = false;
   errorState     = false;
   errorCode      = HL7_ERR_NOERROR;

   while(!pstack.empty())
      pstack.pop();

   // repeat handling starts clean as well; the backup stack is a copy of
   // the (now empty) parse stack.
   repeating   = false;
   backupStack = pstack;
   backupValue = NULL;
   repeatCount = 0;

   return true;
}

//
// HL7Parser::loadString
//
// For testing only, don't abuse it :P
//
void HL7Parser::loadString(const char *text)
{
   // Release any previously loaded text.
   if(input)
      delete [] input;

   // Work on a private copy of the caller's text.
   input = cpp_strdup(text);

   // Give the tokenizer the new pointer even when the copy failed. Otherwise
   // it would keep pointing at the buffer that was just deleted. A NULL
   // input makes the tokenizer report end-of-string.
   tokenizer.setInput(input);
   if(!input)
      return;

   // reset all internal state
   tokenizer.setDisableEscapes(false);
   state          = STATE_EXPECTSEGMENTNAME;
   segments       = NULL;
   curSegment     = NULL;
   curValue       = "";
   curField       = NULL;
   delimiterError = false;
   errorState     = false;
   errorCode      = HL7_ERR_NOERROR;

   while(!pstack.empty())
      pstack.pop();

   // repeat handling starts clean as well; the backup stack is a copy of
   // the (now empty) parse stack.
   repeating   = false;
   backupStack = pstack;
   backupValue = NULL;
   repeatCount = 0;
}

//
// HL7Parser::getErrorMessage
//
// Return the error message for the errorCode that occurred during parsing.
//
const char *HL7Parser::getErrorMessage() const
{
   // errorCode is used directly as an index into the file-static message
   // table; it is only ever assigned HL7_ERR_* values in this file. The
   // returned string is a literal and must not be freed.
   return errorMessages[errorCode];
}

//=============================================================================
//
// Old parser test code; left here for reference.
//

/*
// Another test function
static void TestPrintHL7ValueTree(HL7Value *root, int numComponents)
{
   for(int i = 0; i < numComponents && root->fieldname; i++)
   {
      if(root->numExpectedSubComponents)
         TestPrintHL7ValueTree(root->subComponents, root->numExpectedSubComponents);
      else
      {
         cout << "   field:\t" << root->fieldname << "\n"
              << "   level:\t" << root->level     << "\n"
              << "   value:\t" << root->value     << "\n";

         if(root->repeatValues)
         {
            // NB: these will print out in reverse order...
            // Getting them in input order requires recursing down the
            // list.
            HL7Value *reprover = root->repeatValues;
            while(reprover)
            {
               cout << "  repeat:\t" << reprover->value << "\n";
               reprover = reprover->repeatValues;
            }
         }

         cout << endl;
      }
      ++root;
   }
}

// HL7 tree print test function 2 - only printing fields with a value in them.
static void TestPrintHL7ValueTree2(HL7Value *parent, HL7Value *root, int numComponents)
{
   for(int i = 0; i < numComponents && root->fieldname; i++)
   {
      if(root->numExpectedSubComponents)
         TestPrintHL7ValueTree2(root, root->subComponents, root->numExpectedSubComponents);
      else if(root->value != "" || root->repeatValues)
      {
         if(parent)
         {
            cout << "   " << parent->fieldname << "::field:\t" << root->fieldname << "\n"
                 << "   " << parent->fieldname << "::level:\t" << root->level     << "\n"
                 << "   " << parent->fieldname << "::value:\t" << root->value     << "\n";
         }
         else
         {
            cout << "   field:\t" << root->fieldname << "\n"
                 << "   level:\t" << root->level     << "\n"
                 << "   value:\t" << root->value     << "\n";
         }

         if(root->repeatValues)
         {
            HL7Value *reprover = root->repeatValues;
            while(reprover)
            {
               cout << "  repeat:\t" << reprover->value << "\n";
               reprover = reprover->repeatValues;
            }
         }

         cout << endl;
      }
      ++root;
   }
}
*/

// EOF