/* HL7 Parser Prototype */ #include #include #include "globaldefines.h" #include "hl7parse.h" #include "stringutils.h" #include "utils.h" #pragma warn -8060 //============================================================================= // // HL7Value // // // HL7Value::findInSubComponents0 // // Searches for the named component amongst subcomponents of this value. // This protected version returns NULL on failure. It is for use only in // HL7Segment::findInValueTree. // HL7Value *HL7Value::findInSubComponents0(const char *token) { HL7Value *ret = NULL; if(numExpectedSubComponents > 0) { unsigned int key = D_HashTableKey(token) % NUMSUBCOMPCHAINS; HL7Value *chain = subCompChains[key]; while(chain && stricmp(chain->fieldname, token)) chain = chain->next; ret = chain; } return ret; } // // HL7Value::findInSubComponents // // Searches for the named component amongst subcomponents of this value. // HL7Value *HL7Value::findInSubComponents(const char *token) { static HL7Value dummyValue; HL7Value *ret = &dummyValue; if(numExpectedSubComponents > 0) { unsigned int key = D_HashTableKey(token) % NUMSUBCOMPCHAINS; HL7Value *chain = subCompChains[key]; while(chain && stricmp(chain->fieldname, token)) chain = chain->next; ret = chain; } if(!ret || !ret->fieldname) ret = &dummyValue; return ret; } // // HL7Value Destructor // // jhaley 20121019: Because properly freeing the single largest set of data // structures in your program is usually a good idea... // HL7Value::~HL7Value() { // Am I on a repeatValues chain? if so, free next on the chain first. // It will recursively free its next repeat and so on. if(repeatValues) { delete repeatValues; repeatValues = NULL; } // Do I have subcomponents? Free them. They will free their own subcomponents // recursively. if(subComponents) { delete [] subComponents; subComponents = NULL; } } //============================================================================= // // HL7Segment - Abstract Segment Base Class // // RTTI/Factory Construction Proxy HL7Segment::Type HL7Segment::StaticType("HL7", NULL); // intentionally non-matching. // Hash table chain head pointers HL7Segment::Type *HL7Segment::Type::segmentTypes[NUMTYPECHAINS]; // // HL7Segment::Type::FindType // // Find the proxy class instance representing a particular type of HL7 message // by the HL7 message type (ie., "MSH") // HL7Segment::Type *HL7Segment::Type::FindType(const char *pName) { unsigned int hashcode = D_HashTableKeyCase(pName) % NUMTYPECHAINS; Type *chain = segmentTypes[hashcode]; while(chain && strcmp(chain->name, pName)) chain = chain->next; return chain; } // // HL7Segment::Type::addType // // Called by HL7Segment's constructor; adds the instance to this class's static // hash table for runtime lookups. // void HL7Segment::Type::addType() { unsigned int hashcode; // Add it to the hash table; order is unimportant. hashcode = D_HashTableKeyCase(name) % NUMTYPECHAINS; this->next = segmentTypes[hashcode]; segmentTypes[hashcode] = this; } // // HL7Segment::SegmentForSegmentName // // Static HL7 message factory. // Instantiate the proper type of segment object given an HL7 message name. // HL7Segment *HL7Segment::SegmentForSegmentName(const char *name) { HL7Segment *retval = NULL; Type *thatType = Type::FindType(name); if(thatType) { retval = thatType->newSegment(); retval->constructValueTree(); // construct value tree here. } return retval; } // // HL7Segment::constructValueTreeRecursive // // Walks down the HL7FieldInfo structure, recursing whenever an entry in the // array indicates it has multiple expected sub-components. // void HL7Segment::constructValueTreeRecursive(const HL7FieldInfo *info, int numComponents, HL7Value **root) { HL7Value *rover; *root = new HL7Value [numComponents]; rover = *root; for(int i = 0; i < numComponents; i++) { const HL7FieldInfo *current = &info[i]; rover->level = current->level; rover->fieldname = current->fieldname; rover->numExpectedSubComponents = current->numExpectedSubComponents; rover->value = ""; if(current->numExpectedSubComponents > 0) { constructValueTreeRecursive(current->subComponents, current->numExpectedSubComponents, &rover->subComponents); // construct subcomponent hash table memset(rover->subCompChains, 0, NUMSUBCOMPCHAINS*sizeof(HL7Value *)); for(int sc = 0; sc < rover->numExpectedSubComponents-1; sc++) { unsigned int key = D_HashTableKey(rover->subComponents[sc].fieldname) % NUMSUBCOMPCHAINS; rover->subComponents[sc].next = rover->subCompChains[key]; rover->subCompChains[key] = &(rover->subComponents[sc]); } } else rover->subComponents = NULL; // step to next at this level ++rover; } } // // HL7Segment::constructValueTree // // Kicks off the recursive value tree building process executed by the above // routine with information about the segment's field info. // void HL7Segment::constructValueTree() { constructValueTreeRecursive(getFieldInfo(), getNumFields(), &valueTree); // construct top level hash table memset(valueChains, 0, NUMSUBCOMPCHAINS * sizeof(HL7Value *)); for(int i = 0; i < getNumFields() - 1; i++) { unsigned int key = D_HashTableKey(valueTree[i].fieldname) % NUMSUBCOMPCHAINS; valueTree[i].next = valueChains[key]; valueChains[key] = &valueTree[i]; } } // // HL7Segment::destroyValueTree // // Recursively deletes all the HL7Value instances. // void HL7Segment::destroyValueTree(HL7Value *root) { // Delete the HL7 value tree root components. // Freeing of subcomponents and repeat values is the responsibility of // HL7Value's destructor (as of 20121019; this plugs a massive memory leak). delete [] root; } // // HL7Segment Destructor // // Free the valueTree. // HL7Segment::~HL7Segment() { if(valueTree) { destroyValueTree(valueTree); valueTree = NULL; } } // // HL7Segment::findInValueTree // // Send in a string consisting of hierarchically related field names separated // by "." characters, ex: "phone_number.compound" - starting from the root // value array of the segment, the names will be matched recursively and then // the resulting value (if such exists) will be returned. // HL7Value *HL7Segment::findInValueTree(const char *fieldPath) { static HL7Value dummyValue; HL7Value *ret = &dummyValue; HL7Value *cur = valueTree; char *tempbuf = strdup(fieldPath); char *rover = tempbuf; rover = strtok(rover, "."); // get first token // find the first specified field name in the segment array unsigned int key = D_HashTableKey(rover) % NUMSUBCOMPCHAINS; HL7Value *chain = valueChains[key]; while(chain && stricmp(chain->fieldname, rover)) chain = chain->next; cur = chain; if(cur && cur->fieldname) { // continue to tokenize the string, descending at each match while((rover = strtok(NULL, "."))) { cur = cur->findInSubComponents0(rover); // need NULL on failure here... if(!cur) break; // not found. } ret = cur; } if(!ret || !ret->fieldname) ret = &dummyValue; free(tempbuf); return ret; } // // HL7Segment::valueForIndex // // Given one of the enumeration values defined for expected fields in HL7Segment // descendant classes, this will return the value at that index. // ex: value = mshSegment->valueForIndex(MSHSegment::MSH_SPECIALCHARS); // HL7Value *HL7Segment::valueForIndex(int i) { static HL7Value dummyValue; HL7Value *ret = &dummyValue; #ifdef RANGECHECK if(i >= 0 && i < getNumFields()) #endif ret = &valueTree[i]; return ret; } //============================================================================= // // Tokenizer // // Token Names - For debug output. const char *const HL7Tokenizer::TokenNames[TOKEN_NUMTOKENS] = { "TOKEN_UNKNOWN", "TOKEN_FIELDVALUE", "TOKEN_PIPE", "TOKEN_HAT", "TOKEN_TILDE", "TOKEN_BACKSLASH", "TOKEN_AMPERSAND", "TOKEN_EOL", "TOKEN_EOS" }; // State Handlers // // HL7Tokenizer::doStateScan // // Scan for the start of a new token. // void HL7Tokenizer::doStateScan() { switch(input[textPos]) { case '|': tokenType = TOKEN_PIPE; tokenText = "|"; state = TSTATE_DONE; break; case '^': tokenType = TOKEN_HAT; tokenText = "^"; state = TSTATE_DONE; break; case '~': tokenType = TOKEN_TILDE; tokenText = "~"; state = TSTATE_DONE; break; case '&': tokenType = TOKEN_AMPERSAND; tokenText = "&"; state = TSTATE_DONE; break; case '\\': // Escape character if(!disableEscapes) { tokenType = TOKEN_FIELDVALUE; escapedState = state; state = TSTATE_ESCAPE; } else { tokenType = TOKEN_BACKSLASH; tokenText = "\\"; state = TSTATE_DONE; } break; case 0x0D: // End of line tokenType = TOKEN_EOL; tokenText = static_cast(0x0D); state = TSTATE_DONE; break; case '\0': // End of string doNotAdvance = true; tokenType = TOKEN_EOS; tokenText = ""; state = TSTATE_DONE; break; default: // Anything else starts a field value. tokenType = TOKEN_FIELDVALUE; tokenText = input[textPos]; state = TSTATE_INFIELD; break; } } // // HL7Tokenizer::doStateInField // // The tokenizer is inside a field value. Read until a special delimiter, // end-of-line, or end-of-string. Escaped character sequences may be // encountered. // void HL7Tokenizer::doStateInField() { switch(input[textPos]) { case '|': case '^': case '~': case '&': case 0x0D: // End of line case '\0': // End of string doNotAdvance = true; // Special char, don't advance textPos state = TSTATE_DONE; break; case '\\': // Escape character if(!disableEscapes) { escapedState = state; state = TSTATE_ESCAPE; } else tokenText += input[textPos]; // interpret literally if escapes disabled break; default: // Anything else is part of the current token tokenText += input[textPos]; break; } } // // HL7Tokenizer::doStateEscape // // Reading inside an escape. The sequence should be terminated with another // backslash character. // void HL7Tokenizer::doStateEscape() { switch(input[textPos]) { case '\\': // End of escape sequence // Where we go next depends on the state from whence we came switch(escapedState) { case TSTATE_SCAN: state = TSTATE_INFIELD; // we are now in a field break; case TSTATE_INFIELD: state = TSTATE_INFIELD; // remain in same state we came from. break; default: // something is wack... go back to scanning. state = TSTATE_SCAN; break; } break; case 'E': tokenText += '\\'; break; case 'F': tokenText += '|'; break; case 'H': // Highlight: not handled. break; case 'N': // Normal: not handled. break; case 'R': tokenText += '~'; break; case 'S': tokenText += '^'; break; case 'T': tokenText += '&'; break; case 'C': // Hex char escapes case 'M': case 'X': case 'Z': hexNum = ""; state = TSTATE_ESCHEX; break; } } // // HL7Tokenizer::doStateEscHex // // Reading an escape that consists of a hexadecimal character value. // void HL7Tokenizer::doStateEscHex() { if((input[textPos] >= '0' && input[textPos] <= '9') || (input[textPos] >= 'A' && input[textPos] <= 'F') || (input[textPos] >= 'a' && input[textPos] <= 'f')) { hexNum += input[textPos]; } else if(input[textPos] == '\\') { tokenText += static_cast(strtol(hexNum.c_str(), NULL, 16)); // Where we go next depends on the state from whence we came switch(escapedState) { case TSTATE_SCAN: state = TSTATE_INFIELD; // we are now in a field break; case TSTATE_INFIELD: state = TSTATE_INFIELD; // remain in same state we came from. break; default: // something is wack... go back to scanning. state = TSTATE_SCAN; break; } } else // else: the pooch is screwed... what to do? { state = TSTATE_SCAN; // try going back to scanning... DEBUGOUT(dbg_parser, "Parser: WARNING: breaking out of TSTATE_ESCHEX due to a malformatted escape!"); } } // // Tokenizer FSA state table // // The state value in HL7Tokenizer indexes into this array of method pointers. // The loop in HL7Tokenizer::nextToken below will dispatch to the proper handler // using the value of the state variable. // void (HL7Tokenizer::* HL7Tokenizer::States[])() = { &HL7Tokenizer::doStateScan, // TSTATE_SCAN &HL7Tokenizer::doStateInField, // TSTATE_INFIELD &HL7Tokenizer::doStateEscape, // TSTATE_ESCAPE &HL7Tokenizer::doStateEscHex, // TSTATE_ESCHEX }; // // HL7Tokenizer::nextToken // // Call to get the next token from an HL7 message. // // Pre-conditions: // * Set input first. // // Post-conditions: // * tokenType is the type of token retrieved. // * tokenText contains the contents of that token. // void HL7Tokenizer::nextToken() { if(!input) { tokenType = TOKEN_EOS; tokenText = ""; return; // Woops! I can't parse a NULL pointer, dude. } // Setup initial parameters of the finite state automaton state = TSTATE_SCAN; tokenText = ""; tokenType = TOKEN_UNKNOWN; hexNum = ""; doNotAdvance = false; while(1) { // Dispatch state handler (this->*States[state])(); // Advance to next char unless told not to do so. if(!doNotAdvance) ++textPos; // Clear do-not-advance flag, unless at EOS. if(input[textPos] != '\0') doNotAdvance = false; // Done? if(state == TSTATE_DONE) break; } } //============================================================================= // // Parser Error Codes // enum { HL7_ERR_NOERROR, // 0 is not an error. HL7_ERR_UNKNOWNSEGMENT, // An unknown segment was encountered. HL7_ERR_UNEXPTOKEN, // An unexpected token was in the input stream. HL7_ERR_NUMERRORS // Must be last. }; static const char *const errorMessages[HL7_ERR_NUMERRORS] = { "No error occurred", "An unknown HL7 segment type was encountered", "An unexpected token was encountered", }; //============================================================================== // // HL7Parser // // // HL7Parser::setCurField // // When setting a field, the tree walk should automatically recurse to the // deepest level possible, without moving forward. // void HL7Parser::setCurField(HL7Value *newField) { int level; curField = newField; level = curField->level; DEBUGOUT(dbg_parser, "Parser: setCurField: setting to " << curField->fieldname); // always go down the tree as far as possible when stepping while(curField->numExpectedSubComponents > 0) { pstack.push(curField); // remember where to go when returning here curField = curField->subComponents; } // If we are not currently repeating a top-level value, then record this // information so that the parser can jump back to this state later. if(!repeating && level == 1) { backupStack = pstack; backupValue = curField; } DEBUGOUT(dbg_parser, "Parser: setCurField: recursed to " << curField->fieldname); } // // HL7Parser::popStack // // Just move up the stack one level. Do not step to the next field. // void HL7Parser::popStack() { DEBUGOUT(dbg_parser, "popStack"); if(!pstack.empty()) { HL7Value *nextField = pstack.top(); pstack.pop(); setCurField(nextField); } } // // HL7Parser::popAndStep // // Move up the stack one level, and then step to the next field. // void HL7Parser::popAndStep() { DEBUGOUT(dbg_parser, "popAndStep"); if(!pstack.empty()) { HL7Value *nextField = pstack.top(); pstack.pop(); setCurField(nextField + 1); } } // // HL7Parser::popTwoAndStep // // Pop up two levels, and then step to the next field. // void HL7Parser::popTwoAndStep() { DEBUGOUT(dbg_parser, "popTwoAndStep"); HL7Value *nextField; pstack.pop(); nextField = pstack.top(); DEBUGOUT(dbg_parser, "popTwoAndStep: nextField is " << nextField->fieldname); pstack.pop(); setCurField(nextField + 1); } // // HL7Parser::determineExpectedDelimiter // // Figures out what kind of delimiter we would expect to occur in the token // stream next based on the current position inside the HL7Value tree. // char HL7Parser::determineExpectedDelimiter(bool &needToPop) { char expectedDelim = '?'; // if you see this, something is very wrong. HL7Value *next = curField + 1; if(curField->level == 1 && next->level == 1) { expectedDelim = '|'; } else if(curField->level == 2 && next->fieldname == NULL) { expectedDelim = '|'; needToPop = true; // need to pop now. } else if((curField->level == 2 && next->level == 2)) { expectedDelim = '^'; } else if(curField->level == 3 && next->fieldname == NULL) { expectedDelim = '^'; needToPop = true; // need to pop now. } else if(curField->level == 3 && next->level == 3) { expectedDelim = '&'; } DEBUGOUT(dbg_parser, "determineExpectedDelimiter: expecting " << expectedDelim); return expectedDelim; } // // HL7Parser::stepToNext // // Step to the next field at the same level. // void HL7Parser::stepToNext() { DEBUGOUT(dbg_parser, "stepToNext"); setCurField(curField + 1); } // // HL7Parser::badDelim // // If we hit an extra delimiter in the stream, then there are more subfields in // the record than the specification being implemented indicates should exist. // In that case, we have to enter the "spin" state, which cycles continuously // until the type of delimiter we DID expect to find is encountered, as that // signals the actual end of the current level of tokens. // void HL7Parser::badDelim(char expected, char found) { DEBUGOUT(dbg_parser, "badDelim"); // flag the delimiter error condition delimiterError = true; spinExpectedDelim = expected; spinFoundDelim = found; } // // HL7Parser::nextField // // We need to go as deep into the HL7Value tree as immediately possible. // // Note that the presence of a stack in this state machine suggests that HL7 // is actually a context-insensitive language and not simply regular. Unusually, // it seems you can accept or reject it just fine using a FSA - you just can't // easily extract the proper hierarchical relationship between the fields if // you don't use a PDA-like stack mechanism. // void HL7Parser::nextField(char delimiter) { bool needToPop = false; DEBUGOUT(dbg_parser, "nextField"); if(!curField) // just starting? { setCurField(curSegment->valueTree); } else { char expectedDelim = determineExpectedDelimiter(needToPop); switch(expectedDelim) { case '|': switch(delimiter) { case '&': badDelim('|', '&'); break; // unexpected subfields case '^': badDelim('|', '^'); break; // unexpected subfields case '|': if(needToPop) popAndStep(); // pop and then step else stepToNext(); // step to next at same level break; } break; case '^': switch(delimiter) { case '&': badDelim('^', '&'); break; // unexpected subfields case '^': if(needToPop) popAndStep(); // pop and then step else stepToNext(); // step to next at same level break; case '|': popAndStep(); break; // end of subrecords } break; case '&': switch(delimiter) { case '&': stepToNext(); break; // step to next, same level case '^': popAndStep(); break; // end subrecords case '|': popTwoAndStep(); break; // end subrecordsx2 } } } } /* HL7 Grammar (Pseudo-BNF) ----------------------------------------------------------------------------- := := | :=
:= string value := | & ~ ^ := | := string value HL7 STATE TRANSITION DIAGRAM ----------------------------------------------------------------------------- EXPECTSEGMENTNAME: "MSH" : MSHSPECIALCHARS FIELDVALUE : EXPECTDELIMITER EOS : DONE (Return whole message) * : ERROR MSHSPECIALCHARS: PIPE : MSHSPECIALCHARS HAT : MSHSPECIALCHARS AMPERSAND : MSHSPECIALCHARS BACKSLASH : MSHSPECIALCHARS TILDE : EXPECTDELIMITER * : ERROR EXPECTDELIMITER: PIPE : EXPECTVALUEORDELIMITER HAT : EXPECTVALUEORDELIMITER AMPERSAND : EXPECTVALUEORDELIMITER TILDE : EXPECTVALUEORDELIMITER EOL : EXPECTSEGMENTNAME (Start new segment) EOS : DONE (Return whole message) * : ERROR EXPECTVALUEORDELIMITER: FIELDVALUE : EXPECTDELIMITER PIPE : EXPECTVALUEORDELIMITER HAT : EXPECTVALUEORDELIMITER AMPERSAND : EXPECTVALUEORDELIMITER TILDE : EXPECTVALUEORDELIMITER EOL : EXPECTSEGMENTNAME (Start new segment) EOS : DONE (Return whole message) NB: Transitions to and from STATE_SPIN depend on the state of the parser stack, making this state machine a pushdown automaton when extracting data in a hierarchical fashion - see information at the following URL: http://en.wikipedia.org/wiki/Pushdown_automaton */ // Parser state handlers // // HL7Parser::setNewSegment // // Clear out the per-segment parsing state at the start of a new segment. // void HL7Parser::setNewSegment(HL7Segment *newSegment) { // set the segment pointer curSegment = newSegment; // clear the stack while(!pstack.empty()) pstack.pop(); // clear curField pointer curField = NULL; } // // HL7Parser::doStateExpectSegmentName // // Initial state. Returned to anytime a TOKEN_EOL is found. // void HL7Parser::doStateExpectSegmentName() { HL7Segment *newSegment; tokenizer.nextToken(); const string &text = tokenizer.getTokenText(); DEBUGOUT(dbg_parser, "doStateExpectSegmentName: " << text); switch(tokenizer.getTokenType()) { case HL7Tokenizer::TOKEN_FIELDVALUE: // Instantiate a new segment newSegment = HL7Segment::SegmentForSegmentName(text.c_str()); if(!newSegment) { state = STATE_STOP; // error: unknown segment type errorCode = HL7_ERR_UNKNOWNSEGMENT; return; } setNewSegment(newSegment); segments->push_back(curSegment); if(text == "MSH") { state = STATE_MSHSPECIALCHARS; // hack for MSH encoding characters nextField('|'); // prepare the first field for input tokenizer.setDisableEscapes(true); } else state = STATE_EXPECTDELIMITER; break; case HL7Tokenizer::TOKEN_EOS: state = STATE_DONE; // finished successfully. break; default: // anything else is an error. state = STATE_STOP; // error: unexpected token errorCode = HL7_ERR_UNEXPTOKEN; break; } } // // HL7Parser::doStateMSHSpecialChars // // Get past the special little parser nightmare that is the beginning of a MSH // segment. This semi-qualifies as a hack. // void HL7Parser::doStateMSHSpecialChars() { tokenizer.nextToken(); DEBUGOUT(dbg_parser, "doStateMSHSpecialChars: " << tokenizer.getTokenText()); switch(tokenizer.getTokenType()) { case HL7Tokenizer::TOKEN_PIPE: case HL7Tokenizer::TOKEN_HAT: case HL7Tokenizer::TOKEN_TILDE: case HL7Tokenizer::TOKEN_BACKSLASH: curValue += tokenizer.getTokenText(); break; // continue in same state case HL7Tokenizer::TOKEN_AMPERSAND: curValue += tokenizer.getTokenText(); curField->value = curValue; state = STATE_EXPECTDELIMITER; tokenizer.setDisableEscapes(false); break; default: state = STATE_STOP; // error: unexpected token errorCode = HL7_ERR_UNEXPTOKEN; break; } } // // HL7Parser::doStateExpectDelimiter // // We should see a delimiter. A field value is NOT valid in this position. // void HL7Parser::doStateExpectDelimiter() { tokenizer.nextToken(); DEBUGOUT(dbg_parser, "doStateExpectDelimiter: " << tokenizer.getTokenText()); switch(tokenizer.getTokenType()) { case HL7Tokenizer::TOKEN_PIPE: if(repeating) // if we were repeating, we're not any more. { repeating = false; repeatCount = 0; } // fall through. case HL7Tokenizer::TOKEN_HAT: case HL7Tokenizer::TOKEN_AMPERSAND: nextField(tokenizer.getTokenText()[0]); if(delimiterError) state = STATE_SPIN; else state = STATE_EXPECTVALUEORDELIMITER; break; case HL7Tokenizer::TOKEN_TILDE: // We are going to repeat the top-level section through which we just // traversed. First, restore the saved stack and field pointer... pstack = backupStack; curField = backupValue; DEBUGOUT(dbg_parser, "Repeating field, rewound to " << curField->fieldname); repeating = true; state = STATE_EXPECTVALUEORDELIMITER; break; case HL7Tokenizer::TOKEN_EOL: // end of segment state = STATE_EXPECTSEGMENTNAME; break; case HL7Tokenizer::TOKEN_EOS: // end of message state = STATE_DONE; break; default: state = STATE_STOP; // error: unexpected token errorCode = HL7_ERR_UNEXPTOKEN; break; } } // // HL7Parser::doStateExpectValueorDelimiter // // We expect to either see a field value, or another consecutive delimiter in // the event of an empty field value. // void HL7Parser::doStateExpectValueOrDelimiter() { tokenizer.nextToken(); DEBUGOUT(dbg_parser, "doStateExpectValueOrDelimiter: " << tokenizer.getTokenText()); switch(tokenizer.getTokenType()) { case HL7Tokenizer::TOKEN_FIELDVALUE: if(repeating) { HL7Value *newValue = new HL7Value; newValue->repeatValues = curField->repeatValues; curField->repeatValues = newValue; newValue->repeatCount = repeatCount; newValue->value = tokenizer.getTokenText(); } else curField->value = tokenizer.getTokenText(); state = STATE_EXPECTDELIMITER; break; case HL7Tokenizer::TOKEN_PIPE: if(repeating) // if we were repeating, we're not anymore. { repeating = false; repeatCount = 0; } // fall through. case HL7Tokenizer::TOKEN_HAT: case HL7Tokenizer::TOKEN_AMPERSAND: nextField(tokenizer.getTokenText()[0]); if(delimiterError) state = STATE_SPIN; break; case HL7Tokenizer::TOKEN_TILDE: // We are going to repeat the top-level section through which we just // traversed. First, restore the saved stack and field pointer... ++repeatCount; pstack = backupStack; curField = backupValue; DEBUGOUT(dbg_parser, "Repeating field, rewound to " << curField->fieldname); repeating = true; // stay in same state. break; case HL7Tokenizer::TOKEN_EOL: // end of segment state = STATE_EXPECTSEGMENTNAME; break; case HL7Tokenizer::TOKEN_EOS: // end of message state = STATE_DONE; break; } } // // HL7Parser::doStateSpin // // There are unexpected subfields in the token stream. We need to jump past // them by scanning forward until we find the delimiter we were previously // expecting to see - then we can step/pop the value tree and continue like // nothing ever happened... // void HL7Parser::doStateSpin() { tokenizer.nextToken(); DEBUGOUT(dbg_parser, "doStateSpin: " << tokenizer.getTokenText()); delimiterError = false; // clear the error flag. switch(tokenizer.getTokenType()) { case HL7Tokenizer::TOKEN_PIPE: if(spinExpectedDelim == '^' || spinExpectedDelim == '|') { nextField('|'); state = STATE_EXPECTVALUEORDELIMITER; } break; // otherwise, continue scanning case HL7Tokenizer::TOKEN_HAT: if(spinExpectedDelim == '^') { nextField('^'); state = STATE_EXPECTVALUEORDELIMITER; } break; // otherwise, continue scanning case HL7Tokenizer::TOKEN_EOL: // oh thank god! let's get out of here. state = STATE_EXPECTSEGMENTNAME; break; case HL7Tokenizer::TOKEN_EOS: // hooray! Out without a doubt. state = STATE_DONE; break; case HL7Tokenizer::TOKEN_TILDE: // We are going to repeat the top-level section through which we just // traversed. First, restore the saved stack and field pointer... ++repeatCount; pstack = backupStack; curField = backupValue; repeating = true; state = STATE_EXPECTVALUEORDELIMITER; break; default: break; // anything else: continue spinning } } // Parser state table void (HL7Parser::* HL7Parser::States[])() = { &HL7Parser::doStateExpectSegmentName, // STATE_EXPECTSEGMENTNAME &HL7Parser::doStateMSHSpecialChars, // STATE_MSHSPECIALCHARS &HL7Parser::doStateExpectDelimiter, // STATE_EXPECTDELIMITER &HL7Parser::doStateExpectValueOrDelimiter, // STATE_EXPECTVALUEORDELIMITER &HL7Parser::doStateSpin, // STATE_SPIN }; // // HL7Parser::parse // // Takes a full HL7 message made up of an arbitrary sequence of segments and // transforms it into a series of HL7Segment descendant object instances // containing trees of HL7Value instances, which contain both the field name and // its value (if any) that was discovered in the HL7 segment. // // After this call, check the error state to see if parsing was fully successful // or not. Even if it failed, there may be valid data inside the value tree(s). // // You own the SegmentVector upon return. Free it when you're done using // HL7Parser::DestroySegments, unless you really like memory leaks ;) // HL7Parser::SegmentVector *HL7Parser::parse() { segments = new SegmentVector; while(1) { (this->*States[state])(); if(state == STATE_DONE || state == STATE_STOP) break; } // did we stop because of an error? then flag it. if(state == STATE_STOP) { errorState = true; DEBUGOUT(dbg_parser, "An error occurred during parsing"); } SegmentVector *retval = segments; segments = NULL; // we don't own it any more! return retval; } // // HL7Parser::loadFile // // Load an input file for parsing. // bool HL7Parser::loadFile(const char *filename) { if(input) delete [] input; input = LoadTextFile(filename); if(!input) return false; // reset all internal state tokenizer.setInput(input); tokenizer.setDisableEscapes(false); state = STATE_EXPECTSEGMENTNAME; segments = NULL; curSegment = NULL; curValue = ""; curField = NULL; delimiterError = false; errorState = false; errorCode = HL7_ERR_NOERROR; while(!pstack.empty()) pstack.pop(); repeating = false; backupStack = pstack; backupValue = NULL; repeatCount = 0; return true; } // // HL7Parser::loadString // // For testing only, don't abuse it :P // void HL7Parser::loadString(const char *text) { if(input) delete [] input; input = cpp_strdup(text); if(!input) return; // reset all internal state tokenizer.setInput(input); tokenizer.setDisableEscapes(false); state = STATE_EXPECTSEGMENTNAME; segments = NULL; curSegment = NULL; curValue = ""; curField = NULL; delimiterError = false; errorState = false; errorCode = HL7_ERR_NOERROR; while(!pstack.empty()) pstack.pop(); repeating = false; backupStack = pstack; backupValue = NULL; repeatCount = 0; } // // HL7Parser::getErrorMessage // // Return the error message for the errorCode that occurred during parsing. // const char *HL7Parser::getErrorMessage() const { return errorMessages[errorCode]; } //============================================================================= // // Old parser test code; left here for reference. // /* // Another test function static void TestPrintHL7ValueTree(HL7Value *root, int numComponents) { for(int i = 0; i < numComponents && root->fieldname; i++) { if(root->numExpectedSubComponents) TestPrintHL7ValueTree(root->subComponents, root->numExpectedSubComponents); else { cout << " field:\t" << root->fieldname << "\n" << " level:\t" << root->level << "\n" << " value:\t" << root->value << "\n"; if(root->repeatValues) { // NB: these will print out in reverse order... // Getting them in input order requires recursing down the // list. HL7Value *reprover = root->repeatValues; while(reprover) { cout << " repeat:\t" << reprover->value << "\n"; reprover = reprover->repeatValues; } } cout << endl; } ++root; } } // HL7 tree print test function 2 - only printing fields with a value in them. static void TestPrintHL7ValueTree2(HL7Value *parent, HL7Value *root, int numComponents) { for(int i = 0; i < numComponents && root->fieldname; i++) { if(root->numExpectedSubComponents) TestPrintHL7ValueTree2(root, root->subComponents, root->numExpectedSubComponents); else if(root->value != "" || root->repeatValues) { if(parent) { cout << " " << parent->fieldname << "::field:\t" << root->fieldname << "\n" << " " << parent->fieldname << "::level:\t" << root->level << "\n" << " " << parent->fieldname << "::value:\t" << root->value << "\n"; } else { cout << " field:\t" << root->fieldname << "\n" << " level:\t" << root->level << "\n" << " value:\t" << root->value << "\n"; } if(root->repeatValues) { HL7Value *reprover = root->repeatValues; while(reprover) { cout << " repeat:\t" << reprover->value << "\n"; reprover = reprover->repeatValues; } } cout << endl; } ++root; } } */ // EOF