/* MIT Licence Copyright (c) 2002 Seairth Jacobs Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ //--------------------------------------------------------------------------- #include #include "onxep.h" //macros #define IS_LETTER(c) (((c >= 0x41) && (c <= 0x5A)) || ((c >= 0x61) && (c <= 0x7A)) || ((c >= 0xC0) && (c <= 0xD6)) || (c >= 0xF8)) #define IS_DIGIT(c) ((c >= 0x30) && (c <= 0x39)) #define IS_NAMECHAR(c) (IS_LETTER(c) || IS_DIGIT(c) || (c == 0x5F)) #define IS_WHITESPACE(c) ((c == 0x09) || (c == 0x0A) || (c == 0x0D) || (c == 0x20)) //the following contants and structure is used when parsing is done with delayed events #define ONXEVENT_ENTERCONTAINERNODE 0 #define ONXEVENT_EXITCONTAINERNODE 1 #define ONXEVENT_VALUENODE 2 struct ONXEventStruct { int nEventType; char * pszName; ONXValueNodeValue * pValues; unsigned int nValueLength; ONXEventStruct * pNext; }; //--------------------------------------------------------------------------- ONXEventParser::ONXEventParser(ONXEventHandler * pEvHandler) { if(! pEvHandler){ // must always have an event handler. If none is provided, // then a the default one will be used (which does nothing // but eat the events. pEventHandler = new ONXEventHandler(); } else { pEventHandler = pEvHandler; } bDelayedEvents = false; } //--------------------------------------------------------------------------- ONXEventParser::~ONXEventParser() { delete pEventHandler; pEventHandler = NULL; } //--------------------------------------------------------------------------- bool ONXEventParser::Parse(char * pszSource) { bool bSuccess = true; int nLevel = 0; // copy the source string into a buffer. This way, the calling program // will still have a copy to reference in case of any errors encountered. // The parsing routine will take this buffer and chop it up into several // small strings, which it will then pass on through events to the calling // program. int nBufLen = strlen(pszSource); char * pszONXBuffer = new char[nBufLen + 1]; strcpy(pszONXBuffer, pszSource); pszONXBuffer[nBufLen] = 0; char * pszONX = pszONXBuffer; char * pszName = NULL; char * pszValue = NULL; char * pszTemp = NULL; unsigned int nValuePoolSize = 1024; ONXValueNodeValue * pValues = new ONXValueNodeValue[nValuePoolSize]; unsigned int nDelayedValues = 0; unsigned int nValues = 0; // maximum ContainerNode depth that can be processed (arbitrary limit) char * pNodes[1024]; // temporary variables used to process escape sequences char cESChar; unsigned int nESLength; unsigned int nHexLength; bool lBOV; bool lEOV; bool lCharData; // when delayed events are specified, store each event in a linked list. The // first node is a dummy node for simplification of code. ONXEventStruct * pEvents = new ONXEventStruct; ONXEventStruct * pEventTail = pEvents; pEventTail->pNext = NULL; while(*pszONX && bSuccess) { // The outer switch looks for either the beginning of a node or whitespace switch(*pszONX) { case ':': // Have we encountered a node? // First, see if we have a valid name pszONX++; // Name ::= (Letter | '_' ) (NameChar)* // NameChar ::= (Letter | Digit | '_')+ // Letter ::= [0x41-0x5A] | [0x61-0x7A] | [0xC0-0xD6] | [0xD8-0xF6] | [0xF8-0xFF] // Digit ::= [0x30-0x39] if((*pszONX == '_') || IS_LETTER((unsigned char)*pszONX)) { // This appears to be the beginning of a node name pszName = pszONX; pszONX++; while(IS_NAMECHAR((unsigned char)*pszONX)) { // this is a part of the name pszONX++; } // now, we have reached the first non-name character! switch(*pszONX) { case '{': // ContainerNode *pszONX = 0; pszONX++; if((nLevel == 0) && (strcmp(pszName, "onx") != 0)) { // This is not a valid RootNode bSuccess = pEventHandler->Error(ONX_INVALID_ROOTNODE, pszONX - pszONXBuffer); } else { // This is either the RootNode or another ContainerNode. pNodes[nLevel] = pszName; nLevel++; if(bDelayedEvents) { pEventTail->pNext = new ONXEventStruct; pEventTail = pEventTail->pNext; pEventTail->pNext = NULL; pEventTail->nEventType = ONXEVENT_ENTERCONTAINERNODE; pEventTail->pszName = pszName; } else { bSuccess = pEventHandler->EnterContainerNode(pszName); } } break; case '[': // ValueNode *pszONX = 0; pszONX++; nValues = 0; lEOV = false; // now, look for quoted strings... while(*pszONX && bSuccess) { switch(*pszONX) { case '\'': case '\"': // beginning of a character-type value lCharData = (*pszONX == '\"'); pszONX++; // initialize pointers pszValue = pszONX; pszTemp = pszValue; while(*pszONX && bSuccess) { if(*pszONX == '\\') { // see what the next character is in the escape sequence pszONX++; switch(*pszONX) { case '\\': case '\"': *pszTemp = *pszONX; pszONX++; pszTemp++; break; case '0': // this is a special escape for the null character. // as a result, we will replace the zero character // with a null character *pszTemp = 0; pszONX++; pszTemp++; break; case 'x': // we are processing a Hexadecimal value pszONX++; if(IS_DIGIT(*pszONX)) { cESChar = (*pszONX - '0') << 4; } else if((*pszONX >= 'A') && (*pszONX <= 'F')) { cESChar = ((*pszONX - 'A') + 10) << 4; } else if((*pszONX >= 'a') && (*pszONX <= 'f')) { cESChar = ((*pszONX - 'a') + 10) << 4; } else { // This is an invalid escape sequence bSuccess = pEventHandler->Error(ONX_INVALID_ESCAPE_SEQUENCE, pszONX - pszONXBuffer); bSuccess = false; break; } pszONX++; if(IS_DIGIT(*pszONX)) { cESChar += (*pszONX - '0'); } else if((*pszONX >= 'A') && (*pszONX <= 'F')) { cESChar += (*pszONX - 'A') + 10; } else if((*pszONX >= 'a') && (*pszONX <= 'f')) { cESChar = (*pszONX - 'a') + 10; } else { // This is an invalid escape sequence bSuccess = pEventHandler->Error(ONX_INVALID_ESCAPE_SEQUENCE, pszONX - pszONXBuffer); bSuccess = false; break; } // now, replace the last hex digit witht the // decoded value. *pszTemp = cESChar; pszONX++; pszTemp++; break; case '[': // we are processing a Hexadecimal value // if the following test is true, then we have encountered // this at the beginning of the value. As a result, if we // get through this part, just move the pszValue forward // instead of doing a memcpy(). This should improve // performance, possibly significantly. lBOV = (pszONX == pszValue + 2); pszONX++; nESLength = 0; nHexLength = 0; while(*pszONX != ']') { if(nHexLength > 7) { bSuccess = pEventHandler->Error(ONX_INVALID_ESCAPE_SEQUENCE, pszONX - pszONXBuffer); bSuccess = false; break; } else { nHexLength++; } if(IS_DIGIT(*pszONX)) { cESChar = (*pszONX - '0'); } else if((*pszONX >= 'A') && (*pszONX <= 'F')) { cESChar = (*pszONX - 'A') + 10; } else if((*pszONX >= 'a') && (*pszONX <= 'f')) { cESChar = (*pszONX - 'a') + 10; } else { // This is an invalid escape sequence bSuccess = pEventHandler->Error(ONX_INVALID_ESCAPE_SEQUENCE, pszONX - pszONXBuffer); bSuccess = false; break; } nESLength = (nESLength << 4) + cESChar; nHexLength++; pszONX++; } if(bSuccess) { if(nESLength == 0) { // This is an invalid escape sequence since zero is an invalid value bSuccess = pEventHandler->Error(ONX_INVALID_ESCAPE_SEQUENCE, pszONX - pszONXBuffer); bSuccess = false; break; } else { pszONX++; if(lBOV) { // since this is the beginning of the value, just move the // pszValue pointer instead of doing a copy. pszValue = pszONX; pszONX += nESLength; pszTemp = pszONX; } else { // copy the buffer without looking at what's inside it. memcpy(pszTemp, pszONX, nESLength); pszONX += nESLength; pszTemp += nESLength; } } } break; default: // This is an invalid escape sequence bSuccess = pEventHandler->Error(ONX_INVALID_ESCAPE_SEQUENCE, pszONX - pszONXBuffer); bSuccess = false; } } else if(*pszONX == '\"') { // we are at the end of the value! *pszTemp = 0; pszONX++; pValues[nDelayedValues + nValues].value = pszValue; pValues[nDelayedValues + nValues].length = pszTemp - pszValue; nValues++; break; } else { // Valid characters: [0x01-0x21] | [0x23-0x5B] | [0x5D-0xFF] if((*pszONX != 0) && (*pszONX != '\"') && (*pszONX != '\\')) { // this is a valid character. if(pszTemp != pszONX) { // when processing escape sequences, it is possible // for the pszTemp and pszONX pointers to become // unsynchronized. *pszTemp = *pszONX; } pszONX++; pszTemp++; } else { // We have encountered a character that is not allowed // inside the content block. bSuccess = pEventHandler->Error(ONX_INVALID_VALUENODE, pszONX - pszONXBuffer); bSuccess = false; } } // if } // while break; case ']': // end of the ValueNode lEOV = true; // test optional end-name pszONX++; // check to see if the formal version is used. First look at the first character. if(*pszONX == *pszName) { if(strncmp(pszName, pszONX, strlen(pszName)) == 0) { pszONX += strlen(pszName); if((*pszONX != 0) && (! IS_WHITESPACE(*pszONX)) && (*pszONX != ':')) { // this was suppose to be the closing rootnode, but it appears to // be invalid... bSuccess = pEventHandler->Error(ONX_INVALID_VALUENODE, pszName - pszONXBuffer); bSuccess = false; } else { // pszTemp - pszValue = length of the value if(bDelayedEvents) { pEventTail->pNext = new ONXEventStruct; pEventTail = pEventTail->pNext; pEventTail->pNext = NULL; pEventTail->nEventType = ONXEVENT_VALUENODE; pEventTail->pszName = pszName; pEventTail->pValues = &pValues[nDelayedValues]; pEventTail->nValueLength = nValues; nDelayedValues += nValues; } else { bSuccess = pEventHandler->ValueNode(pszName, pValues, nValues); } } } else { // invalid! bSuccess = pEventHandler->Error(ONX_INVALID_VALUENODE, pszName - pszONXBuffer); bSuccess = false; } } else { // pszTemp - pszValue = length of the value if(bDelayedEvents) { pEventTail->pNext = new ONXEventStruct; pEventTail = pEventTail->pNext; pEventTail->pNext = NULL; pEventTail->nEventType = ONXEVENT_VALUENODE; pEventTail->pszName = pszName; pEventTail->pValues = &pValues[nDelayedValues]; pEventTail->nValueLength = nValues; nDelayedValues += nValues; } else { bSuccess = pEventHandler->ValueNode(pszName, pValues, nValues); } } pszONX++; break; default: if(IS_WHITESPACE(*pszONX)) { // whitespace. Ignore. pszONX++; } else { // error! bSuccess = pEventHandler->Error(ONX_INVALID_VALUENODE, pszONX - pszONXBuffer); bSuccess = false; } } // switch(*pszONX) if(lEOV) break; } // while(*pszONX && bSuccess) break; default: // Error! bSuccess = pEventHandler->Error(ONX_INVALID_NAME, pszONX - pszONXBuffer); bSuccess = false; } // switch(*pszONX) } else { // Error! bSuccess = pEventHandler->Error(ONX_INVALID_NAME, pszONX - pszONXBuffer); bSuccess = false; } break; case '}': // End of a ContainerNode // 1) Test ContainerNode Depth // 2) For all level > 1, test optional end-name // 3) For level = 1, test required "onx" // 4) Else Error! if(nLevel > 0) { pszONX++; // check to see if the formal version is used. First look at the first character. if(*pszONX == *pNodes[nLevel-1]) { if(strncmp(pNodes[nLevel-1], pszONX, strlen(pNodes[nLevel-1])) == 0) { pszONX += strlen(pNodes[nLevel - 1]); if((nLevel == 1) && (*pszONX != 0) && (! IS_WHITESPACE(*pszONX)) && (*pszONX != ':')) { // this was suppose to be the closing rootnode, but it appears to // be invalid... bSuccess = pEventHandler->Error(ONX_INVALID_ROOTNODE, pszName - pszONXBuffer); bSuccess = false; } else { if(bDelayedEvents) { pEventTail->pNext = new ONXEventStruct; pEventTail = pEventTail->pNext; pEventTail->pNext = NULL; pEventTail->nEventType = ONXEVENT_EXITCONTAINERNODE; pEventTail->pszName = pNodes[nLevel-1]; } else { bSuccess = pEventHandler->ExitContainerNode(pNodes[nLevel-1]); } nLevel--; } } else { // invalid! bSuccess = pEventHandler->Error(ONX_INVALID_NODE, pszName - pszONXBuffer); bSuccess = false; } } else { // this is likely the short version, so send the event... if(nLevel == 1) { // This should have been the root node! This is invalid! bSuccess = pEventHandler->Error(ONX_INVALID_ROOTNODE, pszName - pszONXBuffer); bSuccess = false; } else { if(bDelayedEvents) { pEventTail->pNext = new ONXEventStruct; pEventTail = pEventTail->pNext; pEventTail->pNext = NULL; pEventTail->nEventType = ONXEVENT_EXITCONTAINERNODE; pEventTail->pszName = pNodes[nLevel-1]; } else { bSuccess = pEventHandler->ExitContainerNode(pNodes[nLevel-1]); } nLevel--; } } // At this point, the node could still be invalid. However, this will be detected // on the outer loop. } else { // we are not nested, so this is an extra (and invalid "}") bSuccess = pEventHandler->Error(ONX_INVALID_ROOTNODE, pszONX - pszONXBuffer); bSuccess = false; } break; default: if(IS_WHITESPACE(*pszONX)) { pszONX++; } else { // Error! this is an invalid bSuccess = pEventHandler->Error(ONX_INVALID_NODE, pszONX - pszONXBuffer); bSuccess = false; } } // switch(*pszONX) } // while(*pszONX && bSuccess) // delete the dummy event record pEventTail = pEvents->pNext; delete pEvents; pEvents = pEventTail; if(bDelayedEvents) { // go through the remaining events and fire them. After each event // is fired, delete the record! while(pEvents) { // only fire the event if bSuccess = .T. (meaning that no errors // or caller-requested cancellations have occured up to this point) if(bSuccess) { switch(pEvents->nEventType) { case ONXEVENT_ENTERCONTAINERNODE: bSuccess = pEventHandler->EnterContainerNode(pEvents->pszName); break; case ONXEVENT_EXITCONTAINERNODE: bSuccess = pEventHandler->ExitContainerNode(pEvents->pszName); break; case ONXEVENT_VALUENODE: bSuccess = pEventHandler->ValueNode(pEvents->pszName, pEvents->pValues, pEvents->nValueLength); break; } } pEventTail = pEvents->pNext; delete pEvents; pEvents = pEventTail; } } delete[] pValues; delete[] pszONXBuffer; pszONX = NULL; return bSuccess; }