From 912b61c44270cfa540eccf40fcc3208581b55439 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 5 Dec 2023 14:48:57 +0300 Subject: Fix CVE-2018-1311 --- libxerces-c/xercesc/dtd-decl-use-after-free.patch | 596 ++++++++++++++++++++++ libxerces-c/xercesc/internal/DGXMLScanner.cpp | 6 +- libxerces-c/xercesc/internal/IGXMLScanner.cpp | 6 +- libxerces-c/xercesc/internal/ReaderMgr.cpp | 207 +++++--- libxerces-c/xercesc/internal/ReaderMgr.hpp | 92 +++- 5 files changed, 825 insertions(+), 82 deletions(-) create mode 100644 libxerces-c/xercesc/dtd-decl-use-after-free.patch diff --git a/libxerces-c/xercesc/dtd-decl-use-after-free.patch b/libxerces-c/xercesc/dtd-decl-use-after-free.patch new file mode 100644 index 0000000..ad13ca0 --- /dev/null +++ b/libxerces-c/xercesc/dtd-decl-use-after-free.patch @@ -0,0 +1,596 @@ +diff --git a/libxerces-c/xercesc/internal/DGXMLScanner.cpp b/libxerces-c/xercesc/internal/DGXMLScanner.cpp +index 4334223..895dfeb 100644 +--- a/libxerces-c/xercesc/internal/DGXMLScanner.cpp ++++ b/libxerces-c/xercesc/internal/DGXMLScanner.cpp +@@ -1052,13 +1052,12 @@ void DGXMLScanner::scanDocTypeDecl() + DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); + declDTD->setSystemId(sysId); + declDTD->setIsExternal(true); +- Janitor janDecl(declDTD); + + // Mark this one as a throw at end + reader->setThrowAtEnd(true); + + // And push it onto the stack, with its pseudo name +- fReaderMgr.pushReader(reader, declDTD); ++ fReaderMgr.pushReaderAdoptEntity(reader, declDTD); + + // Tell it its not in an include section + dtdScanner.scanExtSubsetDecl(false, true); +@@ -2131,13 +2130,12 @@ Grammar* DGXMLScanner::loadDTDGrammar(const InputSource& src, + DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); + declDTD->setSystemId(src.getSystemId()); + declDTD->setIsExternal(true); +- Janitor janDecl(declDTD); + + // Mark this one as a throw at end + newReader->setThrowAtEnd(true); + + // And push it onto the stack, with its pseudo name +- fReaderMgr.pushReader(newReader, declDTD); ++ fReaderMgr.pushReaderAdoptEntity(newReader, declDTD); + + // If we have a doc type handler and advanced callbacks are enabled, + // call the doctype event. +diff --git a/libxerces-c/xercesc/internal/IGXMLScanner.cpp b/libxerces-c/xercesc/internal/IGXMLScanner.cpp +index 912ec0c..d694b23 100644 +--- a/libxerces-c/xercesc/internal/IGXMLScanner.cpp ++++ b/libxerces-c/xercesc/internal/IGXMLScanner.cpp +@@ -1535,13 +1535,12 @@ void IGXMLScanner::scanDocTypeDecl() + DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); + declDTD->setSystemId(sysId); + declDTD->setIsExternal(true); +- Janitor janDecl(declDTD); + + // Mark this one as a throw at end + reader->setThrowAtEnd(true); + + // And push it onto the stack, with its pseudo name +- fReaderMgr.pushReader(reader, declDTD); ++ fReaderMgr.pushReaderAdoptEntity(reader, declDTD); + + // Tell it its not in an include section + dtdScanner.scanExtSubsetDecl(false, true); +@@ -3098,13 +3097,12 @@ Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src, + DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); + declDTD->setSystemId(src.getSystemId()); + declDTD->setIsExternal(true); +- Janitor janDecl(declDTD); + + // Mark this one as a throw at end + newReader->setThrowAtEnd(true); + + // And push it onto the stack, with its pseudo name +- fReaderMgr.pushReader(newReader, declDTD); ++ fReaderMgr.pushReaderAdoptEntity(newReader, declDTD); + + // If we have a doc type handler and advanced callbacks are enabled, + // call the doctype event. +diff --git a/libxerces-c/xercesc/internal/ReaderMgr.cpp b/libxerces-c/xercesc/internal/ReaderMgr.cpp +index d14483e..369a1ba 100644 +--- a/libxerces-c/xercesc/internal/ReaderMgr.cpp ++++ b/libxerces-c/xercesc/internal/ReaderMgr.cpp +@@ -45,12 +45,61 @@ + + XERCES_CPP_NAMESPACE_BEGIN + ++// --------------------------------------------------------------------------- ++// ReaderMgr::ReaderData: Constructors and Destructor ++// --------------------------------------------------------------------------- ++ReaderMgr::ReaderData::ReaderData( XMLReader* const reader ++ , XMLEntityDecl* const entity ++ , const bool adoptEntity) : ++ ++ fReader(reader) ++ , fEntity(entity) ++ , fEntityAdopted(adoptEntity) ++{ ++} ++ ++ReaderMgr::ReaderData::~ReaderData() ++{ ++ delete fReader; ++ ++ if (fEntityAdopted) ++ delete fEntity; ++} ++ ++// --------------------------------------------------------------------------- ++// ReaderMgr::ReaderData: Getter methods ++// --------------------------------------------------------------------------- ++XMLReader* ReaderMgr::ReaderData::getReader() const ++{ ++ return fReader; ++} ++ ++XMLEntityDecl* ReaderMgr::ReaderData::getEntity() const ++{ ++ return fEntity; ++} ++ ++bool ReaderMgr::ReaderData::getEntityAdopted() const ++{ ++ return fEntityAdopted; ++} ++ ++// ++// This method gives up the entity object ownership but leaves the fEntity ++// pointer intact. ++// ++XMLEntityDecl* ReaderMgr::ReaderData::releaseEntity() ++{ ++ fEntityAdopted = false; ++ return fEntity; ++} ++ + // --------------------------------------------------------------------------- + // ReaderMgr: Constructors and Destructor + // --------------------------------------------------------------------------- + ReaderMgr::ReaderMgr(MemoryManager* const manager) : + +- fCurEntity(0) ++ fCurReaderData(0) + , fCurReader(0) + , fEntityHandler(0) + , fEntityStack(0) +@@ -66,12 +115,11 @@ ReaderMgr::ReaderMgr(MemoryManager* const manager) : + ReaderMgr::~ReaderMgr() + { + // +- // Clean up the reader and entity stacks. Note that we don't own the +- // entities, so we don't delete the current entity (and the entity stack +- // does not own its elements either, so deleting it will not delete the +- // entities it still references!) ++ // Clean up the reader stack and orphan entities container. Note that ++ // all adopted entities (potentially contained in fCurReaderData, ++ // fReaderStack, and fEntityStack) are deleted here. + // +- delete fCurReader; ++ delete fCurReaderData; + delete fReaderStack; + delete fEntityStack; + } +@@ -357,9 +405,9 @@ void ReaderMgr::cleanStackBackTo(const XMLSize_t readerNum) + if (fReaderStack->empty()) + ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::RdrMgr_ReaderIdNotFound, fMemoryManager); + +- delete fCurReader; +- fCurReader = fReaderStack->pop(); +- fCurEntity = fEntityStack->pop(); ++ delete fCurReaderData; ++ fCurReaderData = fReaderStack->pop(); ++ fCurReader = fCurReaderData->getReader (); + } + } + +@@ -795,20 +843,20 @@ const XMLCh* ReaderMgr::getCurrentEncodingStr() const + + const XMLEntityDecl* ReaderMgr::getCurrentEntity() const + { +- return fCurEntity; ++ return fCurReaderData? fCurReaderData->getEntity() : 0; + } + + + XMLEntityDecl* ReaderMgr::getCurrentEntity() + { +- return fCurEntity; ++ return fCurReaderData? fCurReaderData->getEntity() : 0; + } + + + XMLSize_t ReaderMgr::getReaderDepth() const + { + // If the stack doesn't exist, its obviously zero +- if (!fEntityStack) ++ if (!fReaderStack) + return 0; + + // +@@ -816,7 +864,7 @@ XMLSize_t ReaderMgr::getReaderDepth() const + // reader. So if there is no current reader and none on the stack, + // its zero, else its some non-zero value. + // +- XMLSize_t retVal = fEntityStack->size(); ++ XMLSize_t retVal = fReaderStack->size(); + if (fCurReader) + retVal++; + return retVal; +@@ -852,7 +900,7 @@ void ReaderMgr::getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const + bool ReaderMgr::isScanningPERefOutOfLiteral() const + { + // If the current reader is not for an entity, then definitely not +- if (!fCurEntity) ++ if (!fCurReaderData || !fCurReaderData->getEntity()) + return false; + + // +@@ -867,13 +915,19 @@ bool ReaderMgr::isScanningPERefOutOfLiteral() const + return false; + } + +- + bool ReaderMgr::pushReader( XMLReader* const reader + , XMLEntityDecl* const entity) ++{ ++ return pushReaderAdoptEntity(reader, entity, false); ++} ++ ++bool ReaderMgr::pushReaderAdoptEntity( XMLReader* const reader ++ , XMLEntityDecl* const entity ++ , const bool adoptEntity) + { + // + // First, if an entity was passed, we have to confirm that this entity +- // is not already on the entity stack. If so, then this is a recursive ++ // is not already on the reader stack. If so, then this is a recursive + // entity expansion, so we issue an error and refuse to put the reader + // on the stack. + // +@@ -881,19 +935,30 @@ bool ReaderMgr::pushReader( XMLReader* const reader + // nothing to do. If there is no entity stack yet, then of coures it + // cannot already be there. + // +- if (entity && fEntityStack) ++ if (entity && fReaderStack) + { +- const XMLSize_t count = fEntityStack->size(); ++ // @@ Strangely, we don't check the entity at the top of the stack ++ // (fCurReaderData). Is it a bug? ++ // ++ const XMLSize_t count = fReaderStack->size(); + const XMLCh* const theName = entity->getName(); + for (XMLSize_t index = 0; index < count; index++) + { +- const XMLEntityDecl* curDecl = fEntityStack->elementAt(index); ++ const XMLEntityDecl* curDecl = ++ fReaderStack->elementAt(index)->getEntity(); ++ + if (curDecl) + { + if (XMLString::equals(theName, curDecl->getName())) + { +- // Oops, already there so delete reader and return ++ // Oops, already there so delete reader and entity and ++ // return. ++ // + delete reader; ++ ++ if (adoptEntity) ++ delete entity; ++ + return false; + } + } +@@ -905,52 +970,37 @@ bool ReaderMgr::pushReader( XMLReader* const reader + // tell it it does own its elements. + // + if (!fReaderStack) +- fReaderStack = new (fMemoryManager) RefStackOf(16, true, fMemoryManager); +- +- // And the entity stack, which does not own its elements +- if (!fEntityStack) +- fEntityStack = new (fMemoryManager) RefStackOf(16, false, fMemoryManager); ++ fReaderStack = new (fMemoryManager) RefStackOf(16, true, fMemoryManager); + + // +- // Push the current reader and entity onto their respective stacks. +- // Note that the the current entity can be null if the current reader +- // is not for an entity. ++ // Push the current reader and entity onto the stack. Note that ++ // the current entity can be null if the current reader is not for ++ // an entity. + // +- if (fCurReader) +- { +- fReaderStack->push(fCurReader); +- fEntityStack->push(fCurEntity); +- } ++ if (fCurReaderData) ++ fReaderStack->push(fCurReaderData); + + // + // Make the passed reader and entity the current top of stack. The + // passed entity can (and often is) null. + // ++ fCurReaderData = new (fMemoryManager) ReaderData(reader, entity, adoptEntity); + fCurReader = reader; +- fCurEntity = entity; + + return true; + } + +- + void ReaderMgr::reset() + { + // Reset all of the flags + fThrowEOE = false; + + // Delete the current reader and flush the reader stack +- delete fCurReader; ++ delete fCurReaderData; ++ fCurReaderData = 0; + fCurReader = 0; + if (fReaderStack) + fReaderStack->removeAllElements(); +- +- // +- // And do the same for the entity stack, but don't delete the current +- // entity (if any) since we don't own them. +- // +- fCurEntity = 0; +- if (fEntityStack) +- fEntityStack->removeAllElements(); + } + + +@@ -1014,7 +1064,9 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const + // search the stack; else, keep the reader that we've got since its + // either an external entity reader or the main file reader. + // +- const XMLEntityDecl* curEntity = fCurEntity; ++ const XMLEntityDecl* curEntity = ++ fCurReaderData? fCurReaderData->getEntity() : 0; ++ + if (curEntity && !curEntity->isExternal()) + { + XMLSize_t index = fReaderStack->size(); +@@ -1024,7 +1076,7 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const + { + // Move down to the previous element and get a pointer to it + index--; +- curEntity = fEntityStack->elementAt(index); ++ curEntity = fReaderStack->elementAt(index)->getEntity(); + + // + // If its null or its an external entity, then this reader +@@ -1032,12 +1084,12 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const + // + if (!curEntity) + { +- theReader = fReaderStack->elementAt(index); ++ theReader = fReaderStack->elementAt(index)->getReader (); + break; + } + else if (curEntity->isExternal()) + { +- theReader = fReaderStack->elementAt(index); ++ theReader = fReaderStack->elementAt(index)->getReader (); + break; + } + +@@ -1048,6 +1100,11 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const + } + } + ++ // @@ It feels like we may end up with theReader being from the top of ++ // the stack (fCurReader) and itsEntity being from the bottom of the ++ // stack (if there are no null or external entities on the stack). ++ // Is it a bug? ++ // + itsEntity = curEntity; + return theReader; + } +@@ -1059,31 +1116,59 @@ bool ReaderMgr::popReader() + // We didn't get any more, so try to pop off a reader. If the reader + // stack is empty, then we are at the end, so return false. + // ++ // @@ It feels like we never pop the reader pushed to the stack first ++ // (think of fReaderStack empty but fCurReader not null). Is it a ++ // bug? ++ // + if (fReaderStack->empty()) + return false; + + // +- // Remember the current entity, before we pop off a new one. We might ++ // Remember the current reader, before we pop off a new one. We might + // need this to throw the end of entity exception at the end. + // +- XMLEntityDecl* prevEntity = fCurEntity; ++ ReaderData* prevReaderData = fCurReaderData; + const bool prevReaderThrowAtEnd = fCurReader->getThrowAtEnd(); + const XMLSize_t readerNum = fCurReader->getReaderNum(); + + // +- // Delete the current reader and pop a new reader and entity off +- // the stacks. ++ // Pop a new reader and entity off the stack. + // +- delete fCurReader; +- fCurReader = fReaderStack->pop(); +- fCurEntity = fEntityStack->pop(); ++ fCurReaderData = fReaderStack->pop(); ++ fCurReader = fCurReaderData->getReader(); + + // + // If there was a previous entity, and either the fThrowEOE flag is set +- // or reader was marked as such, then throw an end of entity. ++ // or reader was marked as such, then throw an end of entity. Otherwise, ++ // delete the previous reader data. + // +- if (prevEntity && (fThrowEOE || prevReaderThrowAtEnd)) +- throw EndOfEntityException(prevEntity, readerNum); ++ if (prevReaderData->getEntity() && (fThrowEOE || prevReaderThrowAtEnd)) ++ { ++ // ++ // If the entity is adopted, then move it to fEntityStack so that ++ // its life-time is prolonged to the life-time of this reader ++ // manager. Also delete the previous reader data before throwing ++ // EndOfEntityException. ++ // ++ XMLEntityDecl* entity; ++ ++ if (prevReaderData->getEntityAdopted()) ++ { ++ if (!fEntityStack) ++ fEntityStack = new (fMemoryManager) RefStackOf(16, true, fMemoryManager); ++ ++ entity = prevReaderData->releaseEntity(); ++ fEntityStack->push(entity); ++ } ++ else ++ entity = prevReaderData->getEntity(); ++ ++ delete prevReaderData; ++ ++ throw EndOfEntityException(entity, readerNum); ++ } ++ else ++ delete prevReaderData; + + while (true) + { +@@ -1113,9 +1198,9 @@ bool ReaderMgr::popReader() + return false; + + // Else pop again and try it one more time +- delete fCurReader; +- fCurReader = fReaderStack->pop(); +- fCurEntity = fEntityStack->pop(); ++ delete fCurReaderData; ++ fCurReaderData = fReaderStack->pop(); ++ fCurReader = fCurReaderData->getReader(); + } + return true; + } +diff --git a/libxerces-c/xercesc/internal/ReaderMgr.hpp b/libxerces-c/xercesc/internal/ReaderMgr.hpp +index f63b219..3209564 100644 +--- a/libxerces-c/xercesc/internal/ReaderMgr.hpp ++++ b/libxerces-c/xercesc/internal/ReaderMgr.hpp +@@ -160,6 +160,12 @@ public : + XMLReader* const reader + , XMLEntityDecl* const entity + ); ++ bool pushReaderAdoptEntity ++ ( ++ XMLReader* const reader ++ , XMLEntityDecl* const entity ++ , const bool adoptEntity = true ++ ); + void reset(); + + +@@ -208,16 +214,72 @@ private : + ReaderMgr(const ReaderMgr&); + ReaderMgr& operator=(const ReaderMgr&); + ++ // ----------------------------------------------------------------------- ++ // Private data types ++ // ----------------------------------------------------------------------- ++ class ReaderData : public XMemory ++ { ++ public : ++ // --------------------------------------------------------------------- ++ // Constructors and Destructor ++ // --------------------------------------------------------------------- ++ ReaderData ++ ( XMLReader* const reader ++ , XMLEntityDecl* const entity ++ , const bool adoptEntity ++ ); ++ ++ ~ReaderData(); ++ ++ // ---------------------------------------------------------------------- ++ // Getter methods ++ // ---------------------------------------------------------------------- ++ XMLReader* getReader() const; ++ XMLEntityDecl* getEntity() const; ++ bool getEntityAdopted() const; ++ ++ XMLEntityDecl* releaseEntity(); ++ ++ private : ++ // --------------------------------------------------------------------- ++ // Unimplemented constructors and operators ++ // --------------------------------------------------------------------- ++ ReaderData(); ++ ReaderData(const ReaderData&); ++ ReaderData& operator=(const ReaderData&); ++ ++ // --------------------------------------------------------------------- ++ // Private data members ++ // ++ // fReader ++ // This is the pointer to the reader object that must be destroyed ++ // when this object is destroyed. ++ // ++ // fEntity ++ // fEntityAdopted ++ // This is the pointer to the entity object that, if adopted, must ++ // be destroyed when this object is destroyed. ++ // ++ // Note that we need to keep up with which of the pushed readers ++ // are pushed entity values that are being spooled. This is done ++ // to avoid the problem of recursive definitions. ++ // --------------------------------------------------------------------- ++ XMLReader* fReader; ++ XMLEntityDecl* fEntity; ++ bool fEntityAdopted; ++ }; ++ + // ----------------------------------------------------------------------- + // Private data members + // +- // fCurEntity +- // This is the current top of stack entity. We pull it off the stack +- // and store it here for efficiency. ++ // fCurReaderData ++ // This is the current top of the reader data stack. We pull it off ++ // the stack and store it here for efficiency. + // + // fCurReader +- // This is the current top of stack reader. We pull it off the +- // stack and store it here for efficiency. ++ // This is the reader of the current top of the reader data stack. ++ // It contains the same value as fCurReaderData->fReader or NULL, ++ // if fCurReaderData is NULL. We store it here for efficiency. + // + // fEntityHandler + // This is the installed entity handler. Its installed via the +@@ -225,10 +287,14 @@ private : + // process of creating external entity readers. + // + // fEntityStack +- // We need to keep up with which of the pushed readers are pushed +- // entity values that are being spooled. This is done to avoid the +- // problem of recursive definitions. This stack consists of refs to +- // EntityDecl objects for the pushed entities. ++ // This is a storage of orphaned XMLEntityDecl objects. The ++ // popReader() function adds a reader manager-adopted entity object ++ // to this storage before passing its pointer to the constructor ++ // of the being thrown EndOfEntityException exception. This makes ++ // sure that the life-time of an entity exposed to the exception ++ // handlers is the same as the life-time of reader manager (and so ++ // normally the life-time of the scanner which embeds the reader ++ // manager). + // + // fNextReaderNum + // This is the reader serial number value. Each new reader that is +@@ -236,8 +302,8 @@ private : + // us catch things like partial markup errors and such. + // + // fReaderStack +- // This is the stack of reader references. We own all the readers +- // and destroy them when they are used up. ++ // This is the stack of reader data references. We own all the ++ // entries and destroy them when they are used up. + // + // fThrowEOE + // This flag controls whether we throw an exception when we hit an +@@ -252,12 +318,12 @@ private : + // fStandardUriConformant + // This flag controls whether we force conformant URI + // ----------------------------------------------------------------------- +- XMLEntityDecl* fCurEntity; ++ ReaderData* fCurReaderData; + XMLReader* fCurReader; + XMLEntityHandler* fEntityHandler; + RefStackOf* fEntityStack; + unsigned int fNextReaderNum; +- RefStackOf* fReaderStack; ++ RefStackOf* fReaderStack; + bool fThrowEOE; + XMLReader::XMLVersion fXMLVersion; + bool fStandardUriConformant; diff --git a/libxerces-c/xercesc/internal/DGXMLScanner.cpp b/libxerces-c/xercesc/internal/DGXMLScanner.cpp index 4334223..895dfeb 100644 --- a/libxerces-c/xercesc/internal/DGXMLScanner.cpp +++ b/libxerces-c/xercesc/internal/DGXMLScanner.cpp @@ -1052,13 +1052,12 @@ void DGXMLScanner::scanDocTypeDecl() DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); declDTD->setSystemId(sysId); declDTD->setIsExternal(true); - Janitor janDecl(declDTD); // Mark this one as a throw at end reader->setThrowAtEnd(true); // And push it onto the stack, with its pseudo name - fReaderMgr.pushReader(reader, declDTD); + fReaderMgr.pushReaderAdoptEntity(reader, declDTD); // Tell it its not in an include section dtdScanner.scanExtSubsetDecl(false, true); @@ -2131,13 +2130,12 @@ Grammar* DGXMLScanner::loadDTDGrammar(const InputSource& src, DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); declDTD->setSystemId(src.getSystemId()); declDTD->setIsExternal(true); - Janitor janDecl(declDTD); // Mark this one as a throw at end newReader->setThrowAtEnd(true); // And push it onto the stack, with its pseudo name - fReaderMgr.pushReader(newReader, declDTD); + fReaderMgr.pushReaderAdoptEntity(newReader, declDTD); // If we have a doc type handler and advanced callbacks are enabled, // call the doctype event. diff --git a/libxerces-c/xercesc/internal/IGXMLScanner.cpp b/libxerces-c/xercesc/internal/IGXMLScanner.cpp index 912ec0c..d694b23 100644 --- a/libxerces-c/xercesc/internal/IGXMLScanner.cpp +++ b/libxerces-c/xercesc/internal/IGXMLScanner.cpp @@ -1535,13 +1535,12 @@ void IGXMLScanner::scanDocTypeDecl() DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); declDTD->setSystemId(sysId); declDTD->setIsExternal(true); - Janitor janDecl(declDTD); // Mark this one as a throw at end reader->setThrowAtEnd(true); // And push it onto the stack, with its pseudo name - fReaderMgr.pushReader(reader, declDTD); + fReaderMgr.pushReaderAdoptEntity(reader, declDTD); // Tell it its not in an include section dtdScanner.scanExtSubsetDecl(false, true); @@ -3098,13 +3097,12 @@ Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src, DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); declDTD->setSystemId(src.getSystemId()); declDTD->setIsExternal(true); - Janitor janDecl(declDTD); // Mark this one as a throw at end newReader->setThrowAtEnd(true); // And push it onto the stack, with its pseudo name - fReaderMgr.pushReader(newReader, declDTD); + fReaderMgr.pushReaderAdoptEntity(newReader, declDTD); // If we have a doc type handler and advanced callbacks are enabled, // call the doctype event. diff --git a/libxerces-c/xercesc/internal/ReaderMgr.cpp b/libxerces-c/xercesc/internal/ReaderMgr.cpp index d14483e..369a1ba 100644 --- a/libxerces-c/xercesc/internal/ReaderMgr.cpp +++ b/libxerces-c/xercesc/internal/ReaderMgr.cpp @@ -46,11 +46,60 @@ XERCES_CPP_NAMESPACE_BEGIN // --------------------------------------------------------------------------- +// ReaderMgr::ReaderData: Constructors and Destructor +// --------------------------------------------------------------------------- +ReaderMgr::ReaderData::ReaderData( XMLReader* const reader + , XMLEntityDecl* const entity + , const bool adoptEntity) : + + fReader(reader) + , fEntity(entity) + , fEntityAdopted(adoptEntity) +{ +} + +ReaderMgr::ReaderData::~ReaderData() +{ + delete fReader; + + if (fEntityAdopted) + delete fEntity; +} + +// --------------------------------------------------------------------------- +// ReaderMgr::ReaderData: Getter methods +// --------------------------------------------------------------------------- +XMLReader* ReaderMgr::ReaderData::getReader() const +{ + return fReader; +} + +XMLEntityDecl* ReaderMgr::ReaderData::getEntity() const +{ + return fEntity; +} + +bool ReaderMgr::ReaderData::getEntityAdopted() const +{ + return fEntityAdopted; +} + +// +// This method gives up the entity object ownership but leaves the fEntity +// pointer intact. +// +XMLEntityDecl* ReaderMgr::ReaderData::releaseEntity() +{ + fEntityAdopted = false; + return fEntity; +} + +// --------------------------------------------------------------------------- // ReaderMgr: Constructors and Destructor // --------------------------------------------------------------------------- ReaderMgr::ReaderMgr(MemoryManager* const manager) : - fCurEntity(0) + fCurReaderData(0) , fCurReader(0) , fEntityHandler(0) , fEntityStack(0) @@ -66,12 +115,11 @@ ReaderMgr::ReaderMgr(MemoryManager* const manager) : ReaderMgr::~ReaderMgr() { // - // Clean up the reader and entity stacks. Note that we don't own the - // entities, so we don't delete the current entity (and the entity stack - // does not own its elements either, so deleting it will not delete the - // entities it still references!) + // Clean up the reader stack and orphan entities container. Note that + // all adopted entities (potentially contained in fCurReaderData, + // fReaderStack, and fEntityStack) are deleted here. // - delete fCurReader; + delete fCurReaderData; delete fReaderStack; delete fEntityStack; } @@ -357,9 +405,9 @@ void ReaderMgr::cleanStackBackTo(const XMLSize_t readerNum) if (fReaderStack->empty()) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::RdrMgr_ReaderIdNotFound, fMemoryManager); - delete fCurReader; - fCurReader = fReaderStack->pop(); - fCurEntity = fEntityStack->pop(); + delete fCurReaderData; + fCurReaderData = fReaderStack->pop(); + fCurReader = fCurReaderData->getReader (); } } @@ -795,20 +843,20 @@ const XMLCh* ReaderMgr::getCurrentEncodingStr() const const XMLEntityDecl* ReaderMgr::getCurrentEntity() const { - return fCurEntity; + return fCurReaderData? fCurReaderData->getEntity() : 0; } XMLEntityDecl* ReaderMgr::getCurrentEntity() { - return fCurEntity; + return fCurReaderData? fCurReaderData->getEntity() : 0; } XMLSize_t ReaderMgr::getReaderDepth() const { // If the stack doesn't exist, its obviously zero - if (!fEntityStack) + if (!fReaderStack) return 0; // @@ -816,7 +864,7 @@ XMLSize_t ReaderMgr::getReaderDepth() const // reader. So if there is no current reader and none on the stack, // its zero, else its some non-zero value. // - XMLSize_t retVal = fEntityStack->size(); + XMLSize_t retVal = fReaderStack->size(); if (fCurReader) retVal++; return retVal; @@ -852,7 +900,7 @@ void ReaderMgr::getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const bool ReaderMgr::isScanningPERefOutOfLiteral() const { // If the current reader is not for an entity, then definitely not - if (!fCurEntity) + if (!fCurReaderData || !fCurReaderData->getEntity()) return false; // @@ -867,13 +915,19 @@ bool ReaderMgr::isScanningPERefOutOfLiteral() const return false; } - bool ReaderMgr::pushReader( XMLReader* const reader , XMLEntityDecl* const entity) { + return pushReaderAdoptEntity(reader, entity, false); +} + +bool ReaderMgr::pushReaderAdoptEntity( XMLReader* const reader + , XMLEntityDecl* const entity + , const bool adoptEntity) +{ // // First, if an entity was passed, we have to confirm that this entity - // is not already on the entity stack. If so, then this is a recursive + // is not already on the reader stack. If so, then this is a recursive // entity expansion, so we issue an error and refuse to put the reader // on the stack. // @@ -881,19 +935,30 @@ bool ReaderMgr::pushReader( XMLReader* const reader // nothing to do. If there is no entity stack yet, then of coures it // cannot already be there. // - if (entity && fEntityStack) + if (entity && fReaderStack) { - const XMLSize_t count = fEntityStack->size(); + // @@ Strangely, we don't check the entity at the top of the stack + // (fCurReaderData). Is it a bug? + // + const XMLSize_t count = fReaderStack->size(); const XMLCh* const theName = entity->getName(); for (XMLSize_t index = 0; index < count; index++) { - const XMLEntityDecl* curDecl = fEntityStack->elementAt(index); + const XMLEntityDecl* curDecl = + fReaderStack->elementAt(index)->getEntity(); + if (curDecl) { if (XMLString::equals(theName, curDecl->getName())) { - // Oops, already there so delete reader and return + // Oops, already there so delete reader and entity and + // return. + // delete reader; + + if (adoptEntity) + delete entity; + return false; } } @@ -905,52 +970,37 @@ bool ReaderMgr::pushReader( XMLReader* const reader // tell it it does own its elements. // if (!fReaderStack) - fReaderStack = new (fMemoryManager) RefStackOf(16, true, fMemoryManager); - - // And the entity stack, which does not own its elements - if (!fEntityStack) - fEntityStack = new (fMemoryManager) RefStackOf(16, false, fMemoryManager); + fReaderStack = new (fMemoryManager) RefStackOf(16, true, fMemoryManager); // - // Push the current reader and entity onto their respective stacks. - // Note that the the current entity can be null if the current reader - // is not for an entity. + // Push the current reader and entity onto the stack. Note that + // the current entity can be null if the current reader is not for + // an entity. // - if (fCurReader) - { - fReaderStack->push(fCurReader); - fEntityStack->push(fCurEntity); - } + if (fCurReaderData) + fReaderStack->push(fCurReaderData); // // Make the passed reader and entity the current top of stack. The // passed entity can (and often is) null. // + fCurReaderData = new (fMemoryManager) ReaderData(reader, entity, adoptEntity); fCurReader = reader; - fCurEntity = entity; return true; } - void ReaderMgr::reset() { // Reset all of the flags fThrowEOE = false; // Delete the current reader and flush the reader stack - delete fCurReader; + delete fCurReaderData; + fCurReaderData = 0; fCurReader = 0; if (fReaderStack) fReaderStack->removeAllElements(); - - // - // And do the same for the entity stack, but don't delete the current - // entity (if any) since we don't own them. - // - fCurEntity = 0; - if (fEntityStack) - fEntityStack->removeAllElements(); } @@ -1014,7 +1064,9 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const // search the stack; else, keep the reader that we've got since its // either an external entity reader or the main file reader. // - const XMLEntityDecl* curEntity = fCurEntity; + const XMLEntityDecl* curEntity = + fCurReaderData? fCurReaderData->getEntity() : 0; + if (curEntity && !curEntity->isExternal()) { XMLSize_t index = fReaderStack->size(); @@ -1024,7 +1076,7 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const { // Move down to the previous element and get a pointer to it index--; - curEntity = fEntityStack->elementAt(index); + curEntity = fReaderStack->elementAt(index)->getEntity(); // // If its null or its an external entity, then this reader @@ -1032,12 +1084,12 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const // if (!curEntity) { - theReader = fReaderStack->elementAt(index); + theReader = fReaderStack->elementAt(index)->getReader (); break; } else if (curEntity->isExternal()) { - theReader = fReaderStack->elementAt(index); + theReader = fReaderStack->elementAt(index)->getReader (); break; } @@ -1048,6 +1100,11 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const } } + // @@ It feels like we may end up with theReader being from the top of + // the stack (fCurReader) and itsEntity being from the bottom of the + // stack (if there are no null or external entities on the stack). + // Is it a bug? + // itsEntity = curEntity; return theReader; } @@ -1059,31 +1116,59 @@ bool ReaderMgr::popReader() // We didn't get any more, so try to pop off a reader. If the reader // stack is empty, then we are at the end, so return false. // + // @@ It feels like we never pop the reader pushed to the stack first + // (think of fReaderStack empty but fCurReader not null). Is it a + // bug? + // if (fReaderStack->empty()) return false; // - // Remember the current entity, before we pop off a new one. We might + // Remember the current reader, before we pop off a new one. We might // need this to throw the end of entity exception at the end. // - XMLEntityDecl* prevEntity = fCurEntity; + ReaderData* prevReaderData = fCurReaderData; const bool prevReaderThrowAtEnd = fCurReader->getThrowAtEnd(); const XMLSize_t readerNum = fCurReader->getReaderNum(); // - // Delete the current reader and pop a new reader and entity off - // the stacks. + // Pop a new reader and entity off the stack. // - delete fCurReader; - fCurReader = fReaderStack->pop(); - fCurEntity = fEntityStack->pop(); + fCurReaderData = fReaderStack->pop(); + fCurReader = fCurReaderData->getReader(); // // If there was a previous entity, and either the fThrowEOE flag is set - // or reader was marked as such, then throw an end of entity. + // or reader was marked as such, then throw an end of entity. Otherwise, + // delete the previous reader data. // - if (prevEntity && (fThrowEOE || prevReaderThrowAtEnd)) - throw EndOfEntityException(prevEntity, readerNum); + if (prevReaderData->getEntity() && (fThrowEOE || prevReaderThrowAtEnd)) + { + // + // If the entity is adopted, then move it to fEntityStack so that + // its life-time is prolonged to the life-time of this reader + // manager. Also delete the previous reader data before throwing + // EndOfEntityException. + // + XMLEntityDecl* entity; + + if (prevReaderData->getEntityAdopted()) + { + if (!fEntityStack) + fEntityStack = new (fMemoryManager) RefStackOf(16, true, fMemoryManager); + + entity = prevReaderData->releaseEntity(); + fEntityStack->push(entity); + } + else + entity = prevReaderData->getEntity(); + + delete prevReaderData; + + throw EndOfEntityException(entity, readerNum); + } + else + delete prevReaderData; while (true) { @@ -1113,9 +1198,9 @@ bool ReaderMgr::popReader() return false; // Else pop again and try it one more time - delete fCurReader; - fCurReader = fReaderStack->pop(); - fCurEntity = fEntityStack->pop(); + delete fCurReaderData; + fCurReaderData = fReaderStack->pop(); + fCurReader = fCurReaderData->getReader(); } return true; } diff --git a/libxerces-c/xercesc/internal/ReaderMgr.hpp b/libxerces-c/xercesc/internal/ReaderMgr.hpp index f63b219..3209564 100644 --- a/libxerces-c/xercesc/internal/ReaderMgr.hpp +++ b/libxerces-c/xercesc/internal/ReaderMgr.hpp @@ -160,6 +160,12 @@ public : XMLReader* const reader , XMLEntityDecl* const entity ); + bool pushReaderAdoptEntity + ( + XMLReader* const reader + , XMLEntityDecl* const entity + , const bool adoptEntity = true + ); void reset(); @@ -209,15 +215,71 @@ private : ReaderMgr& operator=(const ReaderMgr&); // ----------------------------------------------------------------------- + // Private data types + // ----------------------------------------------------------------------- + class ReaderData : public XMemory + { + public : + // --------------------------------------------------------------------- + // Constructors and Destructor + // --------------------------------------------------------------------- + ReaderData + ( XMLReader* const reader + , XMLEntityDecl* const entity + , const bool adoptEntity + ); + + ~ReaderData(); + + // ---------------------------------------------------------------------- + // Getter methods + // ---------------------------------------------------------------------- + XMLReader* getReader() const; + XMLEntityDecl* getEntity() const; + bool getEntityAdopted() const; + + XMLEntityDecl* releaseEntity(); + + private : + // --------------------------------------------------------------------- + // Unimplemented constructors and operators + // --------------------------------------------------------------------- + ReaderData(); + ReaderData(const ReaderData&); + ReaderData& operator=(const ReaderData&); + + // --------------------------------------------------------------------- + // Private data members + // + // fReader + // This is the pointer to the reader object that must be destroyed + // when this object is destroyed. + // + // fEntity + // fEntityAdopted + // This is the pointer to the entity object that, if adopted, must + // be destroyed when this object is destroyed. + // + // Note that we need to keep up with which of the pushed readers + // are pushed entity values that are being spooled. This is done + // to avoid the problem of recursive definitions. + // --------------------------------------------------------------------- + XMLReader* fReader; + XMLEntityDecl* fEntity; + bool fEntityAdopted; + }; + + // ----------------------------------------------------------------------- // Private data members // - // fCurEntity - // This is the current top of stack entity. We pull it off the stack - // and store it here for efficiency. + // fCurReaderData + // This is the current top of the reader data stack. We pull it off + // the stack and store it here for efficiency. // // fCurReader - // This is the current top of stack reader. We pull it off the - // stack and store it here for efficiency. + // This is the reader of the current top of the reader data stack. + // It contains the same value as fCurReaderData->fReader or NULL, + // if fCurReaderData is NULL. We store it here for efficiency. // // fEntityHandler // This is the installed entity handler. Its installed via the @@ -225,10 +287,14 @@ private : // process of creating external entity readers. // // fEntityStack - // We need to keep up with which of the pushed readers are pushed - // entity values that are being spooled. This is done to avoid the - // problem of recursive definitions. This stack consists of refs to - // EntityDecl objects for the pushed entities. + // This is a storage of orphaned XMLEntityDecl objects. The + // popReader() function adds a reader manager-adopted entity object + // to this storage before passing its pointer to the constructor + // of the being thrown EndOfEntityException exception. This makes + // sure that the life-time of an entity exposed to the exception + // handlers is the same as the life-time of reader manager (and so + // normally the life-time of the scanner which embeds the reader + // manager). // // fNextReaderNum // This is the reader serial number value. Each new reader that is @@ -236,8 +302,8 @@ private : // us catch things like partial markup errors and such. // // fReaderStack - // This is the stack of reader references. We own all the readers - // and destroy them when they are used up. + // This is the stack of reader data references. We own all the + // entries and destroy them when they are used up. // // fThrowEOE // This flag controls whether we throw an exception when we hit an @@ -252,12 +318,12 @@ private : // fStandardUriConformant // This flag controls whether we force conformant URI // ----------------------------------------------------------------------- - XMLEntityDecl* fCurEntity; + ReaderData* fCurReaderData; XMLReader* fCurReader; XMLEntityHandler* fEntityHandler; RefStackOf* fEntityStack; unsigned int fNextReaderNum; - RefStackOf* fReaderStack; + RefStackOf* fReaderStack; bool fThrowEOE; XMLReader::XMLVersion fXMLVersion; bool fStandardUriConformant; -- cgit v1.1