diff -r 000000000000 -r dd21522fd290 browserutilities/feedsengine/FeedsServer/FeedHandler/src/RssFeedParser.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/browserutilities/feedsengine/FeedsServer/FeedHandler/src/RssFeedParser.cpp Mon Mar 30 12:54:55 2009 +0300 @@ -0,0 +1,549 @@ +/* +* Copyright (c) 2005 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of the License "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: RSS parser +* +*/ + + +#include "FeedAttributes.h" +#include "FeedParserObserver.h" +#include "LeakTracker.h" +#include "RssFeedParser.h" +#include "XmlUtils.h" + + +// Element and attribute names used by this parser. +_LIT8(KRssHead, "rss"); +_LIT8(KRdfHead, "RDF"); + +_LIT8(KDc, "dc"); +_LIT8(KEnc, "enc"); + +_LIT8(KChannel, "channel"); +_LIT8(KTitle, "title"); +_LIT8(KLink, "link"); +_LIT8(KDescription, "description"); +_LIT8(KItem, "item"); +_LIT8(KAbout, "about"); +_LIT8(KGuid, "guid"); +_LIT8(KEnclosure, "enclosure"); +_LIT8(KUrl, "url"); +_LIT8(KResource, "resource"); +_LIT8(KType, "type"); +_LIT8(KLength, "length"); +_LIT8(KDate, "date"); +_LIT8(KPubDate, "pubdate"); +_LIT8(KLastBuildDate, "lastbuilddate"); + +//#ifdef _DEBUG +_LIT8(KImage, "image"); +_LIT8(KTextInput, "textinput"); +//#endif + +// ----------------------------------------------------------------------------- +// RssFeedParser::NewL +// +// Two-phased constructor. +// ----------------------------------------------------------------------------- +// +CRssFeedParser* CRssFeedParser::NewL(CXmlUtils& aXmlUtils) + { + CRssFeedParser* self = new (ELeave) CRssFeedParser(aXmlUtils); + + CleanupStack::PushL(self); + self->ConstructL(); + CleanupStack::Pop(); + + return self; + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::CRssFeedParser +// C++ default constructor can NOT contain any code, that +// might leave. +// ----------------------------------------------------------------------------- +// +CRssFeedParser::CRssFeedParser(CXmlUtils& aXmlUtils): + CFeedParser(aXmlUtils), iLeakTracker(CLeakTracker::ERssFeedParser) + { + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::ConstructL +// Symbian 2nd phase constructor can leave. +// ----------------------------------------------------------------------------- +// +void CRssFeedParser::ConstructL() + { + // The mappings are used to map elements to handler methods. For example when + // a element is found its contents are extracted as CDATA and passed + // on to the observer as a EFeedAttributeTitle. + + // Add mappings to process the children of a channel. + AddFeedMappingL(KNullDesC8(), KItem(), EFeedAttributeUnused, ElementHandlerItemL); + + AddFeedMappingL(KNullDesC8(), KTitle(), EFeedAttributeTitle, ElementHandlerTextL); + AddFeedMappingL(KNullDesC8(), KLink(), EFeedAttributeLink, ElementHandlerUrlL); + AddFeedMappingL(KNullDesC8(), KDescription(), EFeedAttributeDescription, ElementHandlerCDataL); + AddFeedMappingL(KNullDesC8(), KPubDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL); + AddFeedMappingL(KNullDesC8(), KLastBuildDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL); + + AddFeedMappingL(KDc(), KTitle(), EFeedAttributeTitle, ElementHandlerTextL); + AddFeedMappingL(KDc(), KDescription(), EFeedAttributeDescription, ElementHandlerCDataL); + AddFeedMappingL(KDc(), KDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL); + +//#ifdef _DEBUG + AddFeedMappingL(KNullDesC8(), KImage(), EFeedAttributeUnused, ElementHandlerOtherL); + AddFeedMappingL(KNullDesC8(), KTextInput(), EFeedAttributeUnused, ElementHandlerOtherL); +//#endif + + // Add mappings to process the children of an item. + AddItemMappingL(KNullDesC8(), KTitle(), EItemAttributeTitle, ElementHandlerTextL); + AddItemMappingL(KNullDesC8(), KLink(), EItemAttributeLink, ElementHandlerUrlL); + AddItemMappingL(KNullDesC8(), KDescription(), EItemAttributeDescription, ElementHandlerCDataL); + AddItemMappingL(KNullDesC8(), KEnclosure(), EItemAttributeEnclosure, ElementHandlerEnclosureL); + AddItemMappingL(KNullDesC8(), KPubDate(), EItemAttributeTimestamp, ElementHandlerTimestampL); + + AddItemMappingL(KDc(), KTitle(), EItemAttributeTitle, ElementHandlerTextL); + AddItemMappingL(KDc(), KDescription(), EItemAttributeDescription, ElementHandlerCDataL); + AddItemMappingL(KDc(), KDate(), EItemAttributeTimestamp, ElementHandlerTimestampL); + AddItemMappingL(KEnc(), KEnclosure(), EItemAttributeEnclosure, ElementHandlerEnclosureL); + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::~CRssFeedParser +// Deconstructor. +// ----------------------------------------------------------------------------- +// +CRssFeedParser::~CRssFeedParser() + { + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::ParseFeedL +// +// Creates a Feed instance from the given document. +// ----------------------------------------------------------------------------- +// +void CRssFeedParser::ParseFeedL(RXmlEngDocument aDocument, + MFeedParserObserver& aObserver) + { + TXmlEngElement rootNode; + TXmlEngElement channelNode; + + // Get the root node. + rootNode = iXmlUtils.GetDocumentFirstElement(aDocument); + + // Find the channel element. + channelNode = iXmlUtils.GetFirstNamedChild(rootNode, KChannel()); + if (channelNode.IsNull()) + { + User::Leave(KErrCorrupt); + } + + iFeedTimestampNode = NULL; + + // Process the channel's children. + aObserver.FeedBeginsL(); + + // Process the elements in the channel node. + ProcessElementsL(channelNode, aObserver); + + // Process all of the other elements except the channel element as it is + // processed above. This second call is needed in order to support RSS 1.0 + // as well as malformed RSS 2.0. + ProcessElementsL(rootNode, aObserver); + + // Set the timestamp attribute. + if (iFeedTimestampNode.NotNull()) + { + ElementHandlerDateL(*this, iXmlUtils, iFeedTimestampNode, + EFeedAttributeTimestamp, aObserver); + } + + aObserver.FeedEndsL(); + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::IsFeedSupported +// +// Returns true if this feed parser can process the given document. +// ----------------------------------------------------------------------------- +// +TBool CRssFeedParser::IsFeedSupported(CXmlUtils& aXmlUtils, + RXmlEngDocument aDocument, const TDesC& /*aContentType*/) + { + TXmlEngElement node; + + // Get the root element. + node = aXmlUtils.GetDocumentFirstElement(aDocument); + + // If the root node is missing the feed isn't supported. + if (node.IsNull()) + { + return EFalse; + } + + // Check the content-type. + // TODO: + + // If the root node is not HEAD_STR the feed isn't supported. + if (!aXmlUtils.IsNamed(node, KRssHead) && !aXmlUtils.IsNamed(node, KRdfHead)) + { + return EFalse; + } + + return ETrue; + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::ProcessElementsL +// +// Process all elements except the channel elements. +// ----------------------------------------------------------------------------- +// +void CRssFeedParser::ProcessElementsL(TXmlEngElement aNode, + MFeedParserObserver& aObserver) const + { + TXmlEngElement node = NULL; + + node = iXmlUtils.GetFirstElementChild(aNode); + + while (node.NotNull()) + { + // Process the element. + HandleFeedChildL(node, aObserver); + + // Get the next element. + node = iXmlUtils.GetNextSiblingElement(node); + } + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::ElementHandlerItemL +// +// A ElementHandler function that populates the Item instance with the +// values from the given item node. +// ----------------------------------------------------------------------------- +// +void CRssFeedParser::ElementHandlerItemL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, + TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver) + { + CRssFeedParser& parser(const_cast<CRssFeedParser&>(static_cast<const CRssFeedParser&>(aParser))); + TXmlEngElement node; + + // Create an empty Item. + aObserver.ItemBeginsL(); + + // Process the item's children. + parser.iItemTimestampNode = NULL; + node = aXmlUtils.GetFirstElementChild(aNode); + + while (node.NotNull()) + { + // Process the element. + parser.HandleItemChildL(node, aObserver); + + // Get the next element. + node = aXmlUtils.GetNextSiblingElement(node); + } + + // Postprocess the EItemAttributeIdStr attribute. + parser.SetItemIdStrAttributeL(aNode, aObserver); + + // Postprocess the EItemAttributeTimestamp attribute. + if (parser.iItemTimestampNode.NotNull()) + { + ElementHandlerDateL(aParser, aXmlUtils, parser.iItemTimestampNode, + EItemAttributeTimestamp, aObserver); + } + + // Add the Item to the Feed. + aObserver.ItemEndsL(); + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::ElementHandlerEnclosureL +// +// An ElementHandler function that extracts the an enclosure. +// ----------------------------------------------------------------------------- +// +void CRssFeedParser::ElementHandlerEnclosureL(const CFeedParser& /*aParser*/, CXmlUtils& aXmlUtils, + TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver) + { + HBufC* url = NULL; + HBufC* resource = NULL; + HBufC* type = NULL; + HBufC* length = NULL; + + // Create an empty enclosure. + aObserver.EnclosureBeginsL(); + + // Extract and the enclosure attributes + url = aXmlUtils.AttributeL(aNode, KUrl); + CleanupStack::PushL(url); + + resource = aXmlUtils.AttributeL(aNode, KResource); + CleanupStack::PushL(resource); + + type = aXmlUtils.AttributeL(aNode, KType); + CleanupStack::PushL(type); + + length = aXmlUtils.AttributeL(aNode, KLength); + CleanupStack::PushL(length); + + // Add either the resource or url attribute. + if (resource != NULL) + { + aObserver.AddAttributeL(EEnclosureAttributeLink, *resource); + } + else if (url != NULL) + { + aObserver.AddAttributeL(EEnclosureAttributeLink, *url); + } + + // Add the other attributes. + if (type != NULL) + { + aObserver.AddAttributeL(EEnclosureAttributeContentType, *type); + } + + if (length != NULL) + { + aObserver.AddAttributeL(EEnclosureAttributeSize, *length); + } + + // Signal the end of the enclosure. + aObserver.EnclosureEndsL(); + + CleanupStack::PopAndDestroy(length); + CleanupStack::PopAndDestroy(type); + CleanupStack::PopAndDestroy(resource); + CleanupStack::PopAndDestroy(url); + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::ElementHandlerTimestampL +// +// A ElementHandler function that determines the timestamp to use. The +// point is to track the most relevant timestamp. +// ----------------------------------------------------------------------------- +// +void CRssFeedParser::ElementHandlerTimestampL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, + TXmlEngElement aNode, TInt aValueId, MFeedParserObserver& /*aObserver*/) + { + CRssFeedParser& parser(const_cast<CRssFeedParser&>(static_cast<const CRssFeedParser&>(aParser))); + + // Handle feed related timestamps + if (aValueId == EFeedAttributeTimestamp) + { + if (aXmlUtils.IsNamed(aNode, KLastBuildDate)) + { + parser.iFeedTimestampNode = aNode; + } + + // As LastBuildDate is more relevant only update iTimestamp if it wasn't + // already set. PubDate and dc:date mean the same thing, so once it's set, it's set. + else if (parser.iFeedTimestampNode.IsNull()) + { + if (aXmlUtils.IsNamed(aNode, KPubDate) || aXmlUtils.IsNamed(aNode, KDc, KDate)) + { + parser.iFeedTimestampNode = aNode; + } + } + } + + // Otherwise handle item related timestamps + else + { + // PubDate and dc:date mean the same thing, so once it's set, it's set. + if (parser.iItemTimestampNode.IsNull()) + { + if (aXmlUtils.IsNamed(aNode, KPubDate) || aXmlUtils.IsNamed(aNode, KDc, KDate)) + { + parser.iItemTimestampNode = aNode; + } + } + } + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::ElementHandlerOtherL +// +// A ElementHandler function that used for feed validation. +// ----------------------------------------------------------------------------- +// +void CRssFeedParser::ElementHandlerOtherL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, + TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver) + { + const CRssFeedParser& self = static_cast<const CRssFeedParser&>(aParser); + TXmlEngElement node = NULL; + TDesC* text; + + if ((node = aXmlUtils.GetFirstNamedChild(aNode, KTitle)).NotNull()) + { + text = self.iXmlUtils.ExtractTextL(node); + CleanupStack::PushL(text); + + if ((text != NULL) && (text->Length() > 0)) + { + aObserver.OtherTitleL(); + } + + CleanupStack::PopAndDestroy(text); + } + + if ((node = aXmlUtils.GetFirstNamedChild(aNode, KDescription)).NotNull()) + { + text = self.iXmlUtils.ExtractTextL(node); + CleanupStack::PushL(text); + + if ((text != NULL) && (text->Length() > 0)) + { + aObserver.OtherDescriptionL(); + } + + CleanupStack::PopAndDestroy(text); + } + + if ((node = aXmlUtils.GetFirstNamedChild(aNode, KLink)).NotNull()) + { + text = self.iXmlUtils.ExtractTextL(node); + CleanupStack::PushL(text); + + if ((text != NULL) && (text->Length() > 0)) + { + aObserver.OtherLinkL(); + } + + CleanupStack::PopAndDestroy(text); + } + } + + +// ----------------------------------------------------------------------------- +// CRssFeedParser::SetItemIdStrAttributeL +// +// Determine and set the unique IdStr attribute (unique to the feed that is). +// ----------------------------------------------------------------------------- +// +void CRssFeedParser::SetItemIdStrAttributeL(TXmlEngElement aItemNode, + MFeedParserObserver& aObserver) + { + const TInt KStrChunk = 30; + + TDesC* aboutAttribute = NULL; + TDesC* description = NULL; + TDesC* title = NULL; + HBufC* idStr = NULL; + HBufC* url = NULL; + TXmlEngElement node; + + // If the about attribute is present then use it. + aboutAttribute = iXmlUtils.AttributeL(aItemNode, KAbout); + if (aboutAttribute != NULL) + { + CleanupStack::PushL(aboutAttribute); + aObserver.AddAttributeL(EItemAttributeIdStr, *aboutAttribute); + CleanupStack::PopAndDestroy(aboutAttribute); + + return; + } + + // Otherwise if the GUid node is present use it. + node = iXmlUtils.GetFirstNamedChild(aItemNode, KGuid); + if (node.NotNull()) + { + ElementHandlerCDataL(*this, iXmlUtils, node, EItemAttributeIdStr, aObserver); + return; + } + + // Otherwise create a idStr from the first 30 chars of the description and title + // and the last 30 chars of the url. This doesn't guarantee a unique id, but + // it very likely. + node = iXmlUtils.GetFirstNamedChild(aItemNode, KDescription); + description = iXmlUtils.ExtractSimpleTextL(node, KStrChunk); + CleanupStack::PushL(description); + + node = iXmlUtils.GetFirstNamedChild(aItemNode, KTitle); + title = iXmlUtils.ExtractSimpleTextL(node, KStrChunk); + CleanupStack::PushL(title); + + node = iXmlUtils.GetFirstNamedChild(aItemNode, KLink); + url = iXmlUtils.ExtractSimpleTextL(node, KStrChunk, ETrue); + CleanupStack::PushL(url); + + // Construct the idStr from the parts. + TInt len = 0; + + if (description != NULL) + { + len += description->Length(); + } + if (title != NULL) + { + len += title->Length(); + } + if (url != NULL) + { + len += url->Length(); + } + + idStr = HBufC::NewL(len); + CleanupStack::PushL(idStr); + + TPtr ptr(idStr->Des()); + + if (description != NULL) + { + ptr.Append(*description); + } + if (title != NULL) + { + ptr.Append(*title); + } + if (url != NULL) + { + ptr.Append(*url); + } + + // Replace any chars that may interfere with the database. + _LIT(KSpace, " "); + + for (TInt i = 0; i < ptr.Length(); i++) + { + if (ptr[i] == '\'') + { + ptr.Replace(i, 1, KSpace()); + } + } + + // Set the idStr attribute. + aObserver.AddAttributeL(EItemAttributeIdStr, *idStr); + + CleanupStack::PopAndDestroy(idStr); + CleanupStack::PopAndDestroy(url); + CleanupStack::PopAndDestroy(title); + CleanupStack::PopAndDestroy(description); + }