April 27, 2020
Estimated Post Reading Time ~

How to generate sitemap for multi site environments?

This post explains how to generate the sitemap for different sites (home pages) in a multi-site environment of Adobe Experience Manager (AEM).

The sitemap will be generated dynamically whenever the user accesses the sitemap URL for a particular site.

Factory servlet to generate the sitemap.xml:

import java.io.IOException;
import java.util.*;

import javax.servlet.ServletException;
import javax.xml.stream.*;

import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.felix.scr.annotations.*;
import org.apache.sling.api.*;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.servlets.SlingSafeMethodsServlet;
import org.apache.sling.commons.osgi.PropertiesUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.day.cq.commons.Externalizer;
import com.day.cq.wcm.api.*;

/**
 * Factory-configured Sling servlet that renders a {@code sitemap.xml} for the
 * page tree rooted at the requested page.
 *
 * <p>The servlet is bound to the {@code sitemap} selector and {@code xml}
 * extension on the resource types supplied through the OSGi factory
 * configuration. The document is built on every request: the containing page
 * of the requested resource is written first, followed by every descendant
 * accepted by a default {@link PageFilter} that is not flagged with the
 * {@code hideInSearch} page property.
 */
@Component(metatype = true, label = "Site Map", description = "Site Map", configurationFactory = true)
@Service
@SuppressWarnings("serial")
@Properties({
        @Property(name = "sling.servlet.resourceTypes", unbounded = PropertyUnbounded.ARRAY,
                label = "Homepage Resource Type", description = "Sling Resource Type for Home Page component"),
        @Property(name = "sling.servlet.selectors", value = "sitemap", propertyPrivate = true),
        @Property(name = "sling.servlet.extensions", value = "xml", propertyPrivate = true),
        @Property(name = "sling.servlet.methods", value = "GET", propertyPrivate = true),
        @Property(name = "webconsole.configurationFactory.nameHint",
                value = "Site Map on resource types: [{sling.servlet.resourceTypes}]") })
public final class SiteMapGeneratorServlet extends SlingSafeMethodsServlet {

    private static final Logger LOG = LoggerFactory.getLogger(SiteMapGeneratorServlet.class);
    private static final FastDateFormat DATE_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd");
    private static final boolean INCLUDE_LAST_MODIFIED_DEFAULT_VALUE = false;

    @Property(boolValue = INCLUDE_LAST_MODIFIED_DEFAULT_VALUE, label = "Include Last Modified Date",
            description = "If checked, last modified value will be shown in sitemap.")
    private static final String INCLUDE_LAST_MODIFIED_PROPERTY = "include.lastmod";

    private static final String SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /** Page property that marks a page as excluded from the sitemap. */
    private static final String HIDE_IN_SEARCH_PROPERTY = "hideInSearch";

    @Reference
    private Externalizer externalizer;

    private boolean incLastModified;

    /**
     * Reads the factory configuration.
     *
     * @param properties OSGi configuration properties of this servlet instance
     */
    @Activate
    protected void activate(Map<String, Object> properties) {
        this.incLastModified = PropertiesUtil.toBoolean(properties.get(INCLUDE_LAST_MODIFIED_PROPERTY),
                INCLUDE_LAST_MODIFIED_DEFAULT_VALUE);
    }

    /**
     * Streams the sitemap for the page containing the requested resource.
     *
     * <p>Responds with 404 when the requested resource is not part of a page.
     *
     * @param slingRequest  the current request
     * @param slingResponse the response the XML document is written to
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if the response cannot be written or the XML stream fails
     */
    @Override
    protected void doGet(SlingHttpServletRequest slingRequest, SlingHttpServletResponse slingResponse)
            throws ServletException, IOException {

        ResourceResolver resourceResolver = slingRequest.getResourceResolver();
        PageManager pageManager = resourceResolver.adaptTo(PageManager.class);
        Page pageObj = (pageManager == null) ? null : pageManager.getContainingPage(slingRequest.getResource());
        if (pageObj == null) {
            // Without a containing page there is nothing to list; fail cleanly instead of with an NPE.
            LOG.warn("No containing page for resource {}; sitemap not generated",
                    slingRequest.getResource().getPath());
            slingResponse.sendError(SlingHttpServletResponse.SC_NOT_FOUND);
            return;
        }

        slingResponse.setContentType(slingRequest.getResponseContentType());
        // The XML declaration carries no encoding, which means UTF-8; make the writer agree
        // (must be set before getWriter() is called).
        slingResponse.setCharacterEncoding("UTF-8");

        XMLOutputFactory outputFactory = XMLOutputFactory.newFactory();
        try {
            XMLStreamWriter stream = outputFactory.createXMLStreamWriter(slingResponse.getWriter());
            try {
                stream.writeStartDocument("1.0");
                stream.writeStartElement("", "urlset", SITEMAP_NAMESPACE);
                stream.writeNamespace("", SITEMAP_NAMESPACE);

                // Current page
                writeXML(pageObj, stream, slingRequest);

                // Deep listing of all descendants accepted by the default page filter.
                Iterator<Page> children = pageObj.listChildren(new PageFilter(), true);
                while (children.hasNext()) {
                    Page childPage = children.next();
                    // Pages flagged as hidden in search in their page properties stay out of the sitemap.
                    if (childPage != null && !isHiddenInSearch(childPage)) {
                        writeXML(childPage, stream, slingRequest);
                    }
                }

                stream.writeEndElement();
                stream.writeEndDocument();
                stream.flush();
            } finally {
                // Releases the stream writer's resources; the underlying response writer stays open.
                stream.close();
            }
        } catch (XMLStreamException e) {
            throw new IOException(e);
        }
    }

    /**
     * Tells whether a page is flagged with the {@code hideInSearch} property.
     *
     * <p>The typed property lookup is used so that the flag is honoured whether it
     * is stored as a Boolean or as a String; a missing or empty value counts as
     * not hidden.
     *
     * @param page the page to inspect
     * @return {@code true} if the page must be left out of the sitemap
     */
    private static boolean isHiddenInSearch(Page page) {
        return page.getProperties().get(HIDE_IN_SEARCH_PROPERTY, Boolean.FALSE);
    }

    /**
     * Writes one {@code <url>} entry for the given page.
     *
     * @param pageObj      page to describe
     * @param xmlStream    target XML stream
     * @param slingRequest current request, used to build the absolute link
     * @throws XMLStreamException if writing to the stream fails
     */
    private void writeXML(Page pageObj, XMLStreamWriter xmlStream, SlingHttpServletRequest slingRequest)
            throws XMLStreamException {
        xmlStream.writeStartElement(SITEMAP_NAMESPACE, "url");

        // Mirror the scheme of the incoming request in the generated links.
        String protocolPort = "http";
        if (slingRequest.isSecure()) {
            protocolPort = "https";
        }

        String locPath = this.externalizer.absoluteLink(slingRequest, protocolPort,
                String.format("%s.html", pageObj.getPath()));

        writeXMLElement(xmlStream, "loc", locPath);

        if (this.incLastModified) {
            Calendar calendarObj = pageObj.getLastModified();
            if (null != calendarObj) {
                writeXMLElement(xmlStream, "lastmod", DATE_FORMAT.format(calendarObj));
            }
        }
        xmlStream.writeEndElement();
    }

    /**
     * Writes a simple text element in the sitemap namespace.
     *
     * @param xmlStream   target XML stream
     * @param elementName local name of the element
     * @param xmlText     character content of the element
     * @throws XMLStreamException if writing to the stream fails
     */
    private void writeXMLElement(final XMLStreamWriter xmlStream, final String elementName, final String xmlText)
            throws XMLStreamException {
        xmlStream.writeStartElement(SITEMAP_NAMESPACE, elementName);
        xmlStream.writeCharacters(xmlText);
        xmlStream.writeEndElement();
    }

}

Create a new servlet configuration from the factory through the OSGi console by providing the following details:

Homepage Resource Type - add the home page resource types that should be considered for generating sitemap.xml

Include Last Modified Date - If selected the last modified date of the page will be included as part of the sitemap.xml





Enable hideInSearch checkbox in all page properties


Select hideInSearch for those child pages that should be excluded from sitemap.xml

Access the sitemap.xml for the site with the following URL - http://<<site host>>/<<parent node with configured resource type>>.sitemap.xml

e.g.
http://example1.com/en.sitemap.xml
http://example2.com/en.sitemap.xml

<?xml version="1.0"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://example1.com/en.html</loc>
<lastmod>2017-02-28</lastmod>
</url>
<url>
<loc>http://example1.com/en/test.html</loc>
<lastmod>2017-02-28</lastmod>
</url>
</urlset>


By aem4beginner

No comments:

Post a Comment

If you have any doubts or questions, please let us know.