This post explains how to generate the sitemap for different sites (home pages) in a multi-site environment of Adobe Experience Manager (AEM).
The sitemap will be generated dynamically whenever the user accesses the sitemap URL for a particular site.
Factory servlet to generate the sitemap.xml:
import java.io.IOException;
import java.util.*;
import javax.servlet.ServletException;
import javax.xml.stream.*;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.felix.scr.annotations.*;
import org.apache.sling.api.*;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.servlets.SlingSafeMethodsServlet;
import org.apache.sling.commons.osgi.PropertiesUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.day.cq.commons.Externalizer;
import com.day.cq.wcm.api.*;
/**
 * Factory servlet that renders a sitemap for the page containing the requested resource.
 *
 * <p>It is bound to the {@code sitemap} selector and {@code xml} extension on the resource
 * types configured per factory instance. The response is a {@code urlset} document in the
 * sitemaps.org 0.9 namespace containing one {@code url} entry for the containing page and
 * one for every page returned by its deep child listing, except child pages whose
 * {@code hideInSearch} property is true.
 */
@Component(metatype = true, label = "Site Map", description = "Site Map", configurationFactory = true)
@Service
@SuppressWarnings("serial")
@Properties({
    @Property(name = "sling.servlet.resourceTypes", unbounded = PropertyUnbounded.ARRAY,
        label = "Homepage Resource Type", description = "Sling Resource Type for Home Page component"),
    @Property(name = "sling.servlet.selectors", value = "sitemap", propertyPrivate = true),
    @Property(name = "sling.servlet.extensions", value = "xml", propertyPrivate = true),
    @Property(name = "sling.servlet.methods", value = "GET", propertyPrivate = true),
    @Property(name = "webconsole.configurationFactory.nameHint",
        value = "Site Map on resource types: [{sling.servlet.resourceTypes}]") })
public final class SiteMapGeneratorServlet extends SlingSafeMethodsServlet {

    private static final Logger LOG = LoggerFactory.getLogger(SiteMapGeneratorServlet.class);

    /** Date pattern used for the optional {@code lastmod} element. */
    private static final FastDateFormat DATE_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd");

    private static final boolean INCLUDE_LAST_MODIFIED_DEFAULT_VALUE = false;

    @Property(boolValue = INCLUDE_LAST_MODIFIED_DEFAULT_VALUE, label = "Include Last Modified Date",
        description = "If checked, last modified value will be shown in sitemap.")
    private static final String INCLUDE_LAST_MODIFIED_PROPERTY = "include.lastmod";

    private static final String SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /** Page property that excludes a child page from the sitemap when it is true. */
    private static final String HIDE_IN_SEARCH_PROPERTY = "hideInSearch";

    @Reference
    private Externalizer externalizer;

    /** Whether a {@code lastmod} element is written for each page; set in {@link #activate}. */
    private boolean incLastModified;

    /**
     * Reads the "include.lastmod" setting from the component configuration.
     *
     * @param properties configuration of this factory instance
     */
    @Activate
    protected void activate(Map<String, Object> properties) {
        this.incLastModified = PropertiesUtil.toBoolean(properties.get(INCLUDE_LAST_MODIFIED_PROPERTY),
            INCLUDE_LAST_MODIFIED_DEFAULT_VALUE);
    }

    /**
     * Writes the sitemap for the page that contains the requested resource.
     *
     * <p>Responds with 404 when no {@link PageManager} can be obtained or the resource is
     * not inside a page, instead of failing with a {@link NullPointerException}.
     *
     * @param slingRequest  the current request
     * @param slingResponse the response the XML is written to
     * @throws ServletException declared by the servlet contract; not thrown directly here
     * @throws IOException      on response I/O errors, or wrapping an {@link XMLStreamException}
     */
    @Override
    protected void doGet(SlingHttpServletRequest slingRequest, SlingHttpServletResponse slingResponse)
            throws ServletException, IOException {
        ResourceResolver resourceResolver = slingRequest.getResourceResolver();
        PageManager pageManager = resourceResolver.adaptTo(PageManager.class);
        Page pageObj = (pageManager == null) ? null : pageManager.getContainingPage(slingRequest.getResource());
        if (pageObj == null) {
            // SC_NOT_FOUND is inherited from HttpServletResponse by SlingHttpServletResponse.
            slingResponse.sendError(SlingHttpServletResponse.SC_NOT_FOUND);
            return;
        }

        slingResponse.setContentType(slingRequest.getResponseContentType());
        // The document carries no encoding declaration, so XML parsers will read it as UTF-8;
        // make the response writer emit UTF-8 as well.
        slingResponse.setCharacterEncoding("UTF-8");

        XMLStreamWriter stream = null;
        try {
            stream = XMLOutputFactory.newFactory().createXMLStreamWriter(slingResponse.getWriter());
            stream.writeStartDocument("1.0");
            stream.writeStartElement("", "urlset", SITEMAP_NAMESPACE);
            stream.writeNamespace("", SITEMAP_NAMESPACE);

            // The containing page itself is always listed.
            writeXML(pageObj, stream, slingRequest);

            Iterator<Page> children = pageObj.listChildren(new PageFilter(), true);
            while (children.hasNext()) {
                Page childPage = children.next();
                // Pages flagged as hidden in search must not show up in the sitemap.
                if (childPage != null && !isHiddenInSearch(childPage)) {
                    writeXML(childPage, stream, slingRequest);
                }
            }

            stream.writeEndElement();
            stream.writeEndDocument();
            stream.flush();
        } catch (XMLStreamException e) {
            throw new IOException(e);
        } finally {
            if (stream != null) {
                try {
                    // Releases the writer's resources; the underlying response writer is
                    // left for the container to manage.
                    stream.close();
                } catch (XMLStreamException e) {
                    LOG.warn("Unable to close sitemap XML stream", e);
                }
            }
        }
    }

    /**
     * Tells whether a page is flagged as hidden in search.
     *
     * <p>The value is read through the typed value-map accessor so that a Boolean
     * {@code false} and the strings {@code "false"} / {@code ""} are all treated as
     * "not hidden". NOTE(review): relies on the value map's String-to-Boolean conversion
     * yielding false for an empty string - confirm for the value map in use.
     *
     * @param page the page to inspect
     * @return {@code true} if {@code hideInSearch} is present and converts to true
     */
    private static boolean isHiddenInSearch(Page page) {
        return page.getProperties().get(HIDE_IN_SEARCH_PROPERTY, Boolean.FALSE);
    }

    /**
     * Writes one {@code url} entry: a {@code loc} element with the externalized
     * {@code <path>.html} link and, when enabled and available, a {@code lastmod} element.
     *
     * @param pageObj      the page to describe
     * @param xmlStream    the writer positioned inside {@code urlset}
     * @param slingRequest the current request, used to pick the scheme and externalize the link
     * @throws XMLStreamException if writing fails
     */
    private void writeXML(Page pageObj, XMLStreamWriter xmlStream, SlingHttpServletRequest slingRequest)
            throws XMLStreamException {
        xmlStream.writeStartElement(SITEMAP_NAMESPACE, "url");

        // Mirror the scheme of the incoming request in the generated links.
        String scheme = slingRequest.isSecure() ? "https" : "http";
        String locPath = this.externalizer.absoluteLink(slingRequest, scheme,
            String.format("%s.html", pageObj.getPath()));
        writeXMLElement(xmlStream, "loc", locPath);

        if (this.incLastModified) {
            Calendar lastModified = pageObj.getLastModified();
            if (lastModified != null) {
                writeXMLElement(xmlStream, "lastmod", DATE_FORMAT.format(lastModified));
            }
        }

        xmlStream.writeEndElement();
    }

    /**
     * Writes a simple text element in the sitemap namespace.
     *
     * @param xmlStream   the writer to append to
     * @param elementName local name of the element
     * @param xmlText     character content of the element
     * @throws XMLStreamException if writing fails
     */
    private void writeXMLElement(final XMLStreamWriter xmlStream, final String elementName, final String xmlText)
            throws XMLStreamException {
        xmlStream.writeStartElement(SITEMAP_NAMESPACE, elementName);
        xmlStream.writeCharacters(xmlText);
        xmlStream.writeEndElement();
    }
}
Create a new servlet configuration from the factory through the OSGi console by providing the following details:
Homepage Resource Type - add the home page resource types that should be considered for generating sitemap.xml
Include Last Modified Date - If selected the last modified date of the page will be included as part of the sitemap.xml
data:image/s3,"s3://crabby-images/b064d/b064d3a6ac3892a894f23d47b4e229210458d83b" alt=""
data:image/s3,"s3://crabby-images/b673d/b673d943dbb8af2136279b22fb1b9ff52f239d86" alt=""
Enable hideInSearch checkbox in all page properties
data:image/s3,"s3://crabby-images/6b688/6b68818f39c0f7bebdf3d65a84d5da5cdcb70339" alt=""
Select hideInSearch for those child pages that should be excluded from sitemap.xml
Access the sitemap.xml for the site with the following URL - http://<<site host>>/<<parent node with configured resource type>>.sitemap.xml
e.g.
http://example1.com/en.sitemap.xml
http://example2.com/en.sitemap.xml
<?xml version="1.0"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://example1.com/en.html</loc>
<lastmod>2017-02-28</lastmod>
</url>
<url>
<loc>http://example1.com/en/test.html</loc>
<lastmod>2017-02-28</lastmod>
</url>
</urlset>
The sitemap will be generated dynamically whenever the user accesses the sitemap URL for a particular site.
Factory servlet to generate the sitemap.xml:
import java.io.IOException;
import java.util.*;
import javax.servlet.ServletException;
import javax.xml.stream.*;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.felix.scr.annotations.*;
import org.apache.sling.api.*;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.servlets.SlingSafeMethodsServlet;
import org.apache.sling.commons.osgi.PropertiesUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.day.cq.commons.Externalizer;
import com.day.cq.wcm.api.*;
/**
 * Factory servlet that renders a sitemap for the page containing the requested resource.
 *
 * <p>It is bound to the {@code sitemap} selector and {@code xml} extension on the resource
 * types configured per factory instance. The response is a {@code urlset} document in the
 * sitemaps.org 0.9 namespace containing one {@code url} entry for the containing page and
 * one for every page returned by its deep child listing, except child pages whose
 * {@code hideInSearch} property is true.
 */
@Component(metatype = true, label = "Site Map", description = "Site Map", configurationFactory = true)
@Service
@SuppressWarnings("serial")
@Properties({
    @Property(name = "sling.servlet.resourceTypes", unbounded = PropertyUnbounded.ARRAY,
        label = "Homepage Resource Type", description = "Sling Resource Type for Home Page component"),
    @Property(name = "sling.servlet.selectors", value = "sitemap", propertyPrivate = true),
    @Property(name = "sling.servlet.extensions", value = "xml", propertyPrivate = true),
    @Property(name = "sling.servlet.methods", value = "GET", propertyPrivate = true),
    @Property(name = "webconsole.configurationFactory.nameHint",
        value = "Site Map on resource types: [{sling.servlet.resourceTypes}]") })
public final class SiteMapGeneratorServlet extends SlingSafeMethodsServlet {

    private static final Logger LOG = LoggerFactory.getLogger(SiteMapGeneratorServlet.class);

    /** Date pattern used for the optional {@code lastmod} element. */
    private static final FastDateFormat DATE_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd");

    private static final boolean INCLUDE_LAST_MODIFIED_DEFAULT_VALUE = false;

    @Property(boolValue = INCLUDE_LAST_MODIFIED_DEFAULT_VALUE, label = "Include Last Modified Date",
        description = "If checked, last modified value will be shown in sitemap.")
    private static final String INCLUDE_LAST_MODIFIED_PROPERTY = "include.lastmod";

    private static final String SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /** Page property that excludes a child page from the sitemap when it is true. */
    private static final String HIDE_IN_SEARCH_PROPERTY = "hideInSearch";

    @Reference
    private Externalizer externalizer;

    /** Whether a {@code lastmod} element is written for each page; set in {@link #activate}. */
    private boolean incLastModified;

    /**
     * Reads the "include.lastmod" setting from the component configuration.
     *
     * @param properties configuration of this factory instance
     */
    @Activate
    protected void activate(Map<String, Object> properties) {
        this.incLastModified = PropertiesUtil.toBoolean(properties.get(INCLUDE_LAST_MODIFIED_PROPERTY),
            INCLUDE_LAST_MODIFIED_DEFAULT_VALUE);
    }

    /**
     * Writes the sitemap for the page that contains the requested resource.
     *
     * <p>Responds with 404 when no {@link PageManager} can be obtained or the resource is
     * not inside a page, instead of failing with a {@link NullPointerException}.
     *
     * @param slingRequest  the current request
     * @param slingResponse the response the XML is written to
     * @throws ServletException declared by the servlet contract; not thrown directly here
     * @throws IOException      on response I/O errors, or wrapping an {@link XMLStreamException}
     */
    @Override
    protected void doGet(SlingHttpServletRequest slingRequest, SlingHttpServletResponse slingResponse)
            throws ServletException, IOException {
        ResourceResolver resourceResolver = slingRequest.getResourceResolver();
        PageManager pageManager = resourceResolver.adaptTo(PageManager.class);
        Page pageObj = (pageManager == null) ? null : pageManager.getContainingPage(slingRequest.getResource());
        if (pageObj == null) {
            // SC_NOT_FOUND is inherited from HttpServletResponse by SlingHttpServletResponse.
            slingResponse.sendError(SlingHttpServletResponse.SC_NOT_FOUND);
            return;
        }

        slingResponse.setContentType(slingRequest.getResponseContentType());
        // The document carries no encoding declaration, so XML parsers will read it as UTF-8;
        // make the response writer emit UTF-8 as well.
        slingResponse.setCharacterEncoding("UTF-8");

        XMLStreamWriter stream = null;
        try {
            stream = XMLOutputFactory.newFactory().createXMLStreamWriter(slingResponse.getWriter());
            stream.writeStartDocument("1.0");
            stream.writeStartElement("", "urlset", SITEMAP_NAMESPACE);
            stream.writeNamespace("", SITEMAP_NAMESPACE);

            // The containing page itself is always listed.
            writeXML(pageObj, stream, slingRequest);

            Iterator<Page> children = pageObj.listChildren(new PageFilter(), true);
            while (children.hasNext()) {
                Page childPage = children.next();
                // Pages flagged as hidden in search must not show up in the sitemap.
                if (childPage != null && !isHiddenInSearch(childPage)) {
                    writeXML(childPage, stream, slingRequest);
                }
            }

            stream.writeEndElement();
            stream.writeEndDocument();
            stream.flush();
        } catch (XMLStreamException e) {
            throw new IOException(e);
        } finally {
            if (stream != null) {
                try {
                    // Releases the writer's resources; the underlying response writer is
                    // left for the container to manage.
                    stream.close();
                } catch (XMLStreamException e) {
                    LOG.warn("Unable to close sitemap XML stream", e);
                }
            }
        }
    }

    /**
     * Tells whether a page is flagged as hidden in search.
     *
     * <p>The value is read through the typed value-map accessor so that a Boolean
     * {@code false} and the strings {@code "false"} / {@code ""} are all treated as
     * "not hidden". NOTE(review): relies on the value map's String-to-Boolean conversion
     * yielding false for an empty string - confirm for the value map in use.
     *
     * @param page the page to inspect
     * @return {@code true} if {@code hideInSearch} is present and converts to true
     */
    private static boolean isHiddenInSearch(Page page) {
        return page.getProperties().get(HIDE_IN_SEARCH_PROPERTY, Boolean.FALSE);
    }

    /**
     * Writes one {@code url} entry: a {@code loc} element with the externalized
     * {@code <path>.html} link and, when enabled and available, a {@code lastmod} element.
     *
     * @param pageObj      the page to describe
     * @param xmlStream    the writer positioned inside {@code urlset}
     * @param slingRequest the current request, used to pick the scheme and externalize the link
     * @throws XMLStreamException if writing fails
     */
    private void writeXML(Page pageObj, XMLStreamWriter xmlStream, SlingHttpServletRequest slingRequest)
            throws XMLStreamException {
        xmlStream.writeStartElement(SITEMAP_NAMESPACE, "url");

        // Mirror the scheme of the incoming request in the generated links.
        String scheme = slingRequest.isSecure() ? "https" : "http";
        String locPath = this.externalizer.absoluteLink(slingRequest, scheme,
            String.format("%s.html", pageObj.getPath()));
        writeXMLElement(xmlStream, "loc", locPath);

        if (this.incLastModified) {
            Calendar lastModified = pageObj.getLastModified();
            if (lastModified != null) {
                writeXMLElement(xmlStream, "lastmod", DATE_FORMAT.format(lastModified));
            }
        }

        xmlStream.writeEndElement();
    }

    /**
     * Writes a simple text element in the sitemap namespace.
     *
     * @param xmlStream   the writer to append to
     * @param elementName local name of the element
     * @param xmlText     character content of the element
     * @throws XMLStreamException if writing fails
     */
    private void writeXMLElement(final XMLStreamWriter xmlStream, final String elementName, final String xmlText)
            throws XMLStreamException {
        xmlStream.writeStartElement(SITEMAP_NAMESPACE, elementName);
        xmlStream.writeCharacters(xmlText);
        xmlStream.writeEndElement();
    }
}
Create a new servlet configuration from the factory through the OSGi console by providing the following details:
Homepage Resource Type - add the home page resource types that should be considered for generating sitemap.xml
Include Last Modified Date - If selected the last modified date of the page will be included as part of the sitemap.xml
data:image/s3,"s3://crabby-images/b064d/b064d3a6ac3892a894f23d47b4e229210458d83b" alt=""
data:image/s3,"s3://crabby-images/b673d/b673d943dbb8af2136279b22fb1b9ff52f239d86" alt=""
Enable hideInSearch checkbox in all page properties
data:image/s3,"s3://crabby-images/6b688/6b68818f39c0f7bebdf3d65a84d5da5cdcb70339" alt=""
Select hideInSearch for those child pages that should be excluded from sitemap.xml
Access the sitemap.xml for the site with the following URL - http://<<site host>>/<<parent node with configured resource type>>.sitemap.xml
e.g.
http://example1.com/en.sitemap.xml
http://example2.com/en.sitemap.xml
<?xml version="1.0"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://example1.com/en.html</loc>
<lastmod>2017-02-28</lastmod>
</url>
<url>
<loc>http://example1.com/en/test.html</loc>
<lastmod>2017-02-28</lastmod>
</url>
</urlset>
No comments:
Post a Comment
If you have any doubts or questions, please let us know.