Creating a sitemap for an eCommerce website helps search engine crawlers index all of the product pages.
Let's see how to create the sitemap from a product index file.
Create a service config file
Create a scheduler
Create an interface to read the XML file
Create an implementation class to read the XML file
- Create a service config file
- Create a scheduler
- Create sitemap read and write service interface and implementation
- Create models for parsing the index xml file
<aemquickstart
xmlns:xs="http://www.w3.org/2001/XMLSchema" version="2.0">
<channel>
<Item>
<title>
<![CDATA[ AEM Quickstart by Kishore ]]>
</title>
<ProductId>12345</ProductId>
<pubDate>02/28/2017 00:00:00.000000</pubDate>
</Item>
<Item>
<title>
<![CDATA[ Lorel Ipsum ]]>
</title>
<ProductId>56789</ProductId>
<pubDate>02/28/2019 00:00:00.000000</pubDate>
</Item>
<Item>
<title>
<![CDATA[ Create Sitemap in AEM ]]>
</title>
<ProductId>12987</ProductId>
<pubDate>03/28/2019 00:00:00.000000</pubDate>
</Item>
</channel>
</aemquickstart>
Create a service config file
package com.aemquickstart.core.configurations;
import org.osgi.service.metatype.annotations.AttributeDefinition;
import org.osgi.service.metatype.annotations.AttributeType;
import org.osgi.service.metatype.annotations.ObjectClassDefinition;
/*
* @author Kishore Polsani
*/
/**
 * OSGi metatype definition for the product sitemap scheduler.
 *
 * <p>Each element below becomes one editable attribute of the
 * "AEM Quickstart Sitemap Configuration" entry. Elements without a
 * {@code default} clause have no default value declared here.
 */
@ObjectClassDefinition(
        name = "AEM Quickstart Sitemap Configuration",
        description = "This configuration helps in creating a product sitemap, reading data from URL")
public @interface SitemapConfiguration {

    /** Name of the scheduler; the scheduler class derives its id from this value. */
    @AttributeDefinition(
            name = "Scheduler name",
            description = "Name of the scheduler",
            type = AttributeType.STRING)
    String name() default "XML Reader Scheduler";

    /** Switch that turns the scheduled job on or off. */
    @AttributeDefinition(
            name = "Enabled",
            description = "Flag to enable/disable a scheduler",
            type = AttributeType.BOOLEAN)
    boolean enabled() default true;

    /** Cron expression handed to the scheduler. */
    @AttributeDefinition(
            name = "Cron expression",
            description = "Cron expression used by the scheduler",
            type = AttributeType.STRING)
    String cronExpression() default "0 * * * * ?";

    /** Path of the XML file on the system. */
    @AttributeDefinition(
            name = "XML file path",
            description = "Path of the XML file on the system",
            type = AttributeType.STRING)
    String xmlFilePath();

    /** URL from which the XML product index is read. */
    @AttributeDefinition(
            name = "XML product index file URL",
            description = "URL from where XML response is to be read",
            type = AttributeType.STRING)
    String xmlResponseURL();

    /** JCR path under which the generated data is stored. */
    @AttributeDefinition(
            name = "JCR path",
            description = "Path in the JCR to store data",
            type = AttributeType.STRING)
    String jcrPath() default "/content/aemquickstart/en";

    /** Domain (scheme and host) used when building product URLs. */
    @AttributeDefinition(
            name = "Enter Domain",
            description = "Enter domain to be used in attribute.",
            type = AttributeType.STRING)
    String domain() default "https://localhost";
}
Create a scheduler
package com.aemquickstart.core.schedulers;
import org.apache.sling.commons.scheduler.ScheduleOptions;
import org.apache.sling.commons.scheduler.Scheduler;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Deactivate;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.osgi.service.metatype.annotations.Designate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.aemquickstart.core.configurations.SitemapConfiguration;
import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapXmlReaderService;
import com.aemquickstart.core.services.SitemapXmlWriterService;
/**
* @author Kishore Polsani
*
*/
/**
 * Scheduled job that reads the product index XML from a configured URL and
 * asks the writer service to generate the product sitemap in the repository.
 *
 * <p>The job is registered with the Sling {@link Scheduler} on activation and
 * re-registered whenever the configuration is modified. It is always
 * registered and unregistered under the same name,
 * {@code String.valueOf(schedulerId)}, so that a previously scheduled job can
 * reliably be removed.
 */
@Component(immediate = true, service = Runnable.class)
@Designate(ocd = SitemapConfiguration.class)
public class SitemapScheduler implements Runnable {

    private final Logger log = LoggerFactory.getLogger(this.getClass());

    private int schedulerId; // Id of the scheduler based on its name

    @Reference
    private Scheduler scheduler;

    @Reference
    private SitemapXmlReaderService sitemapXmlReaderService;

    @Reference
    private SitemapXmlWriterService sitemapXmlWriterService;

    private String filePath; // XML file from where sitemap data to be read
    private String productIndexFileUrl; // URL from where sitemap data to be read
    private boolean isEnabled;
    private String jcrPath;
    private String domain;

    /**
     * Reads the configuration and schedules the job (when enabled).
     *
     * @param sitemapXmlReaderConfiguration the current configuration
     */
    @Activate
    protected void activate(SitemapConfiguration sitemapXmlReaderConfiguration) {
        applyConfiguration(sitemapXmlReaderConfiguration);
        log.info("Scheduler activated: flag={}", isEnabled);
        // Without this call the job would only start after the first configuration change.
        addScheduler(sitemapXmlReaderConfiguration);
    }

    /**
     * Re-registers the job after a configuration change.
     *
     * @param sitemapXmlReaderConfiguration the updated configuration
     */
    @Modified
    protected void modified(SitemapConfiguration sitemapXmlReaderConfiguration) {
        // Remove the job first: it is still registered under the OLD scheduler id.
        removeScheduler();
        // Pick up every changed value (id, URL, JCR path, domain, enabled flag).
        applyConfiguration(sitemapXmlReaderConfiguration);
        addScheduler(sitemapXmlReaderConfiguration);
    }

    /**
     * Removes the scheduled job when the component is deactivated.
     *
     * @param sitemapXmlReaderConfiguration the current configuration (unused)
     */
    @Deactivate
    protected void deactivate(SitemapConfiguration sitemapXmlReaderConfiguration) {
        removeScheduler();
    }

    /**
     * Copies all values this component needs from the configuration into its fields.
     *
     * @param configuration the configuration to read
     */
    private void applyConfiguration(SitemapConfiguration configuration) {
        schedulerId = configuration.name().hashCode();
        filePath = configuration.xmlFilePath();
        productIndexFileUrl = configuration.xmlResponseURL();
        isEnabled = configuration.enabled();
        jcrPath = configuration.jcrPath();
        domain = configuration.domain();
    }

    /**
     * Unschedules the job registered under the current scheduler id.
     */
    private void removeScheduler() {
        log.info("Removing scheduler: {}", schedulerId);
        scheduler.unschedule(String.valueOf(schedulerId));
    }

    /**
     * Schedules this job with the configured cron expression, if the scheduler is enabled.
     *
     * @param xmlReaderConfiguration the configuration providing the cron expression
     */
    private void addScheduler(SitemapConfiguration xmlReaderConfiguration) {
        if (!isEnabled) {
            log.info("Sitemap Scheduler {} is disabled", schedulerId);
            return;
        }
        ScheduleOptions scheduleOptions = scheduler.EXPR(xmlReaderConfiguration.cronExpression());
        // Must match the name used in removeScheduler(), otherwise the job can never be unscheduled.
        scheduleOptions.name(String.valueOf(schedulerId));
        scheduleOptions.canRunConcurrently(false);
        if (scheduler.schedule(this, scheduleOptions)) {
            log.info("Sitemap Scheduler {} is added", schedulerId);
        } else {
            log.warn("Sitemap Scheduler {} could not be scheduled", schedulerId);
        }
    }

    /**
     * Executes the job: reads the product index from the configured URL and,
     * if it could be parsed, writes the product sitemap.
     */
    @Override
    public void run() {
        log.info("In Scheduler run(), isEnabled:{}", isEnabled);
        if (!isEnabled) {
            log.info("Sitemap Scheduler is not enabled");
            return;
        }
        if (productIndexFileUrl == null || productIndexFileUrl.isEmpty()) {
            log.info("No product XML URL configured; nothing to do");
            return;
        }
        log.info("Product XML URL: {}", productIndexFileUrl);
        ProductList productList = sitemapXmlReaderService.readXMLFromURL(productIndexFileUrl);
        if (productList == null) {
            // The reader returns null when the feed could not be fetched or parsed.
            log.warn("Product index could not be read from {}; sitemap not updated", productIndexFileUrl);
            return;
        }
        sitemapXmlWriterService.createProductSiteMap(domain, productList, jcrPath, "url");
    }
}
Create an interface to read the XML file
package com.aemquickstart.core.services;
import com.aemquickstart.core.models.ProductList;
/**
 * Service that reads a product index XML document and converts it into a
 * {@link ProductList}.
 */
public interface SitemapXmlReaderService {

    /**
     * Reads the XML document available at the given URL and returns it as a
     * {@link ProductList}.
     *
     * @param responseURL URL from which the XML response is read
     * @return the product list built from the XML response
     */
    ProductList readXMLFromURL(String responseURL);
}
Create an implementation class to read the XML file
package com.aemquickstart.core.services.impl;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import org.osgi.service.component.annotations.Component;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapXmlReaderService;
/**
 * Default {@link SitemapXmlReaderService}: downloads the product index XML
 * over a {@link URLConnection} and unmarshals it with JAXB.
 */
@Component(immediate = true, service = SitemapXmlReaderService.class)
public class SitemapXmlReaderServiceImpl implements SitemapXmlReaderService {

    /** Connect and read timeout applied to the feed download. */
    private static final int TIMEOUT_MILLIS = 30 * 1000;

    /** XML defaults to UTF-8; do not depend on the platform default charset. */
    private static final Charset XML_CHARSET = Charset.forName("UTF-8");

    // Logger
    private final Logger log = LoggerFactory.getLogger(this.getClass());

    /**
     * Downloads the XML document at {@code responseURL} and converts it into a
     * {@link ProductList}.
     *
     * @param responseURL URL of the product index XML
     * @return the parsed product list, or {@code null} when the document could
     *         not be downloaded, was empty, or could not be parsed
     */
    @Override
    public ProductList readXMLFromURL(String responseURL) {
        log.info("In readXMLFromURL");
        String xmlResponse = fetch(responseURL);
        if (xmlResponse == null || xmlResponse.trim().isEmpty()) {
            // Nothing to parse; avoid a guaranteed JAXB failure on empty input.
            log.warn("No XML content received from {}", responseURL);
            return null;
        }
        log.debug("xmlResponse: {}", xmlResponse);
        try {
            // Unmarshaller is not thread-safe, so it is created per call instead of being kept in a field.
            // NOTE(review): the feed is external input - confirm the JAXB runtime in use rejects
            // DTDs/external entities.
            JAXBContext jaxbContext = JAXBContext.newInstance(ProductList.class);
            Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
            ProductList productList = (ProductList) unmarshaller.unmarshal(new StringReader(xmlResponse));
            log.info("ProductList: {}", productList);
            return productList;
        } catch (JAXBException e) {
            log.error("Unable to parse product index XML from {}", responseURL, e);
            return null;
        }
    }

    /**
     * Reads the complete response body of the given URL as text.
     *
     * @param responseURL URL to read
     * @return the response body, or {@code null} if it could not be read
     */
    private String fetch(String responseURL) {
        try {
            URLConnection urlConnection = new URL(responseURL).openConnection();
            urlConnection.setConnectTimeout(TIMEOUT_MILLIS);
            urlConnection.setReadTimeout(TIMEOUT_MILLIS);
            // try-with-resources closes the reader (and the underlying stream) on every path.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(urlConnection.getInputStream(), XML_CHARSET))) {
                StringBuilder builder = new StringBuilder();
                char[] buffer = new char[4096];
                int read;
                while ((read = bufferedReader.read(buffer)) != -1) {
                    builder.append(buffer, 0, read);
                }
                return builder.toString();
            }
        } catch (Exception e) {
            // Service boundary: a malformed URL or any I/O failure results in "no data".
            log.error("Unable to read product index XML from {}", responseURL, e);
            return null;
        }
    }
}
Create a model class to parse the XML
Create Channel.java
Create Item.java to read all elements
Create an interface to write the sitemap to an XML file
Create an implementation class to create the sitemap
Now build the project. Open ConfigMgr and search for "AEM Quickstart Sitemap Configuration"
package com.aemquickstart.core.models;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
/**
 * JAXB model for the root {@code <aemquickstart>} element of the product
 * index file. It holds the {@code <channel>} children of the document.
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "aemquickstart")
public class ProductList {

    /** The {@code <channel>} elements; starts out as a one-slot array. */
    @XmlElement(name = "channel")
    private Channel[] channels = new Channel[1];

    /**
     * @return the channels currently held by this product list
     */
    public Channel[] getChannel() {
        return this.channels;
    }

    /**
     * @param channel the channels to store in this product list
     */
    public void setChannel(Channel[] channel) {
        this.channels = channel;
    }
}
Create Channel.java
package com.aemquickstart.core.models;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
/**
 * JAXB model for a {@code <channel>} element of the product index file.
 * It holds the {@code <Item>} children of the channel.
 */
@XmlRootElement(name = "channel")
@XmlAccessorType(XmlAccessType.FIELD)
public class Channel {

    /** The {@code <Item>} elements (capitalised in the XML); starts out as a one-slot array. */
    @XmlElement(name = "Item")
    private Item[] items = new Item[1];

    /**
     * @return the items currently held by this channel
     */
    public Item[] getItem() {
        return this.items;
    }

    /**
     * @param item the items to store in this channel
     */
    public void setItem(Item[] item) {
        this.items = item;
    }
}
Create Item.java to read all elements
package com.aemquickstart.core.models;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
/**
 * JAXB model for a single {@code <Item>} element of the product index file.
 * All values are kept as the raw strings found in the XML.
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "Item")
public class Item {

    /** Content of the {@code <ProductId>} element (capitalised in the XML). */
    @XmlElement(name = "ProductId")
    private String productId;

    /** Content of the {@code <title>} element. */
    @XmlElement(name = "title")
    private String title;

    /** Content of the {@code <pubDate>} element. */
    @XmlElement(name = "pubDate")
    private String pubDate;

    /**
     * @return the product id
     */
    public String getProductId() {
        return this.productId;
    }

    /**
     * @param productId the product id to set
     */
    public void setProductId(String productId) {
        this.productId = productId;
    }

    /**
     * @return the product title
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @param title the product title to set
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the publication date as it appears in the XML
     */
    public String getPubDate() {
        return this.pubDate;
    }

    /**
     * @param pubDate the publication date to set
     */
    public void setPubDate(String pubDate) {
        this.pubDate = pubDate;
    }
}
Create an interface to write the sitemap to an XML file
package com.aemquickstart.core.services;
import com.aemquickstart.core.models.ProductList;
/**
 * Service that turns a {@link ProductList} into a product sitemap.
 */
public interface SitemapXmlWriterService {

    /**
     * Creates the product sitemap for the given products.
     *
     * @param domain      domain used when building the product URLs
     * @param productList products to include in the sitemap
     * @param jcrPath     JCR path the sitemap is created for
     * @param from        label describing where the product data came from
     */
    void createProductSiteMap(String domain, ProductList productList, String jcrPath, String from);
}
Create an implementation class to create the sitemap
package com.aemquickstart.core.services.impl;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Map;
import javax.jcr.Binary;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.ValueFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import com.aemquickstart.core.models.Channel;
import com.aemquickstart.core.models.Item;
import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapXmlWriterService;
import com.day.cq.wcm.api.Page;
import com.day.cq.wcm.api.PageManager;
/**
 * Default {@link SitemapXmlWriterService}: builds a {@code <urlset>} document
 * from the product list and stores it as {@code sitemap_products.xml} (an
 * {@code nt:file} node) below the configured JCR path.
 *
 * <p>Each call opens exactly one service {@link ResourceResolver} and closes it
 * again; no repository state is kept in instance fields.
 */
@Component(immediate = true, service = SitemapXmlWriterService.class)
public class SitemapXmlWriterServiceImpl implements SitemapXmlWriterService {

    /** Sub-service name used to obtain the service resource resolver. */
    private static final String SUBSERVICE_NAME = "aemquickstartSubservice";

    /** Name of the sitemap file node created below the JCR path. */
    private static final String SITEMAP_FILE_NAME = "sitemap_products.xml";

    /** Format of the {@code <lastmod>} value. */
    private static final FastDateFormat LASTMOD_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd");

    /** The sitemap protocol defines the change frequency values in lowercase. */
    private static final String CHANGE_FREQUENCY = "weekly";

    // Logger
    private final Logger log = LoggerFactory.getLogger(this.getClass());

    // Injecting ResourceResolverFactory
    @Reference
    private ResourceResolverFactory resourceResolverFactory;

    /**
     * Opens a service resource resolver for the {@code aemquickstartSubservice}
     * sub-service. The caller is responsible for closing it.
     *
     * @return a new resource resolver
     * @throws LoginException if the service resolver cannot be obtained
     */
    private ResourceResolver getResourceResolver() throws LoginException {
        Map<String, Object> authenticationInfo = new HashMap<>();
        authenticationInfo.put(ResourceResolverFactory.SUBSERVICE, SUBSERVICE_NAME);
        return resourceResolverFactory.getServiceResourceResolver(authenticationInfo);
    }

    /**
     * Creates (or replaces) the product sitemap below {@code jcrPath}.
     *
     * <p>Nothing is written when the product list is {@code null}, the path does
     * not exist, or the path is not a page. Failures are logged, not thrown.
     *
     * @param domain      domain prepended to every product URL
     * @param productList products to list in the sitemap
     * @param jcrPath     JCR path of the page below which the sitemap is stored
     * @param from        label describing where the product data came from (logged only)
     */
    @Override
    public void createProductSiteMap(String domain, ProductList productList, String jcrPath, String from) {
        log.info("createProductSiteMap: {}", from);
        if (productList == null) {
            log.warn("No product list supplied. Sitemap is not created under {}", jcrPath);
            return;
        }
        ResourceResolver resourceResolver = null;
        try {
            resourceResolver = getResourceResolver();
            // The session belongs to the resolver and is released when the resolver is closed.
            Session session = resourceResolver.adaptTo(Session.class);
            if (session == null) {
                log.error("Unable to obtain a JCR session. Sitemap is not created under {}", jcrPath);
                return;
            }
            if (!session.itemExists(jcrPath)) {
                log.info("Provided path does not exist. Sitemap file can't be created under {}", jcrPath);
                return;
            }

            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            Document doc = docBuilder.newDocument();
            doc.setXmlStandalone(true);
            Element rootElement = doc.createElement("urlset");
            rootElement.setAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9");
            doc.appendChild(rootElement);
            addProductsToXml(domain, rootElement, jcrPath, doc, productList);

            // The sitemap is only written below an actual page.
            PageManager pgMgr = resourceResolver.adaptTo(PageManager.class);
            Page homepage = (pgMgr == null) ? null : pgMgr.getPage(jcrPath);
            if (homepage != null) {
                generateXmlFile(jcrPath, doc, session, jcrPath + "/" + SITEMAP_FILE_NAME);
            } else {
                log.info("{} is not a page. Sitemap file is not created", jcrPath);
            }
        } catch (Exception e) {
            log.error("Unable to create product sitemap under {}", jcrPath, e);
        } finally {
            // The resolver is null when the login itself failed.
            if (resourceResolver != null) {
                resourceResolver.close();
            }
        }
    }

    /**
     * Appends one {@code <url>} element per product of the first channel to the
     * root element. Products without a title or product id are skipped.
     *
     * @param domain      domain prepended to every product URL
     * @param rootElement the {@code <urlset>} element to append to
     * @param jcrPath     JCR path used as the path prefix of every product URL
     * @param doc         owner document used to create the elements
     * @param productList source of the products
     */
    private void addProductsToXml(String domain, Element rootElement, String jcrPath, Document doc,
            ProductList productList) {
        Channel[] channels = productList.getChannel();
        if (channels == null || channels.length == 0 || channels[0] == null) {
            log.info("Product list contains no channel; sitemap will be empty");
            return;
        }
        Item[] products = channels[0].getItem();
        if (products == null) {
            log.info("Channel contains no items; sitemap will be empty");
            return;
        }
        log.info("Setting properties");
        // Every entry gets the generation date as <lastmod>; compute it once.
        String lastModified = LASTMOD_FORMAT.format(System.currentTimeMillis());
        for (Item product : products) {
            if (product == null || product.getTitle() == null || product.getProductId() == null) {
                log.warn("Skipping product without title or product id");
                continue;
            }
            String productId = product.getProductId();
            // Only letters, digits and '-' of the title are kept for the URL segment.
            String titleSegment = product.getTitle().replaceAll("[^a-zA-Z0-9-]", "");
            String pdpUrl = domain + jcrPath + "/pdp.html/" + titleSegment + "/" + productId;

            Element pdpUrlElement = doc.createElement("url");

            Element pdpLoc = doc.createElement("loc");
            pdpLoc.appendChild(doc.createTextNode(pdpUrl));
            pdpUrlElement.appendChild(pdpLoc);

            Element pdpLstMod = doc.createElement("lastmod");
            pdpLstMod.appendChild(doc.createTextNode(lastModified));
            pdpUrlElement.appendChild(pdpLstMod);

            Element pdpChangeFreq = doc.createElement("changefreq");
            pdpChangeFreq.appendChild(doc.createTextNode(CHANGE_FREQUENCY));
            pdpUrlElement.appendChild(pdpChangeFreq);

            rootElement.appendChild(pdpUrlElement);
        }
    }

    /**
     * Serialises the document and stores it as an {@code nt:file} node named
     * {@code sitemap_products.xml} below {@code jcrPath}, replacing an existing
     * file. Removal and creation are persisted with a single save, so a failure
     * does not leave the repository without the previous sitemap.
     *
     * @param jcrPath     JCR path of the parent node
     * @param doc         sitemap document to store
     * @param session     session used for all repository operations
     * @param sitemapFile absolute path of the sitemap file node
     */
    private void generateXmlFile(String jcrPath, Document doc, Session session, String sitemapFile) {
        try {
            log.info("sitemap file: {}", sitemapFile);

            // Serialise first: if this fails the repository has not been touched yet.
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            Source xmlSource = new DOMSource(doc);
            Result outputTarget = new StreamResult(outputStream);
            TransformerFactory.newInstance().newTransformer().transform(xmlSource, outputTarget);

            if (session.itemExists(sitemapFile)) {
                log.info("Sitemap exists");
                // Transient removal; persisted together with the new file by the save below.
                session.removeItem(sitemapFile);
            }

            InputStream is = new ByteArrayInputStream(outputStream.toByteArray());
            ValueFactory valueFactory = session.getValueFactory();
            Binary contentValue = valueFactory.createBinary(is);
            try {
                Node homepageNode = session.getNode(jcrPath);
                Node sitemapNode = homepageNode.addNode(SITEMAP_FILE_NAME, "nt:file");
                Node resNode = sitemapNode.addNode("jcr:content", "nt:resource");
                resNode.setProperty("jcr:data", contentValue);
                resNode.setProperty("jcr:mimeType", "text/xml");
                resNode.setProperty("jcr:lastModified", Calendar.getInstance());
                session.save();
            } finally {
                // Release the resources held by the binary value.
                contentValue.dispose();
            }
            log.info("Sitemap is successfull created at {}", sitemapFile);
        } catch (RepositoryException rpe) {
            log.error("Repository error while writing sitemap file {}", sitemapFile, rpe);
        } catch (Exception e) {
            log.error("Error while creating sitemap file {}", sitemapFile, e);
        }
    }
}
Now build the project. Open ConfigMgr and search for "AEM Quickstart Sitemap Configuration"
- Enter Scheduler name and select enabled checkbox.
- Enter cron expression - to update the frequency.
- Enter the XML file URL - from where you would like to read the product info.
- Enter JCR path - where you need to upload your sitemap_products.xml file
- Enter Domain - this value will be used while creating the product URL for the `<loc>` element
No comments:
Post a Comment
If you have any doubts or questions, please let us know.