October 1, 2020
Estimated Post Reading Time ~

Sitemap Scheduler with Factory Configuration using OSGi annotations

Creating a sitemap for an eCommerce website helps search engine crawlers index all the product pages. In some situations a sitemap has to be created for multiple sites; we can achieve this by adding factory=true and bind methods to the scheduler.


Let's see how to create a sitemap from a product index file using a factory.
  1. Create a service config file
  2. Create a scheduler
  3. Create sitemap read and write service interface and implementation
  4. Create models for parsing the index xml file
Below is a sample XML file hosted on a server; we will configure the URL of this hosted XML file in the scheduler.
<aemquickstart
xmlns:xs="http://www.w3.org/2001/XMLSchema" version="2.0">
<channel>
<Item>
<title>
<![CDATA[ AEM Quickstart by Kishore ]]>
</title>
<ProductId>12345</ProductId>
<pubDate>02/28/2017 00:00:00.000000</pubDate>
</Item>
<Item>
<title>
<![CDATA[ Lorel Ipsum ]]>
</title>
<ProductId>56789</ProductId>
<pubDate>02/28/2019 00:00:00.000000</pubDate>
</Item>
<Item>
<title>
<![CDATA[ Create Sitemap in AEM ]]>
</title>
<ProductId>12987</ProductId>
<pubDate>03/28/2019 00:00:00.000000</pubDate>
</Item>
</channel>
</aemquickstart>


Create a service config file
package com.aemquickstart.core.configurations;

import org.osgi.service.metatype.annotations.AttributeDefinition;
import org.osgi.service.metatype.annotations.AttributeType;
import org.osgi.service.metatype.annotations.ObjectClassDefinition;

/**
 * OSGi metatype definition describing the settings of one sitemap scheduler:
 * when it runs, where the product index XML is read from, and where the
 * generated sitemap is stored.
 *
 * @author Kishore Polsani
 */
@ObjectClassDefinition(
        name = "AEM Quickstart Sitemap Configuration",
        description = "This configuration helps in creating a product sitemap, reading data from URL")
public @interface SitemapConfiguration {

    /** Cron expression of the job; defaults to every five minutes. */
    @AttributeDefinition(name = "Cron-job expression")
    String scheduler_expression() default "0 0/5 * * * ?";

    /** Whether the task may be scheduled concurrently. */
    @AttributeDefinition(
            name = "Concurrent task",
            description = "Whether or not to schedule this task concurrently")
    boolean scheduler_concurrent() default false;

    /** Name of the scheduler. */
    @AttributeDefinition(
            name = "Scheduler name",
            description = "Name of the scheduler",
            type = AttributeType.STRING)
    String name() default "XML Reader Scheduler";

    /** Switch that enables or disables the scheduler. */
    @AttributeDefinition(
            name = "Enabled",
            description = "Flag to enable/disable a scheduler",
            type = AttributeType.BOOLEAN)
    boolean enabled() default true;

    /** URL of the product index XML; no default is declared. */
    @AttributeDefinition(
            name = "XML product index file URL",
            description = "URL from where XML response is to be read",
            type = AttributeType.STRING)
    String xmlResponseURL();

    /** Repository path under which data is stored. */
    @AttributeDefinition(
            name = "JCR path",
            description = "Path in the JCR to store data",
            type = AttributeType.STRING)
    String jcrPath() default "/content/aemquickstart/en";

    /** Domain configured for this scheduler. */
    @AttributeDefinition(
            name = "Enter Domain",
            description = "Enter domain to be used in attribute.",
            type = AttributeType.STRING)
    String domain() default "https://localhost";
}


Create a scheduler
package com.aemquickstart.core.schedulers;

import java.util.ArrayList;
import java.util.List;

import org.apache.sling.commons.scheduler.ScheduleOptions;
import org.apache.sling.commons.scheduler.Scheduler;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Deactivate;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.osgi.service.component.annotations.ReferenceCardinality;
import org.osgi.service.component.annotations.ReferencePolicy;
import org.osgi.service.metatype.annotations.Designate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.aemquickstart.core.configurations.SitemapConfiguration;
import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapService;
import com.aemquickstart.core.services.SitemapXmlReaderService;
import com.aemquickstart.core.services.SitemapXmlWriterService;

/**
 * Scheduled job that reads a product index XML from a URL and writes a product
 * sitemap into the repository.
 *
 * <p>One instance exists per factory configuration of
 * {@link SitemapConfiguration}. In addition, every bound {@link SitemapService}
 * gets its own job, scheduled with that service's cron expression.
 *
 * @author Kishore Polsani
 */
@Component(immediate = true, service = Runnable.class)
@Designate(ocd = SitemapConfiguration.class, factory = true)
public class SitemapScheduler implements Runnable {

    private final Logger log = LoggerFactory.getLogger(this.getClass());

    private int schedulerId; // Id of the scheduler based on its name

    /** Name under which this component's own job is scheduled. */
    private String name;

    @Reference
    private Scheduler scheduler;

    @Reference
    private SitemapXmlReaderService sitemapXmlReaderService;

    @Reference
    private SitemapXmlWriterService sitemapXmlWriterService;

    /** URL from where the sitemap data is read. */
    private String productIndexFileUrl;

    private boolean isEnabled;

    private String jcrPath;

    private String domain;

    /** Services bound through {@link #bindConfigurationFactory}; guarded by {@code this}. */
    private List<SitemapService> configurationList;

    /** True between activate and deactivate; guarded by {@code this}. */
    private boolean active;

    /**
     * Stores the configuration and schedules a job for every service that was
     * bound before activation.
     *
     * @param sitemapXmlReaderConfiguration configuration of this instance
     */
    @Activate
    protected void activate(SitemapConfiguration sitemapXmlReaderConfiguration) {
        applyConfiguration(sitemapXmlReaderConfiguration);
        log.info("Scheduler activated: flag={}", isEnabled);
        // NOTE(review): this component's own job is not scheduled explicitly here,
        // as before. Presumably Sling's whiteboard support picks up the Runnable
        // service through its scheduler.expression property - confirm.
        synchronized (this) {
            active = true;
            if (configurationList != null) {
                // Bind methods can run before activation, so catch up here.
                for (SitemapService bound : configurationList) {
                    scheduleBoundConfiguration(bound);
                }
            }
        }
    }

    /**
     * Re-reads the configuration and re-registers this component's job.
     *
     * @param config the modified configuration
     */
    @Modified
    protected void modified(SitemapConfiguration config) {
        log.info("In modified(), modifying {}", config.name());
        // Remove the job under its OLD name before the name is overwritten.
        removeScheduler();
        // Refresh every cached value, not only the id and the name.
        applyConfiguration(config);
        addScheduler(config);
    }

    /**
     * Removes this component's job and the jobs of all bound services.
     *
     * @param sitemapXmlReaderConfiguration configuration of this instance
     */
    @Deactivate
    protected void deactivate(SitemapConfiguration sitemapXmlReaderConfiguration) {
        removeScheduler();
        synchronized (this) {
            active = false;
            if (configurationList != null) {
                for (SitemapService bound : configurationList) {
                    unscheduleQuietly(bound.name());
                }
            }
        }
    }

    /** Copies all values of the given configuration into the instance fields. */
    private void applyConfiguration(SitemapConfiguration config) {
        schedulerId = config.name().hashCode();
        name = config.name();
        productIndexFileUrl = config.xmlResponseURL();
        isEnabled = config.enabled();
        jcrPath = config.jcrPath();
        domain = config.domain();
    }

    /**
     * Unschedules this component's job. The job is looked up by the same name
     * that {@link #addScheduler} registers it under.
     */
    private void removeScheduler() {
        log.info("Removing scheduler: {}", schedulerId);
        unscheduleQuietly(name);
    }

    /** Unschedules the job with the given name; does nothing for a null name. */
    private void unscheduleQuietly(String jobName) {
        if (jobName != null) {
            scheduler.unschedule(jobName);
        }
    }

    /**
     * Schedules this component with the cron expression and concurrency flag of
     * the given configuration, unless the configuration is disabled.
     *
     * @param config configuration to schedule with
     */
    private void addScheduler(SitemapConfiguration config) {
        isEnabled = config.enabled();
        if (isEnabled) {
            ScheduleOptions scheduleOptions = scheduler.EXPR(config.scheduler_expression());
            scheduleOptions.name(config.name());
            scheduleOptions.canRunConcurrently(config.scheduler_concurrent());
            scheduler.schedule(this, scheduleOptions);
            log.info("Sitemap Scheduler {} is added", name);
        } else {
            log.info("Sitemap Scheduler {} is disabled", name);
        }
    }

    /**
     * Generates the sitemap for this component's own configuration.
     */
    @Override
    public void run() {
        log.info("In Scheduler run(), isEnabled:{}", isEnabled);
        generateSitemap(isEnabled, productIndexFileUrl, domain, jcrPath);
    }

    /**
     * Reads the product index and hands it to the sitemap writer.
     *
     * @param enabled    whether the job is enabled; nothing happens when false
     * @param feedUrl    URL of the product index XML; nothing happens when null or empty
     * @param siteDomain domain passed to the writer
     * @param targetPath repository path passed to the writer
     */
    private void generateSitemap(boolean enabled, String feedUrl, String siteDomain, String targetPath) {
        if (!enabled) {
            log.info("Sitemap Scheduler is not enabled");
            return;
        }
        if (feedUrl == null || feedUrl.isEmpty()) {
            return;
        }
        log.info("Product XML URL: {}", feedUrl);
        ProductList productList = sitemapXmlReaderService.readXMLFromURL(feedUrl);
        if (productList == null) {
            // The reader returns null when the feed could not be parsed.
            log.warn("No products could be read from {}, sitemap is not written", feedUrl);
            return;
        }
        sitemapXmlWriterService.createProductSiteMap(siteDomain, productList, targetPath, "url");
    }

    /**
     * Registers a service and, once the component is active, schedules its job.
     *
     * @param config the service being bound
     */
    @Reference(name = "configurationFactory", cardinality = ReferenceCardinality.MULTIPLE, policy = ReferencePolicy.DYNAMIC)
    protected synchronized void bindConfigurationFactory(final SitemapService config) {
        log.info("bindConfigurationFactory: {}", config.name());
        if (configurationList == null) {
            configurationList = new ArrayList<>();
        }
        configurationList.add(config);
        if (active) {
            scheduleBoundConfiguration(config);
        }
        // Otherwise activate() schedules it; before activation the Scheduler
        // reference may not be available yet.
    }

    /**
     * Schedules a dedicated job for the given service. The job reads the
     * service's values on every run and leaves this component's fields alone.
     */
    private void scheduleBoundConfiguration(final SitemapService config) {
        final String jobName = config.name();
        if (!config.enabled()) {
            log.info("Sitemap Scheduler {} is disabled", jobName);
            return;
        }
        final Runnable job = new Runnable() {
            @Override
            public void run() {
                boolean enabled = config.enabled();
                log.info("In Scheduler runnable run(), isEnabled:{}", enabled);
                generateSitemap(enabled, config.xmlResponseURL(), config.domain(), config.jcrPath());
            }
        };
        ScheduleOptions scheduleOptions = scheduler.EXPR(config.cronExpression());
        scheduleOptions.name(jobName);
        scheduleOptions.canRunConcurrently(false);
        scheduler.schedule(job, scheduleOptions);
        log.info("Sitemap Scheduler {} is added", jobName);
    }

    /**
     * Forgets a service and removes its job.
     *
     * @param config the service being unbound
     */
    protected synchronized void unbindConfigurationFactory(final SitemapService config) {
        log.info("unbindConfigurationFactory: {}", config.name());
        if (configurationList != null) {
            configurationList.remove(config);
        }
        if (active) {
            // After deactivation the jobs are already gone.
            unscheduleQuietly(config.name());
        }
    }

    /**
     * @return the currently bound services, or null when none was ever bound
     */
    public List<SitemapService> getConfigurationList() {
        return configurationList;
    }

    /**
     * @param configurationList replacement for the list of bound services
     */
    public void setConfigurationList(List<SitemapService> configurationList) {
        this.configurationList = configurationList;
    }
}

Create an interface to read the XML file
package com.aemquickstart.core.services;

import com.aemquickstart.core.models.ProductList;

public interface SitemapXmlReaderService {

    /**
     * Reads the XML document served at the given URL and returns it as a
     * {@link ProductList}.
     *
     * @param responseURL URL of the XML document to read
     * @return the product list read from the URL
     */
    ProductList readXMLFromURL(String responseURL);
}

Create an implementation class to read the XML file
package com.aemquickstart.core.services.impl;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;

import org.osgi.service.component.annotations.Component;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapXmlReaderService;

/**
 * Downloads a product index XML over a URL connection and unmarshals it with
 * JAXB into a {@link ProductList}.
 */
@Component(immediate = true, service = SitemapXmlReaderService.class)
public class SitemapXmlReaderServiceImpl implements SitemapXmlReaderService {

    /** Connect and read timeout of the feed request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30 * 1000;

    // Logger
    private final Logger log = LoggerFactory.getLogger(this.getClass());

    /**
     * Downloads and parses the product index.
     *
     * @param responseURL URL of the product index XML
     * @return the parsed product list, or {@code null} when the document could
     *         not be downloaded or parsed
     */
    @Override
    public ProductList readXMLFromURL(String responseURL) {
        log.info("In readXMLFromURL");
        String xmlResponse = download(responseURL);
        log.debug("xmlResponse: {}", xmlResponse);
        if (xmlResponse.isEmpty()) {
            log.warn("No XML content received from {}", responseURL);
            return null;
        }

        try {
            // Kept local: this service is shared and an Unmarshaller must not be
            // used by several threads at once.
            JAXBContext jaxbContext = JAXBContext.newInstance(ProductList.class);
            Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
            ProductList productList = (ProductList) unmarshaller.unmarshal(new StringReader(xmlResponse));
            log.info("ProductList: {}", productList);
            return productList;
        } catch (JAXBException e) {
            log.error(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Fetches the body served at the given URL as text.
     *
     * @return the complete body, or an empty string when anything goes wrong, so
     *         that a truncated document is never handed to the parser
     */
    private String download(String responseURL) {
        StringBuilder builder = new StringBuilder();
        try {
            URLConnection urlConnection = new URL(responseURL).openConnection();
            urlConnection.setConnectTimeout(TIMEOUT_MILLIS);
            urlConnection.setReadTimeout(TIMEOUT_MILLIS);
            // UTF-8 instead of the platform default, so the result does not depend
            // on the JVM's locale settings.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(urlConnection.getInputStream(), Charset.forName("UTF-8")))) {
                char[] chunk = new char[4096];
                int read;
                while ((read = bufferedReader.read(chunk)) != -1) {
                    builder.append(chunk, 0, read);
                }
            }
            log.info("closing input stream");
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            return "";
        }
        return builder.toString();
    }
}

Create a model to parse the XML
package com.aemquickstart.core.models;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

/**
 * JAXB model of the product index document.
 *
 * <p>The root element name must match the root of the feed, which is
 * {@code <aemquickstart>} in the sample XML; with any other name JAXB rejects
 * the document as having an unexpected root element.
 */
@XmlRootElement(name="aemquickstart")
@XmlAccessorType(XmlAccessType.FIELD)
public class ProductList {

    /** The {@code <channel>} children of the root element. */
    @XmlElement
    private Channel[] channel = new Channel[1];

    /**
     * @return the channels of the document
     */
    public Channel[] getChannel() {
        return channel;
    }

    /**
     * @param channel the channels of the document
     */
    public void setChannel(Channel[] channel) {
        this.channel = channel;
    }
}

Create Channel.java
package com.aemquickstart.core.models;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

/**
 * JAXB model of one {@code <channel>} element of the product index.
 */
@XmlRootElement(name = "channel")
@XmlAccessorType(XmlAccessType.FIELD)
public class Channel {

    /** The {@code <Item>} children; the element name is capitalised in the XML. */
    @XmlElement(name = "Item")
    private Item[] items = new Item[1];

    /**
     * @return the items of this channel
     */
    public Item[] getItem() {
        return this.items;
    }

    /**
     * @param item the items of this channel
     */
    public void setItem(Item[] item) {
        this.items = item;
    }
}


Create Item.java to read all elements
package com.aemquickstart.core.models;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

/**
 * JAXB model of one {@code <Item>} element: a product with its id, title and
 * publication date, all kept as plain strings.
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "Item")
public class Item {

    /** Content of {@code <ProductId>}; the element name is capitalised in the XML. */
    @XmlElement(name = "ProductId")
    private String productId;

    /** Content of {@code <title>}. */
    @XmlElement
    private String title;

    /** Content of {@code <pubDate>}. */
    @XmlElement
    private String pubDate;

    /** @return the product id */
    public String getProductId() {
        return this.productId;
    }

    /** @param productId the product id */
    public void setProductId(String productId) {
        this.productId = productId;
    }

    /** @return the product title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the product title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the publication date as found in the XML */
    public String getPubDate() {
        return this.pubDate;
    }

    /** @param pubDate the publication date */
    public void setPubDate(String pubDate) {
        this.pubDate = pubDate;
    }
}


Create a service to write the sitemap to XML in JCR
package com.aemquickstart.core.services;

import com.aemquickstart.core.models.ProductList;

public interface SitemapXmlWriterService {

    /**
     * Creates the product sitemap for the given products.
     *
     * @param domain      domain of the site the sitemap is created for
     * @param productList products to include in the sitemap
     * @param jcrPath     repository path the sitemap belongs to
     * @param from        label identifying the caller
     */
    void createProductSiteMap(String domain, ProductList productList, String jcrPath, String from);
}


Create an implementation class to create the sitemap
package com.aemquickstart.core.services.impl;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Map;

import javax.jcr.Binary;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.ValueFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import com.aemquickstart.core.models.Channel;
import com.aemquickstart.core.models.Item;
import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapXmlWriterService;
import com.day.cq.wcm.api.Page;
import com.day.cq.wcm.api.PageManager;

/**
 * Builds a sitemap XML document from a {@link ProductList} and stores it as an
 * {@code nt:file} named {@code sitemap_products.xml} below a page in the JCR.
 */
@Component(immediate = true, service = SitemapXmlWriterService.class)
public class SitemapXmlWriterServiceImpl implements SitemapXmlWriterService {

    /** Node name of the generated sitemap file. */
    private static final String SITEMAP_FILE_NAME = "sitemap_products.xml";

    // Logger
    private final Logger log = LoggerFactory.getLogger(this.getClass());

    // Injecting ResourceResolverFactory
    @Reference
    private ResourceResolverFactory resourceResolverFactory;

    /**
     * Adapts the resolver to its JCR session. The session is deliberately not
     * kept in a field: this service is shared between schedulers.
     */
    private Session getSession(ResourceResolver resourceResolver) {
        return resourceResolver.adaptTo(Session.class);
    }

    /**
     * Opens a service resource resolver for the "aemquickstartSubservice"
     * sub-service.
     *
     * @throws LoginException when the service resolver cannot be obtained
     */
    private ResourceResolver getResourceResolver() throws LoginException {
        Map<String, Object> xmlReaderMap = new HashMap<>();
        xmlReaderMap.put(ResourceResolverFactory.SUBSERVICE, "aemquickstartSubservice");
        return resourceResolverFactory.getServiceResourceResolver(xmlReaderMap);
    }

    /**
     * Generates the product sitemap and stores it below {@code jcrPath}.
     * Nothing is written when the product list is null, the path does not exist
     * or the path is not a page. Failures are logged, not thrown.
     *
     * @param domain      prefix of every product URL
     * @param productList products to list in the sitemap
     * @param jcrPath     path of the page the sitemap is stored under
     * @param from        label of the caller, used for logging only
     */
    @Override
    public void createProductSiteMap(String domain, ProductList productList, String jcrPath, String from) {
        log.info("createProductSiteMap: {}", from);
        if (productList == null) {
            log.warn("No product list provided. Sitemap file can't be created under {}", jcrPath);
            return;
        }
        ResourceResolver resourceResolver = null;
        try {
            resourceResolver = getResourceResolver();
            Session session = getSession(resourceResolver);
            if (session == null) {
                log.error("No JCR session available. Sitemap file can't be created under {}", jcrPath);
                return;
            }
            if (!session.itemExists(jcrPath)) {
                log.info("Provided path does not exist. Sitemap file can't be created under {}", jcrPath);
                return;
            }
            PageManager pgMgr = resourceResolver.adaptTo(PageManager.class);
            Page homepage = (pgMgr == null) ? null : pgMgr.getPage(jcrPath);
            if (homepage == null) {
                log.info("Provided path is not a page. Sitemap file can't be created under {}", jcrPath);
                return;
            }

            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            Document doc = docBuilder.newDocument();
            doc.setXmlStandalone(true);
            Element rootElement = doc.createElement("urlset");
            rootElement.setAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9");
            doc.appendChild(rootElement);
            addProductsToXml(domain, rootElement, jcrPath, doc, productList);

            String sitemapFile = jcrPath + "/" + SITEMAP_FILE_NAME;
            generateXmlFile(jcrPath, doc, session, sitemapFile);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        } finally {
            // The resolver is null when the login itself failed. Closing the
            // resolver also ends the session adapted from it.
            if (resourceResolver != null && resourceResolver.isLive()) {
                resourceResolver.close();
                log.info("Session is logged out");
            }
        }
    }

    /**
     * Appends one {@code <url>} entry per product to the root element. Every
     * channel is processed; missing channels, items, ids and titles are
     * tolerated.
     */
    private void addProductsToXml(String domain, Element rootElement, String jcrPath, Document doc,
            ProductList productList) {
        Channel[] channels = productList.getChannel();
        if (channels == null) {
            return;
        }
        log.info("Setting properties");
        // Same for every entry, so computed once.
        String lastModified = FastDateFormat.getInstance("yyyy-MM-dd")
                .format(Calendar.getInstance().getTimeInMillis());

        for (Channel channel : channels) {
            Item[] products = (channel == null) ? null : channel.getItem();
            if (products == null) {
                continue;
            }
            for (Item product : products) {
                // The model arrays start out holding a null placeholder.
                if (product == null || product.getProductId() == null) {
                    continue;
                }
                String title = (product.getTitle() == null) ? "" : product.getTitle();
                String pdpUrl = domain + jcrPath + "/pdp.html/" + title.replaceAll("[^a-zA-Z0-9-]", "") + "/"
                        + product.getProductId();

                Element pdpUrlElement = doc.createElement("url");
                appendTextElement(doc, pdpUrlElement, "loc", pdpUrl);
                appendTextElement(doc, pdpUrlElement, "lastmod", lastModified);
                // The sitemap protocol defines the changefreq values in lower case.
                appendTextElement(doc, pdpUrlElement, "changefreq", "weekly");
                rootElement.appendChild(pdpUrlElement);
            }
        }
    }

    /** Appends {@code <tagName>text</tagName>} to the given parent element. */
    private void appendTextElement(Document doc, Element parent, String tagName, String text) {
        Element child = doc.createElement(tagName);
        child.appendChild(doc.createTextNode(text));
        parent.appendChild(child);
    }

    /**
     * Serializes the document and stores it at {@code sitemapFile}. The old
     * file is replaced in the same save as the new one is written, and only
     * after serialization has succeeded.
     */
    private void generateXmlFile(String jcrPath, Document doc, Session session, String sitemapFile) {
        Binary contentValue = null;
        try {
            log.info("sitemap file: {}", sitemapFile);
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            Source xmlSource = new DOMSource(doc);
            Result outputTarget = new StreamResult(outputStream);
            TransformerFactory.newInstance().newTransformer().transform(xmlSource, outputTarget);
            InputStream is = new ByteArrayInputStream(outputStream.toByteArray());

            ValueFactory valueFactory = session.getValueFactory();
            contentValue = valueFactory.createBinary(is);

            if (session.itemExists(sitemapFile)) {
                log.info("Sitemap exists");
                session.removeItem(sitemapFile);
                log.info("Old Sitemap is deleted");
            }

            // create file at file location
            Node homepageNode = session.getNode(jcrPath);
            Node sitemapNode = homepageNode.addNode(SITEMAP_FILE_NAME, "nt:file");
            Node resNode = sitemapNode.addNode("jcr:content", "nt:resource");
            resNode.setProperty("jcr:data", contentValue);
            resNode.setProperty("jcr:mimeType", "text/xml");
            resNode.setProperty("jcr:lastModified", Calendar.getInstance());
            session.save();
            log.info("Sitemap is successfull created at {}", sitemapFile);
        } catch (RepositoryException rpe) {
            // The path fills the placeholder; the exception is logged with its stack trace.
            log.error("Exception in Text Renderer: {}", sitemapFile, rpe);
        } catch (Exception e) {
            log.error("Exception in while writting or creating file Renderer: {}", sitemapFile, e);
        } finally {
            if (contentValue != null) {
                contentValue.dispose();
            }
        }
    }
}

Create runmodes configurations
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample sling:OsgiConfig node holding the settings of one sitemap scheduler
  instance: name, enabled flag, cron expression, feed URL, JCR path and domain.

  NOTE(review): SitemapConfiguration declares its cron attribute as
  scheduler_expression, while this node sets cronExpression, the value the
  scheduler's bind method reads through SitemapService.cronExpression().
  Confirm which component this node is meant to configure.
-->
<jcr:root xmlns:sling="http://sling.apache.org/jcr/sling/1.0"
xmlns:jcr="http://www.jcp.org/jcr/1.0" jcr:primaryType="sling:OsgiConfig"
name="Sitemap Scheduler - CA"
enabled="{Boolean}true"
cronExpression="0 0/5 * * * ?"
xmlResponseURL="http://localhost:4502/productfeed.xml"
jcrPath="/content/aemquickstart"
domain="https://localhost"/>


Now build the project. Open ConfigMgr and search for "AEM Quickstart Sitemap Configuration"


Enter Scheduler name and select enabled checkbox.
Enter the cron expression - to set how frequently the job runs.
Enter the XML file URL - where you would like to read the product info.
Enter JCR path - where you need to upload your sitemap_products.xml file
Enter Domain - this value will be used as the prefix when creating the product URL for the loc element.

To check if the jobs are running, navigate to http://localhost:4502/system/console/status-slingscheduler, and search for the job.



Once the scheduler is running, the sitemap_products.xml file will be created under /content/aemquickstart/en




By aem4beginner

No comments:

Post a Comment

If you have any doubts or questions, please let us know.