<?xml version="1.0" encoding="UTF-8"?>
<!-- Initial sample data: four people, each born in a different year/month
     (1990-01, 1980-11, 1990-02, 1981-09). -->
<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <person>
    <height>1.81</height>
    <dob>1990-01-17</dob>
  </person>
  <person>
    <height>1.51</height>
    <dob>1980-11-10</dob>
  </person>
  <person>
    <height>1.31</height>
    <dob>1990-02-07</dob>
  </person>
  <person>
    <height>1.71</height>
    <dob>1981-09-20</dob>
  </person>
</people>
I want to create an XML file (averageHeight.xml) that groups the people by year and month of birth and gives the average height for each month within each year.
e.g
<average_height>
<year no="1981">
<month no="09">
<height>1.61</height>
</month>
</year>
….
Let’s break the problem down into manageable chunks.
Here’s a simple transform that extracts the dob of each person
<?xml version="1.0" encoding="UTF-8"?>
<!-- Emits <average_height> containing one <year> per person; each <year>
     holds that person's dob text unchanged. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:output method="xml"/>

  <!-- One <year> element per person, carrying the full dob value. -->
  <xsl:template match="person">
    <year>
      <xsl:value-of select="dob"/>
    </year>
  </xsl:template>

  <!-- Document root: wrap the per-person results in the single output root. -->
  <xsl:template match="/">
    <average_height>
      <xsl:apply-templates select="//person"/>
    </average_height>
  </xsl:template>
</xsl:stylesheet>
This gives:
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year>1990-01-17</year>
<year>1980-11-10</year>
<year>1990-02-07</year>
<year>1981-09-20</year>
</average_height>
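Now we want to write it to averageHeight.xml, so let's add <xsl:result-document href="averageHeight.xml">. This is an XSLT 2.0 instruction, so the stylesheet version becomes 2.0 and an XSLT 2.0 processor is needed.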
To give:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes one <year> per person (holding the full dob) into averageHeight.xml.
     xsl:result-document requires an XSLT 2.0 processor. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="//person"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <!-- One <year> element per person, carrying the dob text unchanged. -->
  <xsl:template match="person">
    <xsl:element name="year">
      <xsl:value-of select="dob"/>
    </xsl:element>
  </xsl:template>
</xsl:stylesheet>
This now gives averageHeight.xml with the following content
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year>1990-01-17</year>
<year>1980-11-10</year>
<year>1990-02-07</year>
<year>1981-09-20</year>
</average_height>
Now, let's extract the year. We also switch to an xsl:for-each over the person elements, which is where the sort will be added in the next step:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes one <year> per person to averageHeight.xml, holding only the
     four-digit year taken from the start of dob. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <!-- Iterates the person children in document order. -->
  <xsl:template match="people">
    <xsl:for-each select="person">
      <xsl:element name="year">
        <!-- Characters 1-4 of a YYYY-MM-DD dob are the year. -->
        <xsl:value-of select="substring(dob,1,4)"/>
      </xsl:element>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
gives:
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year>1990</year>
<year>1980</year>
<year>1990</year>
<year>1981</year>
</average_height>
Now, let’s sort it:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes one <year> per person to averageHeight.xml, ordered by year. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <!-- Iterates the person children sorted by the year part of dob. -->
  <xsl:template match="people">
    <xsl:for-each select="person">
      <!-- The sort key is the year string; four-digit years order correctly as text. -->
      <xsl:sort select="substring(dob,1,4)"/>
      <xsl:element name="year">
        <xsl:value-of select="substring(dob,1,4)"/>
      </xsl:element>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
to give:
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year>1980</year>
<year>1981</year>
<year>1990</year>
<year>1990</year>
</average_height>
Now let’s eliminate duplicates:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes each distinct birth year once, in ascending order, to averageHeight.xml. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people> element; not referenced yet in this version. -->
    <xsl:variable name="people" select="."/>
    <!-- Temporary tree: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Keep a <year> only if no earlier <year> in the temporary tree has the same value. -->
    <xsl:for-each select="$years/year[not(.=preceding::year)]">
      <xsl:element name="year">
        <xsl:value-of select="."/>
      </xsl:element>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
gives
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year>1980</year>
<year>1981</year>
<year>1990</year>
</average_height>
now, let’s add in the months:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes each distinct year as <year no="YYYY"> with one <month> child per
     person born in that year (duplicate months are not removed yet). -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people>; needed below because the loop's context
         node is a <year> in the temporary tree, not a source node. -->
    <xsl:variable name="people" select="."/>
    <!-- Temporary tree: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Visit each distinct year once. -->
    <xsl:for-each select="$years/year[not(.=preceding::year)]">
      <xsl:variable name="year" select="."/>
      <xsl:element name="year">
        <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
        <!-- Characters 6-7 of a YYYY-MM-DD dob are the month. -->
        <xsl:for-each select="$people/person[substring(dob,1,4)=$year]">
          <xsl:element name="month">
            <xsl:value-of select="substring(dob,6,2)"/>
          </xsl:element>
        </xsl:for-each>
      </xsl:element>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
gives
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year no="1980">
<month>11</month>
</year>
<year no="1981">
<month>09</month>
</year>
<year no="1990">
<month>01</month>
<month>02</month>
</year>
</average_height>
I want to introduce a duplicate month to test the removal of duplicate months. So let's change our original xml to this:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Test data with a duplicate month: two people (1981-09-10 and 1981-09-20) share year 1981, month 09. -->
<people xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<person>
<height>1.81</height>
<dob>1990-01-17</dob>
</person>
<person>
<height>1.51</height>
<dob>1981-09-10</dob>
</person>
<person>
<height>1.31</height>
<dob>1990-02-07</dob>
</person>
<person>
<height>1.71</height>
<dob>1981-09-20</dob>
</person>
</people>
and transform it with:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes each distinct year as <year no="YYYY"> with one <month> child per
     person born in that year (duplicate months are not removed yet). -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people>; needed below because the loop's context
         node is a <year> in the temporary tree, not a source node. -->
    <xsl:variable name="people" select="."/>
    <!-- Temporary tree: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Visit each distinct year once. -->
    <xsl:for-each select="$years/year[not(.=preceding::year)]">
      <xsl:variable name="year" select="."/>
      <xsl:element name="year">
        <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
        <!-- One <month> per matching person, so shared months appear more than once. -->
        <xsl:for-each select="$people/person[substring(dob,1,4)=$year]">
          <xsl:element name="month">
            <xsl:value-of select="substring(dob,6,2)"/>
          </xsl:element>
        </xsl:for-each>
      </xsl:element>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
gives:
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year no="1981">
<month>09</month>
<month>09</month>
</year>
<year no="1990">
<month>01</month>
<month>02</month>
</year>
</average_height>
now, let’s remove duplicate months: - part 1 – create the variable:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Same output as the previous version, but the <year>/<month> structure is
     first built in a variable and then copied out, ready for de-duplication. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people>; the loops below run over temporary trees. -->
    <xsl:variable name="people" select="."/>
    <!-- Temporary tree: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Visit each distinct year once. -->
    <xsl:for-each select="$years/year[not(.=preceding::year)]">
      <xsl:variable name="year" select="."/>
      <!-- Temporary tree holding a single <year no="..."> with one <month> per person. -->
      <xsl:variable name="months-in-years">
        <xsl:element name="year">
          <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
          <xsl:for-each select="$people/person[substring(dob,1,4)=$year]">
            <xsl:element name="month">
              <xsl:value-of select="substring(dob,6,2)"/>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:variable>
      <!-- For now, copy the temporary <year> to the output unchanged. -->
      <xsl:for-each select="$months-in-years/year">
        <xsl:copy-of select="."/>
      </xsl:for-each>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
gives
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year no="1981">
<month>09</month>
<month>09</month>
</year>
<year no="1990">
<month>01</month>
<month>02</month>
</year>
</average_height>
now, remove the duplicates:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes each distinct year as <year no="YYYY"> containing one empty
     <month no="MM"/> per distinct month, months in ascending order. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people>; the loops below run over temporary trees. -->
    <xsl:variable name="people" select="."/>
    <!-- Temporary tree: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Visit each distinct year once. -->
    <xsl:for-each select="$years/year[not(.=preceding::year)]">
      <xsl:variable name="year" select="."/>
      <!-- Temporary tree holding a single <year no="..."> with one <month> per person. -->
      <xsl:variable name="months-in-years">
        <xsl:element name="year">
          <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
          <xsl:for-each select="$people/person[substring(dob,1,4)=$year]">
            <xsl:element name="month">
              <xsl:value-of select="substring(dob,6,2)"/>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:variable>
      <xsl:for-each select="$months-in-years/year">
        <!-- The variable holds exactly one <year>, so this sort has nothing to reorder. -->
        <xsl:sort select="."/>
        <xsl:element name="year">
          <xsl:attribute name="no"><xsl:value-of select="@no"/></xsl:attribute>
          <!-- The temporary tree contains only this year's months, so preceding::
               compares against months of the same year only. -->
          <xsl:for-each select="month[not(.=preceding::month)]">
            <xsl:sort select="."/>
            <xsl:element name="month">
              <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:for-each>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
gives
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year no="1981">
<month no="09"/>
</year>
<year no="1990">
<month no="01"/>
<month no="02"/>
</year>
</average_height>
Now, let's output the dobs for each month as an interim step:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes year / distinct month, and under each month one <dob> per person
     born in that year and month. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people>; the loops below run over temporary trees. -->
    <xsl:variable name="people" select="."/>
    <!-- Temporary tree: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Visit each distinct year once. -->
    <xsl:for-each select="$years/year[not(.=preceding::year)]">
      <xsl:variable name="year" select="."/>
      <!-- Temporary tree holding a single <year no="..."> with one <month> per person. -->
      <xsl:variable name="months-in-years">
        <xsl:element name="year">
          <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
          <xsl:for-each select="$people/person[substring(dob,1,4)=$year]">
            <xsl:element name="month">
              <xsl:value-of select="substring(dob,6,2)"/>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:variable>
      <xsl:for-each select="$months-in-years/year">
        <!-- The variable holds exactly one <year>, so this sort has nothing to reorder. -->
        <xsl:sort select="."/>
        <xsl:element name="year">
          <!-- Shadows the outer $year (allowed in XSLT 2.0); @no was built from the same value. -->
          <xsl:variable name="year" select="@no"/>
          <xsl:attribute name="no"><xsl:value-of select="@no"/></xsl:attribute>
          <!-- Distinct months of this year, ascending. -->
          <xsl:for-each select="month[not(.=preceding::month)]">
            <xsl:sort select="."/>
            <xsl:variable name="month" select="."/>
            <xsl:element name="month">
              <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
              <!-- Back to the source data: every person born in this year and month. -->
              <xsl:for-each select="$people/person[substring(dob,1,4)=$year and substring(dob,6,2)=$month]">
                <xsl:element name="dob">
                  <xsl:value-of select="dob"/>
                </xsl:element>
              </xsl:for-each>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:for-each>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
to give:
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year no="1981">
<month no="09">
<dob>1981-09-10</dob>
<dob>1981-09-20</dob>
</month>
</year>
<year no="1990">
<month no="01">
<dob>1990-01-17</dob>
</month>
<month no="02">
<dob>1990-02-07</dob>
</month>
</year>
</average_height>
now, let’s change that dob to height:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Writes year / distinct month, and under each month one <height> per
     person born in that year and month. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people>; the loops below run over temporary trees. -->
    <xsl:variable name="people" select="."/>
    <!-- Temporary tree: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Visit each distinct year once. -->
    <xsl:for-each select="$years/year[not(.=preceding::year)]">
      <xsl:variable name="year" select="."/>
      <!-- Temporary tree holding a single <year no="..."> with one <month> per person. -->
      <xsl:variable name="months-in-years">
        <xsl:element name="year">
          <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
          <xsl:for-each select="$people/person[substring(dob,1,4)=$year]">
            <xsl:element name="month">
              <xsl:value-of select="substring(dob,6,2)"/>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:variable>
      <xsl:for-each select="$months-in-years/year">
        <!-- The variable holds exactly one <year>, so this sort has nothing to reorder. -->
        <xsl:sort select="."/>
        <xsl:element name="year">
          <!-- Shadows the outer $year (allowed in XSLT 2.0); @no was built from the same value. -->
          <xsl:variable name="year" select="@no"/>
          <xsl:attribute name="no"><xsl:value-of select="@no"/></xsl:attribute>
          <!-- Distinct months of this year, ascending. -->
          <xsl:for-each select="month[not(.=preceding::month)]">
            <xsl:sort select="."/>
            <xsl:variable name="month" select="."/>
            <xsl:element name="month">
              <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
              <!-- Back to the source data: every person born in this year and month. -->
              <xsl:for-each select="$people/person[substring(dob,1,4)=$year and substring(dob,6,2)=$month]">
                <xsl:element name="height">
                  <xsl:value-of select="height"/>
                </xsl:element>
              </xsl:for-each>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:for-each>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
gives
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year no="1981">
<month no="09">
<height>1.51</height>
<height>1.71</height>
</month>
</year>
<year no="1990">
<month no="01">
<height>1.81</height>
</month>
<month no="02">
<height>1.31</height>
</month>
</year>
</average_height>
now let’s average the heights
<?xml version="1.0" encoding="UTF-8"?>
<!-- Final XSLT 2.0 transform: writes averageHeight.xml with, for every
     distinct birth year and month, the average height of the people born then. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <xsl:output method="xml"/>

  <!-- Entry point: send the <average_height> document to a secondary result file. -->
  <xsl:template match="/">
    <!-- href must not carry leading whitespace, or the target is not averageHeight.xml. -->
    <xsl:result-document href="averageHeight.xml">
      <xsl:element name="average_height">
        <xsl:apply-templates select="/people"/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people>; the loops below run over temporary trees. -->
    <xsl:variable name="people" select="."/>
    <!-- Temporary tree: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Visit each distinct year once. -->
    <xsl:for-each select="$years/year[not(.=preceding::year)]">
      <xsl:variable name="year" select="."/>
      <!-- Temporary tree holding a single <year no="..."> with one <month> per person. -->
      <xsl:variable name="months-in-years">
        <xsl:element name="year">
          <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
          <xsl:for-each select="$people/person[substring(dob,1,4)=$year]">
            <xsl:element name="month">
              <xsl:value-of select="substring(dob,6,2)"/>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:variable>
      <xsl:for-each select="$months-in-years/year">
        <!-- The variable holds exactly one <year>, so this sort has nothing to reorder. -->
        <xsl:sort select="."/>
        <xsl:element name="year">
          <!-- Shadows the outer $year (allowed in XSLT 2.0); @no was built from the same value. -->
          <xsl:variable name="year" select="@no"/>
          <xsl:attribute name="no"><xsl:value-of select="@no"/></xsl:attribute>
          <!-- Distinct months of this year, ascending. -->
          <xsl:for-each select="month[not(.=preceding::month)]">
            <xsl:sort select="."/>
            <xsl:variable name="month" select="."/>
            <xsl:element name="month">
              <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
              <xsl:element name="height">
                <!-- avg() is an XPath 2.0 function: mean height of everyone born in this year and month. -->
                <xsl:value-of select="avg($people/person[substring(dob,1,4)=$year and substring(dob,6,2)=$month]/height)"/>
              </xsl:element>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:for-each>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
gives
<?xml version="1.0" encoding="UTF-8"?>
<average_height>
<year no="1981">
<month no="09">
<height>1.61</height>
</month>
</year>
<year no="1990">
<month no="01">
<height>1.81</height>
</month>
<month no="02">
<height>1.31</height>
</month>
</year>
</average_height>
job done...
Actually, there's one problem that can arise with Xalan processors.
If you get the error message:
Can not convert #RTREEFRAG to a NodeList!
Then you need to include the xalan namespace as follows:
xmlns:xalan="http://xml.apache.org/xalan"
exclude-result-prefixes="xalan">
and replace any references to variables that hold result tree fragments as follows:
<xsl:for-each select="$rtf/docelem//*">
becomes
<xsl:for-each select="xalan:nodeset($rtf)/docelem//*">
The final transform from our example, for use with a Xalan processor, becomes:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  XSLT 1.0 variant of the average-height transform for Xalan.

  Xalan implements XSLT 1.0, so this version:
    * declares version="1.0";
    * writes to the principal result instead of using xsl:result-document
      (XSLT 2.0 only). Direct the processor's output to averageHeight.xml,
      for example with Xalan-J's -OUT command-line option;
    * computes the mean as sum() div count() because avg() is XPath 2.0 only;
    * converts result tree fragments with xalan:nodeset() before applying
      path expressions to them;
    * does not redeclare $year inside the template, since XSLT 1.0 forbids one
      local variable shadowing another.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xmlns:xalan="http://xml.apache.org/xalan" exclude-result-prefixes="xalan">
  <xsl:output method="xml"/>

  <!-- Entry point: wrap everything in the single <average_height> root. -->
  <xsl:template match="/">
    <xsl:element name="average_height">
      <xsl:apply-templates select="/people"/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="people">
    <!-- Handle on the source <people>. It is selected from the source tree, so it
         is already a node-set and needs no xalan:nodeset() conversion. -->
    <xsl:variable name="people" select="."/>
    <!-- Result tree fragment: one <year> per person, sorted, duplicates still present. -->
    <xsl:variable name="years">
      <xsl:for-each select="person">
        <xsl:sort select="substring(dob,1,4)"/>
        <xsl:element name="year">
          <xsl:value-of select="substring(dob,1,4)"/>
        </xsl:element>
      </xsl:for-each>
    </xsl:variable>
    <!-- Visit each distinct year once. -->
    <xsl:for-each select="xalan:nodeset($years)/year[not(.=preceding::year)]">
      <xsl:variable name="year" select="."/>
      <!-- Result tree fragment holding a single <year no="..."> with one <month> per person. -->
      <xsl:variable name="months-in-years">
        <xsl:element name="year">
          <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
          <xsl:for-each select="$people/person[substring(dob,1,4)=$year]">
            <xsl:element name="month">
              <xsl:value-of select="substring(dob,6,2)"/>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:variable>
      <xsl:for-each select="xalan:nodeset($months-in-years)/year">
        <!-- The fragment holds exactly one <year>, so this sort has nothing to reorder. -->
        <xsl:sort select="."/>
        <xsl:element name="year">
          <xsl:attribute name="no"><xsl:value-of select="@no"/></xsl:attribute>
          <!-- Distinct months of this year, ascending. The fragment contains only
               this year's months, so preceding:: cannot match another year's month. -->
          <xsl:for-each select="month[not(.=preceding::month)]">
            <xsl:sort select="."/>
            <xsl:variable name="month" select="."/>
            <!-- Heights of everyone born in this year and month; never empty because
                 each month value was derived from at least one person. -->
            <xsl:variable name="heights" select="$people/person[substring(dob,1,4)=$year and substring(dob,6,2)=$month]/height"/>
            <xsl:element name="month">
              <xsl:attribute name="no"><xsl:value-of select="."/></xsl:attribute>
              <xsl:element name="height">
                <xsl:value-of select="sum($heights) div count($heights)"/>
              </xsl:element>
            </xsl:element>
          </xsl:for-each>
        </xsl:element>
      </xsl:for-each>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
now, the job's done!!
Optionally, the duplicate removal can be simplified with Xalan's distinct() extension function. For example:
<xsl:for-each select="xalan:nodeset($years)/year[not(.=preceding::year)]">
becomes:
<xsl:for-each select="xalan:distinct(xalan:nodeset($years)/year)">
reference: http://xml.apache.org/xalan-c/extensionslib.html
No comments:
Post a Comment