[MPLUGIN-96] Handle character encoding properly in makeHtmlValid()
git-svn-id: https://svn.apache.org/repos/asf/maven/plugin-tools/trunk@643558 13f79535-47bb-0310-9956-ffa450edef68
master
parent
6e025f5245
commit
6ebf227d09
|
|
@ -19,7 +19,10 @@ package org.apache.maven.tools.plugin.util;
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLClassLoader;
|
import java.net.URLClassLoader;
|
||||||
|
|
@ -38,10 +41,9 @@ import org.apache.maven.reporting.MavenReport;
|
||||||
import org.codehaus.plexus.component.repository.ComponentDependency;
|
import org.codehaus.plexus.component.repository.ComponentDependency;
|
||||||
import org.codehaus.plexus.util.DirectoryScanner;
|
import org.codehaus.plexus.util.DirectoryScanner;
|
||||||
import org.codehaus.plexus.util.FileUtils;
|
import org.codehaus.plexus.util.FileUtils;
|
||||||
import org.codehaus.plexus.util.StringInputStream;
|
|
||||||
import org.codehaus.plexus.util.StringOutputStream;
|
|
||||||
import org.codehaus.plexus.util.StringUtils;
|
import org.codehaus.plexus.util.StringUtils;
|
||||||
import org.codehaus.plexus.util.xml.XMLWriter;
|
import org.codehaus.plexus.util.xml.XMLWriter;
|
||||||
|
import org.w3c.tidy.Configuration;
|
||||||
import org.w3c.tidy.Tidy;
|
import org.w3c.tidy.Tidy;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -241,29 +243,42 @@ public final class PluginUtils
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
StringOutputStream out = new StringOutputStream();
|
String commentCleaned = decodeJavadocTags( description );
|
||||||
|
|
||||||
// Using jTidy to clean comment
|
// Using jTidy to clean comment
|
||||||
Tidy tidy = new Tidy();
|
Tidy tidy = new Tidy();
|
||||||
tidy.setDocType( "loose" );
|
tidy.setDocType( "loose" );
|
||||||
tidy.setXHTML( true );
|
tidy.setXHTML( true );
|
||||||
tidy.setXmlOut( true );
|
tidy.setXmlOut( true );
|
||||||
|
tidy.setCharEncoding( Configuration.UTF8 );
|
||||||
tidy.setMakeClean( true );
|
tidy.setMakeClean( true );
|
||||||
|
tidy.setNumEntities( true );
|
||||||
|
tidy.setQuoteNbsp( false );
|
||||||
tidy.setQuiet( true );
|
tidy.setQuiet( true );
|
||||||
tidy.setShowWarnings( false );
|
tidy.setShowWarnings( false );
|
||||||
tidy.parse( new StringInputStream( decodeJavadocTags( description ) ), out );
|
try
|
||||||
|
{
|
||||||
|
ByteArrayOutputStream out = new ByteArrayOutputStream( commentCleaned.length() + 256 );
|
||||||
|
tidy.parse( new ByteArrayInputStream( commentCleaned.getBytes( "UTF-8" ) ), out );
|
||||||
|
commentCleaned = out.toString("UTF-8");
|
||||||
|
}
|
||||||
|
catch ( UnsupportedEncodingException e )
|
||||||
|
{
|
||||||
|
// cannot happen as every JVM must support UTF-8, see also class javadoc for java.nio.charset.Charset
|
||||||
|
}
|
||||||
|
|
||||||
// strip the header/body stuff
|
|
||||||
String LS = System.getProperty( "line.separator" );
|
|
||||||
String commentCleaned = out.toString();
|
|
||||||
if ( StringUtils.isEmpty( commentCleaned ) )
|
if ( StringUtils.isEmpty( commentCleaned ) )
|
||||||
{
|
{
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// strip the header/body stuff
|
||||||
|
String LS = System.getProperty( "line.separator" );
|
||||||
int startPos = commentCleaned.indexOf( "<body>" + LS ) + 6 + LS.length();
|
int startPos = commentCleaned.indexOf( "<body>" + LS ) + 6 + LS.length();
|
||||||
int endPos = commentCleaned.indexOf( LS + "</body>" );
|
int endPos = commentCleaned.indexOf( LS + "</body>" );
|
||||||
|
commentCleaned = commentCleaned.substring( startPos, endPos );
|
||||||
|
|
||||||
return commentCleaned.substring( startPos, endPos );
|
return commentCleaned;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -130,8 +130,19 @@ public class PluginUtilsTest
|
||||||
|
|
||||||
// wrong HTML
|
// wrong HTML
|
||||||
javadoc = "Generates <i>something</i> <b> for the project.";
|
javadoc = "Generates <i>something</i> <b> for the project.";
|
||||||
assertEquals( "Generates <i>something</i> <b> for the project.</b>", PluginUtils
|
assertEquals( "Generates <i>something</i> <b> for the project.</b>", PluginUtils.makeHtmlValid( javadoc ) );
|
||||||
.makeHtmlValid( javadoc ) );
|
|
||||||
|
// special characters
|
||||||
|
javadoc = "& &amp; < > \u00A0";
|
||||||
|
assertEquals( "&amp; &amp; &lt; &gt; \u00A0", PluginUtils.makeHtmlValid( javadoc ) );
|
||||||
|
|
||||||
|
// non ASCII characters
|
||||||
|
javadoc = "\u00E4 \u00F6 \u00FC \u00DF";
|
||||||
|
assertEquals( javadoc, PluginUtils.makeHtmlValid( javadoc ) );
|
||||||
|
|
||||||
|
// non Latin1 characters
|
||||||
|
javadoc = "\u0130 \u03A3 \u05D0 \u06DE";
|
||||||
|
assertEquals( javadoc, PluginUtils.makeHtmlValid( javadoc ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDecodeJavadocTags()
|
public void testDecodeJavadocTags()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue