[MPLUGIN-96] Handle character encoding properly in makeHtmlValid()
git-svn-id: https://svn.apache.org/repos/asf/maven/plugin-tools/trunk@643558 13f79535-47bb-0310-9956-ffa450edef68
master
parent
6e025f5245
commit
6ebf227d09
|
|
@ -19,7 +19,10 @@ package org.apache.maven.tools.plugin.util;
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLClassLoader;
|
||||
|
|
@ -38,10 +41,9 @@ import org.apache.maven.reporting.MavenReport;
|
|||
import org.codehaus.plexus.component.repository.ComponentDependency;
|
||||
import org.codehaus.plexus.util.DirectoryScanner;
|
||||
import org.codehaus.plexus.util.FileUtils;
|
||||
import org.codehaus.plexus.util.StringInputStream;
|
||||
import org.codehaus.plexus.util.StringOutputStream;
|
||||
import org.codehaus.plexus.util.StringUtils;
|
||||
import org.codehaus.plexus.util.xml.XMLWriter;
|
||||
import org.w3c.tidy.Configuration;
|
||||
import org.w3c.tidy.Tidy;
|
||||
|
||||
/**
|
||||
|
|
@ -241,29 +243,42 @@ public final class PluginUtils
|
|||
return "";
|
||||
}
|
||||
|
||||
StringOutputStream out = new StringOutputStream();
|
||||
String commentCleaned = decodeJavadocTags( description );
|
||||
|
||||
// Using jTidy to clean comment
|
||||
Tidy tidy = new Tidy();
|
||||
tidy.setDocType( "loose" );
|
||||
tidy.setXHTML( true );
|
||||
tidy.setXmlOut( true );
|
||||
tidy.setCharEncoding( Configuration.UTF8 );
|
||||
tidy.setMakeClean( true );
|
||||
tidy.setNumEntities( true );
|
||||
tidy.setQuoteNbsp( false );
|
||||
tidy.setQuiet( true );
|
||||
tidy.setShowWarnings( false );
|
||||
tidy.parse( new StringInputStream( decodeJavadocTags( description ) ), out );
|
||||
try
|
||||
{
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream( commentCleaned.length() + 256 );
|
||||
tidy.parse( new ByteArrayInputStream( commentCleaned.getBytes( "UTF-8" ) ), out );
|
||||
commentCleaned = out.toString("UTF-8");
|
||||
}
|
||||
catch ( UnsupportedEncodingException e )
|
||||
{
|
||||
// cannot happen as every JVM must support UTF-8, see also class javadoc for java.nio.charset.Charset
|
||||
}
|
||||
|
||||
// strip the header/body stuff
|
||||
String LS = System.getProperty( "line.separator" );
|
||||
String commentCleaned = out.toString();
|
||||
if ( StringUtils.isEmpty( commentCleaned ) )
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
// strip the header/body stuff
|
||||
String LS = System.getProperty( "line.separator" );
|
||||
int startPos = commentCleaned.indexOf( "<body>" + LS ) + 6 + LS.length();
|
||||
int endPos = commentCleaned.indexOf( LS + "</body>" );
|
||||
commentCleaned = commentCleaned.substring( startPos, endPos );
|
||||
|
||||
return commentCleaned.substring( startPos, endPos );
|
||||
return commentCleaned;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -130,8 +130,19 @@ public class PluginUtilsTest
|
|||
|
||||
// wrong HTML
|
||||
javadoc = "Generates <i>something</i> <b> for the project.";
|
||||
assertEquals( "Generates <i>something</i> <b> for the project.</b>", PluginUtils
|
||||
.makeHtmlValid( javadoc ) );
|
||||
assertEquals( "Generates <i>something</i> <b> for the project.</b>", PluginUtils.makeHtmlValid( javadoc ) );
|
||||
|
||||
// special characters
|
||||
javadoc = "& & < > \u00A0";
|
||||
assertEquals( "& & < > \u00A0", PluginUtils.makeHtmlValid( javadoc ) );
|
||||
|
||||
// non ASCII characters
|
||||
javadoc = "\u00E4 \u00F6 \u00FC \u00DF";
|
||||
assertEquals( javadoc, PluginUtils.makeHtmlValid( javadoc ) );
|
||||
|
||||
// non Latin1 characters
|
||||
javadoc = "\u0130 \u03A3 \u05D0 \u06DE";
|
||||
assertEquals( javadoc, PluginUtils.makeHtmlValid( javadoc ) );
|
||||
}
|
||||
|
||||
public void testDecodeJavadocTags()
|
||||
|
|
|
|||
Loading…
Reference in New Issue