From 6b46d95bf31a15952c07e8072bc134bbbf02ca5f Mon Sep 17 00:00:00 2001
From: Vsevolod Tolstopyatov
Date: Wed, 8 Mar 2023 21:02:57 +0300
Subject: Improve performance of 'parseWithNormalisedSpaces' that is unconditionally invoked for each HTML text element (#2730)

Addresses #2729
---
 .../main/kotlin/translators/parseWithNormalisedSpaces.kt | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'plugins/base')

diff --git a/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt b/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt
index 4bb60f1a..7bda9d0b 100644
--- a/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt
+++ b/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt
@@ -44,7 +44,13 @@ internal fun String.parseHtmlEncodedWithNormalisedSpaces(
  */
 internal fun String.parseWithNormalisedSpaces(
     renderWhiteCharactersAsSpaces: Boolean
-): List =
-    //parsing it using jsoup is required to get codePoints, otherwise they are interpreted separately, as chars
-    //But we dont need to do it for java as it is already parsed with jsoup
-    Jsoup.parseBodyFragment(this).body().wholeText().parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
\ No newline at end of file
+): List {
+    if (!requiresHtmlEncoding()) {
+        return parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
+    }
+    // parsing it using jsoup is required to get codePoints, otherwise they are interpreted separately, as chars
+    // But we dont need to do it for java as it is already parsed with jsoup
+    return Jsoup.parseBodyFragment(this).body().wholeText().parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
+}
+
+private fun String.requiresHtmlEncoding(): Boolean = indexOf('&') != -1
--
cgit