From d8724f4541ef9bce33b8af841f8c86c82f945313 Mon Sep 17 00:00:00 2001
From: AlexMahlon <53351970+AlexMahlon@users.noreply.github.com>
Date: Sun, 15 May 2022 06:33:08 -0400
Subject: [PATCH] Fixes #4934. Enforces Wikimedia character blocklisting on
 media titles. (#4955)

---
 .../upload/UploadMediaDetailAdapter.java      |   4 +
 .../upload/UploadMediaDetailInputFilter.java  | 102 ++++++++++++
 .../UploadMediaDetailInputFilterTest.kt       | 158 ++++++++++++++++++
 3 files changed, 264 insertions(+)
 create mode 100644 app/src/main/java/fr/free/nrw/commons/upload/UploadMediaDetailInputFilter.java
 create mode 100644 app/src/test/kotlin/fr/free/nrw/commons/upload/UploadMediaDetailInputFilterTest.kt

diff --git a/app/src/main/java/fr/free/nrw/commons/upload/UploadMediaDetailAdapter.java b/app/src/main/java/fr/free/nrw/commons/upload/UploadMediaDetailAdapter.java
index 60c3d72d8..9eb8fbb5d 100644
--- a/app/src/main/java/fr/free/nrw/commons/upload/UploadMediaDetailAdapter.java
+++ b/app/src/main/java/fr/free/nrw/commons/upload/UploadMediaDetailAdapter.java
@@ -3,6 +3,7 @@ package fr.free.nrw.commons.upload;
 import android.app.Dialog;
 import android.content.Intent;
 import android.text.Editable;
+import android.text.InputFilter;
 import android.text.TextUtils;
 import android.text.TextWatcher;
 import android.view.LayoutInflater;
@@ -165,6 +166,9 @@ public class UploadMediaDetailAdapter extends RecyclerView.Adapter
                 callback.showAlert(R.string.media_detail_caption, R.string.caption_info));
+            Objects.requireNonNull(captionInputLayout.getEditText()).setFilters(new InputFilter[] {
+                new UploadMediaDetailInputFilter()
+            });
 
             descInputLayout.setEndIconMode(TextInputLayout.END_ICON_CUSTOM);
             descInputLayout.setEndIconDrawable(R.drawable.mapbox_info_icon_default);
diff --git a/app/src/main/java/fr/free/nrw/commons/upload/UploadMediaDetailInputFilter.java b/app/src/main/java/fr/free/nrw/commons/upload/UploadMediaDetailInputFilter.java
new file mode 100644
index 000000000..91d46d728
--- /dev/null
+++ b/app/src/main/java/fr/free/nrw/commons/upload/UploadMediaDetailInputFilter.java
@@ -0,0 +1,102 @@
+package fr.free.nrw.commons.upload;
+
+import android.text.InputFilter;
+import android.text.SpannableStringBuilder;
+import android.text.Spanned;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * An {@link InputFilter} class that removes characters blocklisted in Wikimedia titles. The list
+ * of blocklisted characters is linked below.
+ * @see <a href="https://commons.wikimedia.org/wiki/MediaWiki:Titleblacklist">wikimedia.org</a>
+ */
+public class UploadMediaDetailInputFilter implements InputFilter {
+
+    /**
+     * Blocklisted patterns, applied in order. They never change, so they are compiled once and
+     * shared by every filter instance.
+     */
+    private static final Pattern[] BLOCKLISTED_PATTERNS = {
+        // Unusual space characters
+        Pattern.compile("[\\x{00A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200B}"
+            + "\\x{2028}\\x{2029}\\x{202F}\\x{205F}\\x{3000}]"),
+        // Bidirectional embedding and override characters
+        Pattern.compile("[\\x{202A}-\\x{202E}]"),
+        // Control characters
+        Pattern.compile("\\p{Cc}"),
+        // Byte order mark
+        Pattern.compile("\\x{FEFF}"),
+        // Soft hyphen
+        Pattern.compile("\\x{00AD}"),
+        // Private use area and specials
+        Pattern.compile("[\\x{E000}-\\x{F8FF}\\x{FFF0}-\\x{FFFF}]"),
+        // Characters outside the Basic Multilingual Plane that are not in the Han script
+        Pattern.compile("[^\\x{0000}-\\x{FFFF}\\p{sc=Han}]")
+    };
+
+    /**
+     * Checks if the text contains any blocklisted characters.
+     * @param text text to inspect
+     * @return whether the text contains a blocklisted character
+     */
+    private static boolean containsBlocklisted(final CharSequence text) {
+        for (final Pattern pattern : BLOCKLISTED_PATTERNS) {
+            if (pattern.matcher(text).find()) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Removes any blocklisted characters from the text. The result is built on a copy that keeps
+     * the spans of the input, which the {@link InputFilter} contract asks filters to preserve.
+     * @param text text to clean
+     * @return a cleaned character sequence
+     */
+    private static CharSequence removeBlocklisted(final CharSequence text) {
+        final SpannableStringBuilder cleaned = new SpannableStringBuilder(text);
+        for (final Pattern pattern : BLOCKLISTED_PATTERNS) {
+            // Match against an immutable snapshot and delete back to front, so the recorded
+            // offsets stay valid while the builder shrinks.
+            final Matcher matcher = pattern.matcher(cleaned.toString());
+            final Deque<int[]> matches = new ArrayDeque<>();
+            while (matcher.find()) {
+                matches.push(new int[] {matcher.start(), matcher.end()});
+            }
+            for (final int[] match : matches) {
+                cleaned.delete(match[0], match[1]);
+            }
+        }
+
+        return cleaned;
+    }
+
+    /**
+     * Filters out any blocklisted characters. Only the {@code start}..{@code end} range of
+     * {@code source} is inspected, because that is the text about to replace the
+     * {@code dstart}..{@code dend} range of {@code dest}; the current content of {@code dest} is
+     * never returned, since the return value is what gets inserted.
+     * @param source {@inheritDoc}
+     * @param start {@inheritDoc}
+     * @param end {@inheritDoc}
+     * @param dest {@inheritDoc}
+     * @param dstart {@inheritDoc}
+     * @param dend {@inheritDoc}
+     * @return the cleaned replacement text, or {@code null} to accept the input unchanged
+     */
+    @Override
+    public CharSequence filter(CharSequence source, int start, int end, Spanned dest, int dstart,
+        int dend) {
+        final CharSequence incoming = source.subSequence(start, end);
+        if (!containsBlocklisted(incoming)) {
+            return null;
+        }
+
+        return removeBlocklisted(incoming);
+    }
+}
diff --git a/app/src/test/kotlin/fr/free/nrw/commons/upload/UploadMediaDetailInputFilterTest.kt b/app/src/test/kotlin/fr/free/nrw/commons/upload/UploadMediaDetailInputFilterTest.kt
new file mode 100644
index 000000000..4a4894046
--- /dev/null
+++ b/app/src/test/kotlin/fr/free/nrw/commons/upload/UploadMediaDetailInputFilterTest.kt
@@ -0,0 +1,158 @@
+package fr.free.nrw.commons.upload
+
+import android.text.SpannableStringBuilder
+import fr.free.nrw.commons.TestCommonsApplication
+import org.junit.Assert
+import org.junit.Test
+import org.junit.runner.RunWith
+import org.robolectric.RobolectricTestRunner
+import org.robolectric.annotation.Config
+import org.robolectric.annotation.LooperMode
+
+@RunWith(RobolectricTestRunner::class)
+@Config(sdk = [21], application = TestCommonsApplication::class)
+@LooperMode(LooperMode.Mode.PAUSED)
+class UploadMediaDetailInputFilterTest {
+
+    @Test
+    fun testFilterGeneric() {
+        val filter = UploadMediaDetailInputFilter()
+        val filters = arrayOf(filter)
+        val destination = SpannableStringBuilder("")
+        destination.filters = filters
+
+        val test: CharSequence = "test"
+        val expected = "test"
+        destination.insert(0, test)
+        Assert.assertEquals(expected, destination.toString())
+    }
+
+    @Test
+    fun testFilterUnusualSpaces() {
+        val builder = SpannableStringBuilder("")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        //Sample of unusual space characters
+        val tests = intArrayOf(0x00A0, 0x1680, 0x180E, 0x2000, 0x2005, 0x200B, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000)
+        for (test: Int in tests) {
+            builder.insert(0, String(Character.toChars(test)))
+            Assert.assertEquals("", builder.toString())
+            builder.clear()
+        }
+    }
+
+    @Test
+    fun testFilterBiDiOverrides() {
+        val builder = SpannableStringBuilder("")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        //Sample of BiDi override characters
+        val tests = intArrayOf(0x202A, 0x202B, 0x202C, 0x202D, 0x202E)
+        for (test: Int in tests) {
+            builder.insert(0, String(Character.toChars(test)))
+            Assert.assertEquals("", builder.toString())
+            builder.clear()
+        }
+    }
+
+    @Test
+    fun testFilterControlCharacters() {
+        val builder = SpannableStringBuilder("")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        //Sample of control characters
+        val tests = intArrayOf(0x00, 0x08, 0x10, 0x18, 0x1F, 0x7F)
+        for (test: Int in tests) {
+            builder.insert(0, String(Character.toChars(test)))
+            Assert.assertEquals("", builder.toString())
+            builder.clear()
+        }
+    }
+
+    @Test
+    fun testFilterByteOrderMark() {
+        val builder = SpannableStringBuilder("")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        builder.insert(0, String(Character.toChars(0xFEFF)))
+        Assert.assertEquals("", builder.toString())
+        builder.clear()
+    }
+
+    @Test
+    fun testFilterSoftHyphen() {
+        val builder = SpannableStringBuilder("")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        builder.insert(0, String(Character.toChars(0x00AD)))
+        Assert.assertEquals("", builder.toString())
+        builder.clear()
+    }
+
+    @Test
+    fun testFilterSpecials() {
+        val builder = SpannableStringBuilder("")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        //Sample of private use and special characters
+        val tests = intArrayOf(0xE000, 0xE63F, 0xEC7E, 0xF2BD, 0xF8FF, 0xFFF0, 0xFFF4, 0xFFFC, 0xFFFF)
+        for (test: Int in tests) {
+            builder.insert(0, String(Character.toChars(test)))
+            Assert.assertEquals("", builder.toString())
+            builder.clear()
+        }
+    }
+
+    @Test
+    fun testFilterNonBasicPlane() {
+        val builder = SpannableStringBuilder("")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        //Sample of characters outside the Basic Multilingual Plane not in the Han set
+        val testsExclude = intArrayOf(0x1FFFF, 0x44444, 0xFFFFF)
+        for (test: Int in testsExclude) {
+            builder.insert(0, String(Character.toChars(test)))
+            Assert.assertEquals("", builder.toString())
+            builder.clear()
+        }
+
+        //Sample of characters outside the Basic Multilingual Plane in the Han set
+        val testsInclude = intArrayOf(0x20000, 0x2B740, 0x2F800)
+        val expected = SpannableStringBuilder("")
+        for (test: Int in testsInclude) {
+            builder.insert(0, String(Character.toChars(test)))
+            expected.insert(0, String(Character.toChars(test)))
+        }
+        Assert.assertEquals(expected.toString(), builder.toString())
+    }
+
+    @Test
+    fun testFilterBlocklistedAtStartOfExistingText() {
+        val builder = SpannableStringBuilder("abc")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        //A blocklisted character inserted at index 0 must not duplicate the existing text
+        builder.insert(0, "\u00A0")
+        Assert.assertEquals("abc", builder.toString())
+    }
+
+    @Test
+    fun testFilterMixedInput() {
+        val builder = SpannableStringBuilder("")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        //Only the blocklisted characters are stripped; the rest of the input is kept
+        builder.insert(0, "a\u00A0b\u202Ec")
+        Assert.assertEquals("abc", builder.toString())
+    }
+
+    @Test
+    fun testFilterSourceSubRange() {
+        val builder = SpannableStringBuilder("abc")
+        builder.filters = arrayOf(UploadMediaDetailInputFilter())
+
+        //Only the requested range of the source is inserted and inspected
+        builder.insert(1, "x\u00A0y", 2, 3)
+        Assert.assertEquals("aybc", builder.toString())
+    }
+}