Fixes #4934. Enforces Wikimedia character blocklisting on media titles. (#4955)

This commit is contained in:
AlexMahlon 2022-05-15 06:33:08 -04:00 committed by GitHub
parent d647c0674e
commit d8724f4541
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 212 additions and 0 deletions

View file

@ -3,6 +3,7 @@ package fr.free.nrw.commons.upload;
import android.app.Dialog; import android.app.Dialog;
import android.content.Intent; import android.content.Intent;
import android.text.Editable; import android.text.Editable;
import android.text.InputFilter;
import android.text.TextUtils; import android.text.TextUtils;
import android.text.TextWatcher; import android.text.TextWatcher;
import android.view.LayoutInflater; import android.view.LayoutInflater;
@ -165,6 +166,9 @@ public class UploadMediaDetailAdapter extends RecyclerView.Adapter<UploadMediaDe
captionInputLayout.setEndIconDrawable(R.drawable.mapbox_info_icon_default); captionInputLayout.setEndIconDrawable(R.drawable.mapbox_info_icon_default);
captionInputLayout.setEndIconOnClickListener(v -> captionInputLayout.setEndIconOnClickListener(v ->
callback.showAlert(R.string.media_detail_caption, R.string.caption_info)); callback.showAlert(R.string.media_detail_caption, R.string.caption_info));
Objects.requireNonNull(captionInputLayout.getEditText()).setFilters(new InputFilter[] {
new UploadMediaDetailInputFilter()
});
descInputLayout.setEndIconMode(TextInputLayout.END_ICON_CUSTOM); descInputLayout.setEndIconMode(TextInputLayout.END_ICON_CUSTOM);
descInputLayout.setEndIconDrawable(R.drawable.mapbox_info_icon_default); descInputLayout.setEndIconDrawable(R.drawable.mapbox_info_icon_default);

View file

@ -0,0 +1,80 @@
package fr.free.nrw.commons.upload;
import android.text.InputFilter;
import android.text.Spanned;
import java.util.regex.Pattern;
/**
 * An {@link InputFilter} that removes characters blocklisted in Wikimedia titles from text as it
 * is entered. The list of blocklisted characters is linked below.
 *
 * @see <a href="https://commons.wikimedia.org/wiki/MediaWiki:Titleblacklist">MediaWiki:Titleblacklist</a>
 */
public class UploadMediaDetailInputFilter implements InputFilter {

    /**
     * The blocklisted character classes. Compiled once and shared by every instance, since
     * {@link Pattern} objects are immutable.
     */
    private static final Pattern[] BLOCKLISTED_PATTERNS = {
        // Unusual space characters
        Pattern.compile("[\\x{00A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200B}\\x{2028}\\x{2029}\\x{202F}\\x{205F}\\x{3000}]"),
        // Bidirectional embedding/override controls
        Pattern.compile("[\\x{202A}-\\x{202E}]"),
        // Control characters
        Pattern.compile("\\p{Cc}"),
        // Byte order mark
        Pattern.compile("\\x{FEFF}"),
        // Soft hyphen
        Pattern.compile("\\x{00AD}"),
        // Private use area and the "Specials" block
        Pattern.compile("[\\x{E000}-\\x{F8FF}\\x{FFF0}-\\x{FFFF}]"),
        // Anything outside the Basic Multilingual Plane that is not a Han character
        Pattern.compile("[^\\x{0000}-\\x{FFFF}\\p{sc=Han}]")
    };

    /**
     * Creates a filter that uses the shared set of blocklisted patterns.
     */
    public UploadMediaDetailInputFilter() {
    }

    /**
     * Checks if the given text contains any blocklisted characters.
     *
     * @param text text to inspect
     * @return {@code true} if at least one blocklisted character is present
     */
    private boolean checkBlocklisted(final CharSequence text) {
        for (final Pattern pattern : BLOCKLISTED_PATTERNS) {
            if (pattern.matcher(text).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes every blocklisted character from the given text.
     *
     * @param text text to clean
     * @return the text with all blocklisted characters deleted
     */
    private CharSequence removeBlocklisted(final CharSequence text) {
        CharSequence cleaned = text;
        for (final Pattern pattern : BLOCKLISTED_PATTERNS) {
            cleaned = pattern.matcher(cleaned).replaceAll("");
        }
        return cleaned;
    }

    /**
     * Filters out any blocklisted characters from the text that is about to be placed into
     * {@code dest}.
     *
     * Only the range {@code start..end} of {@code source} is the replacement text, so only that
     * range is inspected and cleaned. The current content of {@code dest} is never used as the
     * replacement: doing so would insert a second copy of the existing text into the field.
     *
     * @param source {@inheritDoc}
     * @param start {@inheritDoc}
     * @param end {@inheritDoc}
     * @param dest {@inheritDoc}
     * @param dstart {@inheritDoc}
     * @param dend {@inheritDoc}
     * @return the cleaned replacement text, or {@code null} to accept the original replacement
     * unchanged when it contains no blocklisted characters
     */
    @Override
    public CharSequence filter(CharSequence source, int start, int end, Spanned dest, int dstart,
        int dend) {
        final CharSequence replacement = source.subSequence(start, end);
        if (!checkBlocklisted(replacement)) {
            return null;
        }
        // The cleaned text is a plain String, so spans carried by the source are not kept.
        return removeBlocklisted(replacement);
    }
}

View file

@ -0,0 +1,128 @@
package fr.free.nrw.commons.upload
import android.text.SpannableStringBuilder
import fr.free.nrw.commons.TestCommonsApplication
import org.junit.Assert
import org.junit.Test
import org.junit.runner.RunWith
import org.robolectric.RobolectricTestRunner
import org.robolectric.annotation.Config
import org.robolectric.annotation.LooperMode
/**
 * Tests for [UploadMediaDetailInputFilter]: each test attaches the filter to a
 * [SpannableStringBuilder], inserts sample text and checks what ends up in the builder.
 */
@RunWith(RobolectricTestRunner::class)
@Config(sdk = [21], application = TestCommonsApplication::class)
@LooperMode(LooperMode.Mode.PAUSED)
class UploadMediaDetailInputFilterTest {

    /** Creates an empty builder that has the filter under test attached. */
    private fun newFilteredBuilder(): SpannableStringBuilder {
        val builder = SpannableStringBuilder("")
        builder.filters = arrayOf(UploadMediaDetailInputFilter())
        return builder
    }

    /**
     * Inserts each code point on its own into an empty filtered builder and asserts that the
     * builder stays empty, i.e. that the filter removed the character.
     */
    private fun assertAllFilteredOut(codePoints: IntArray) {
        val builder = newFilteredBuilder()
        for (codePoint in codePoints) {
            builder.insert(0, String(Character.toChars(codePoint)))
            Assert.assertEquals(
                "U+%04X should have been filtered out".format(codePoint),
                "",
                builder.toString()
            )
            builder.clear()
        }
    }

    @Test
    fun testFilterGeneric() {
        val destination = newFilteredBuilder()
        val test: CharSequence = "test"
        val expected = "test"

        destination.insert(0, test)

        Assert.assertEquals(expected, destination.toString())
    }

    @Test
    fun testFilterUnusualSpaces() {
        // All unusual space characters
        assertAllFilteredOut(
            intArrayOf(
                0x00A0, 0x1680, 0x180E, 0x2000, 0x2005, 0x200B, 0x2028, 0x2029, 0x202F, 0x205F,
                0x3000
            )
        )
    }

    @Test
    fun testFilterBiDiOverrides() {
        // Sample of BiDi override characters
        assertAllFilteredOut(intArrayOf(0x202A, 0x202B, 0x202C, 0x202D, 0x202E))
    }

    @Test
    fun testFilterControlCharacters() {
        // Sample of control characters
        assertAllFilteredOut(intArrayOf(0x00, 0x08, 0x10, 0x18, 0x1F, 0x7F))
    }

    @Test
    fun testFilterByteOrderMark() {
        assertAllFilteredOut(intArrayOf(0xFEFF))
    }

    @Test
    fun testFilterSoftHyphen() {
        assertAllFilteredOut(intArrayOf(0x00AD))
    }

    @Test
    fun testFilterSpecials() {
        // Sample of private-use (U+E000..U+F8FF) and special (U+FFF0..U+FFFF) characters
        assertAllFilteredOut(
            intArrayOf(0xE000, 0xE63F, 0xEC7E, 0xF2BD, 0xF8FF, 0xFFF0, 0xFFF4, 0xFFFC, 0xFFFF)
        )
    }

    @Test
    fun testFilterNonBasicPlane() {
        // Sample of code points above U+FFFF that are not in the Han set
        assertAllFilteredOut(intArrayOf(0x1FFFF, 0x44444, 0xFFFFF))

        // Sample of code points above U+FFFF that are in the Han set; these must be kept
        val testsInclude = intArrayOf(0x20000, 0x2B740, 0x2F800)
        val builder = newFilteredBuilder()
        val expected = SpannableStringBuilder("")
        for (test in testsInclude) {
            builder.insert(0, String(Character.toChars(test)))
            expected.insert(0, String(Character.toChars(test)))
        }

        Assert.assertEquals(expected.toString(), builder.toString())
    }
}