diff --git a/app/src/main/java/fr/free/nrw/commons/category/CategoriesModel.kt b/app/src/main/java/fr/free/nrw/commons/category/CategoriesModel.kt
index fd90be95f..47147944c 100644
--- a/app/src/main/java/fr/free/nrw/commons/category/CategoriesModel.kt
+++ b/app/src/main/java/fr/free/nrw/commons/category/CategoriesModel.kt
@@ -36,37 +36,29 @@ class CategoriesModel
          * @return
          */
         fun isSpammyCategory(item: String): Boolean {
-            // Check for current and previous year to exclude these categories from removal
-            val now = Calendar.getInstance()
-            val curYear = now[Calendar.YEAR]
-            val curYearInString = curYear.toString()
-            val prevYear = curYear - 1
-            val prevYearInString = prevYear.toString()
-            Timber.d("Previous year: %s", prevYearInString)
-
-            val mentionsDecade = item.matches(".*0s.*".toRegex())
-            val recentDecade = item.matches(".*20[0-2]0s.*".toRegex())
-            val spammyCategory =
-                item.matches("(.*)needing(.*)".toRegex()) ||
-                    item.matches("(.*)taken on(.*)".toRegex())
 
             // always skip irrelevant categories such as Media_needing_categories_as_of_16_June_2017(Issue #750)
+            val spammyCategory = "needing" in item || "taken on" in item
+
+            // Date-like fragments are matched with any of the separators `.`, `/` or `-`:
+            //   dd/mm/yyyy, yyyy/mm/dd, yyyy/mm and mm/yyyy, where the year may also be written as yy.
+            // A year must be exactly 2 or 4 digits and a match may not be glued to further digits,
+            // so designations such as "Boeing 747-8" are not mistaken for dates.
+            val dateLike = Regex("""(?<!\d)(?:(?:\d{2}|\d{4})[-/.]\d{1,2}|\d{1,2}[-/.](?:\d{2}|\d{4}))(?!\d)""")
+            val isIrrelevantCategory = item.contains(dateLike)
+
             if (spammyCategory) {
                 return true
             }
 
-            if (mentionsDecade) {
-                // Check if the year in the form of XX(X)0s is recent/relevant, i.e. in the 2000s or 2010s/2020s as stated in Issue #1029
-                // Example: "2020s" is OK, but "1920s" is not (and should be skipped)
-                return !recentDecade
-            } else {
-                // If it is not an year in decade form (e.g. 19xxs/20xxs), then check if item contains a 4-digit year
-                // anywhere within the string (.* is wildcard) (Issue #47)
-                // And that item does not equal the current year or previous year
-                return item.matches(".*(19|20)\\d{2}.*".toRegex()) &&
-                    !item.contains(curYearInString) &&
-                    !item.contains(prevYearInString)
+            if (isIrrelevantCategory) {
+                return true
             }
+
+            // Any standalone 4-digit number is treated as a year. A category that mentions
+            // years is kept only when at least one of them is in the 20XX range.
+            val years = Regex("""(?<!\d)\d{4}(?!\d)""").findAll(item).map { it.value }.toList()
+            return years.isNotEmpty() && years.none { it.startsWith("20") }
         }
 
         /**
diff --git a/app/src/test/kotlin/fr/free/nrw/commons/category/CategoriesModelTest.kt b/app/src/test/kotlin/fr/free/nrw/commons/category/CategoriesModelTest.kt
index 8c336470a..21fdba2f5 100644
--- a/app/src/test/kotlin/fr/free/nrw/commons/category/CategoriesModelTest.kt
+++ b/app/src/test/kotlin/fr/free/nrw/commons/category/CategoriesModelTest.kt
@@ -11,6 +11,7 @@ import fr.free.nrw.commons.upload.GpsCategoryModel
 import io.reactivex.Single
 import io.reactivex.subjects.BehaviorSubject
 import media
+import org.junit.Assert
 import org.junit.Before
 import org.junit.Test
 import org.mockito.ArgumentMatchers
@@ -331,4 +332,54 @@ class CategoriesModelTest {
             media(),
         )
     }
+
+    @Test
+    fun `test valid input with XXXX in it between the expected range 20XX`() {
+        val input = categoriesModel.isSpammyCategory("Amavenita (ship, 2014)")
+        Assert.assertFalse(input)
+    }
+
+    @Test
+    fun `test valid input with XXXXs in it between the expected range 20XXs`() {
+        val input = categoriesModel.isSpammyCategory("Amavenita (ship, 2014s)")
+        Assert.assertFalse(input)
+    }
+
+    @Test
+    fun `test invalid category when have needing in the input`() {
+        val input = categoriesModel.isSpammyCategory("Media needing categories as of 30 March 2017")
+        Assert.assertTrue(input)
+    }
+
+    @Test
+    fun `test invalid category when have taken on in the input`() {
+        val input = categoriesModel.isSpammyCategory("Photographs taken on 2015-12-08")
+        Assert.assertTrue(input)
+    }
+
+    @Test
+    fun `test invalid category when have yy mm or yy mm dd in the input`() {
+        // filtering based on [., /, -] separators between the dates.
+        val input = categoriesModel.isSpammyCategory("Image class 09.14")
+        Assert.assertTrue(input)
+    }
+
+    @Test
+    fun `test invalid category when have years not in 20XX range`() {
+        val input = categoriesModel.isSpammyCategory("Japan in the 1400s")
+        Assert.assertTrue(input)
+    }
+
+    @Test
+    fun `test valid category when a model designation looks like a date`() {
+        val input = categoriesModel.isSpammyCategory("Boeing 747-8")
+        Assert.assertFalse(input)
+    }
+
+    @Test
+    fun `test valid category when a number is longer than four digits`() {
+        val input = categoriesModel.isSpammyCategory("19521 Chaos")
+        Assert.assertFalse(input)
+    }
+
 }