Update allowed recent years to include 2020s (#5761)

* document regex due to #47

* also count 2020s as "recent years"

* clarify that not all years are ignored

* clarify "year" is current year

* original logic fix
from https://github.com/commons-app/apps-android-commons/pull/5761#pullrequestreview-2144120347

* better variable name for ".*0s.*"
as that regex will match e.g. `1920s` and `80s` too, so the original `is20xxsYear` would be a confusing name for it

* consolidate duplicated code to spammyCategory

* clarify regexes via variables

* spammyCategory should always be skipped

* return is simple now, so we can get rid of extra val oldDecade

* fix curYearInString

* some clarification comments

* refactor: rename containsYear to isSpammyCategory

This is done as the name containsYear is ambiguous.
The function does not just check for a year to identify spammy categories.

* refactor: rename containsYear to isSpammyCategory (take 2)

 A continuation of fe74c77ab (refactor: rename containsYear
 to isSpammyCategory, 2024-07-17)

---------

Co-authored-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
This commit is contained in:
Matija Nalis 2024-07-20 13:16:20 +00:00 committed by GitHub
parent 34addbe33a
commit 7f6b45aeb6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 36 additions and 25 deletions

View file

@ -27,30 +27,42 @@ class CategoriesModel @Inject constructor(
private var selectedExistingCategories: MutableList<String> = mutableListOf()
/**
* Returns if the item contains an year
* @param item
* Returns true if an item is considered to be a spammy category which should be ignored
*
* @param item a category item that needs to be validated to know if it is spammy or not
* @return
*/
fun containsYear(item: String): Boolean {
fun isSpammyCategory(item: String): Boolean {
//Check for current and previous year to exclude these categories from removal
val now = Calendar.getInstance()
val year = now[Calendar.YEAR]
val yearInString = year.toString()
val prevYear = year - 1
val curYear = now[Calendar.YEAR]
val curYearInString = curYear.toString()
val prevYear = curYear - 1
val prevYearInString = prevYear.toString()
Timber.d("Previous year: %s", prevYearInString)
//Check if item contains a 4-digit word anywhere within the string (.* is wildcard)
//And that item does not equal the current year or previous year
//And if it is an irrelevant category such as Media_needing_categories_as_of_16_June_2017(Issue #750)
//Check if the year in the form of XX(X)0s is relevant, i.e. in the 2000s or 2010s as stated in Issue #1029
return item.matches(".*(19|20)\\d{2}.*".toRegex())
&& !item.contains(yearInString)
&& !item.contains(prevYearInString)
|| item.matches("(.*)needing(.*)".toRegex())
|| item.matches("(.*)taken on(.*)".toRegex())
|| item.matches(".*0s.*".toRegex())
&& !item.matches(".*(200|201)0s.*".toRegex())
val mentionsDecade = item.matches(".*0s.*".toRegex())
val recentDecade = item.matches(".*20[0-2]0s.*".toRegex())
val spammyCategory = item.matches("(.*)needing(.*)".toRegex())
|| item.matches("(.*)taken on(.*)".toRegex())
// always skip irrelevant categories such as Media_needing_categories_as_of_16_June_2017(Issue #750)
if (spammyCategory) {
return true
}
if (mentionsDecade) {
// Check if the year in the form of XX(X)0s is recent/relevant, i.e. in the 2000s or 2010s/2020s as stated in Issue #1029
// Example: "2020s" is OK, but "1920s" is not (and should be skipped)
return !recentDecade
} else {
// If it is not a year in decade form (e.g. 19xxs/20xxs), then check if item contains a 4-digit year
// anywhere within the string (.* is wildcard) (Issue #47)
// And that item does not equal the current year or previous year
return item.matches(".*(19|20)\\d{2}.*".toRegex())
&& !item.contains(curYearInString)
&& !item.contains(prevYearInString)
}
}
/**

View file

@ -148,8 +148,8 @@ public class UploadRepository {
* @param name
* @return
*/
public boolean containsYear(String name) {
return categoriesModel.containsYear(name);
public boolean isSpammyCategory(String name) {
return categoriesModel.isSpammyCategory(name);
}
/**

View file

@ -89,7 +89,7 @@ class CategoriesPresenter @Inject constructor(
if (media == null) {
return repository.searchAll(term, getImageTitleList(), repository.selectedDepictions)
.subscribeOn(ioScheduler)
.map { it.filter { categoryItem -> !repository.containsYear(categoryItem.name)
.map { it.filter { categoryItem -> !repository.isSpammyCategory(categoryItem.name)
|| categoryItem.name==term } }
} else {
return Observable.zip(
@ -103,7 +103,7 @@ class CategoriesPresenter @Inject constructor(
it1 + it2
}
.subscribeOn(ioScheduler)
.map { it.filter { categoryItem -> !repository.containsYear(categoryItem.name)
.map { it.filter { categoryItem -> !repository.isSpammyCategory(categoryItem.name)
|| categoryItem.name==term } }
.map { it.filterNot { categoryItem -> categoryItem.thumbnail == "hidden" } }
}

View file

@ -14,7 +14,6 @@ import media
import org.junit.Before
import org.junit.Test
import org.mockito.Mock
import org.mockito.Mockito.verifyNoInteractions
import org.mockito.MockitoAnnotations
import java.lang.reflect.Method
@ -97,8 +96,8 @@ class CategoriesPresenterTest {
)
)
)
whenever(repository.containsYear("selected")).thenReturn(false)
whenever(repository.containsYear("doesContainYear")).thenReturn(true)
whenever(repository.isSpammyCategory("selected")).thenReturn(false)
whenever(repository.isSpammyCategory("doesContainYear")).thenReturn(true)
whenever(repository.selectedCategories).thenReturn(listOf(
categoryItem("selected", "", "",true)))
categoriesPresenter.searchForCategories("test")

View file

@ -156,7 +156,7 @@ class UploadRepositoryUnitTest {
@Test
fun testContainsYear() {
assertEquals(
repository.containsYear(""), categoriesModel.containsYear("")
repository.isSpammyCategory(""), categoriesModel.isSpammyCategory("")
)
}