If depicted Wikidata item has no associated Commons category property, then suggest categories from its P18 (#6130)

* Fix NPE with UploadMediaDetails.captionText

* Store P18 instead of processed image url in DepictedItem

* Add routes for fetching category info from titles

* Consider depict's P18 when suggesting categories

* Add tests

* Corrected DepictedItem constructor arguments

* Add test for DepictedItem::primaryImage
This commit is contained in:
Tanmay Gupta 2025-01-16 14:34:04 +05:30 committed by GitHub
parent 1f33926ed5
commit 1e64acdf1d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 212 additions and 28 deletions

View file

@ -127,30 +127,64 @@ class CategoriesModel
/**
* Fetches details of every category associated with selected depictions, converts them into
* CategoryItem and returns them in a list.
* If a selected depiction has no associated Commons categories, the categories to which its
* P18 image belongs are returned in the list instead.
*
* @param selectedDepictions selected DepictItems
* @return List of CategoryItem associated with selected depictions
*/
private fun categoriesFromDepiction(selectedDepictions: List<DepictedItem>): Observable<MutableList<CategoryItem>>? =
    Observable
        // Every Commons category attached to any selected depiction, in selection order.
        .fromIterable(selectedDepictions.flatMap { it.commonsCategories })
        .flatMapIterable { categoryItem ->
            categoryClient
                .getCategoriesByName(
                    categoryItem.name,
                    categoryItem.name,
                    SEARCH_CATS_LIMIT,
                ).blockingGet()
                // Only the first match is used. A lookup that yields no match is skipped
                // instead of indexing into an empty list and failing the whole stream.
                .take(1)
                .map { match ->
                    CategoryItem(
                        match.name,
                        match.description,
                        match.thumbnail,
                        match.isSelected,
                    )
                }
        }.toList()
        .toObservable()
private fun categoriesFromDepiction(selectedDepictions: List<DepictedItem>): Observable<MutableList<CategoryItem>>? {
    // One source per selected depiction, each emitting that depiction's suggested categories.
    val categoriesPerDepiction: List<Observable<CategoryItem>> =
        selectedDepictions.map { depictedItem ->
            val primaryImage = depictedItem.primaryImage
            when {
                // The depiction has its own Commons categories: fetch the details of each one.
                depictedItem.commonsCategories.isNotEmpty() ->
                    Observable
                        .fromIterable(depictedItem.commonsCategories)
                        .flatMapIterable { categoryItem ->
                            categoryClient
                                .getCategoriesByName(
                                    categoryItem.name,
                                    categoryItem.name,
                                    SEARCH_CATS_LIMIT,
                                ).blockingGet()
                                // Only the first match is used. A lookup that yields no match is
                                // skipped instead of indexing into an empty list and failing
                                // the whole stream.
                                .take(1)
                                .map { match ->
                                    CategoryItem(
                                        match.name,
                                        match.description,
                                        match.thumbnail,
                                        match.isSelected,
                                    )
                                }
                        }

                // Neither categories nor a primary image: nothing to suggest for this depiction.
                primaryImage == null -> Observable.empty<CategoryItem>()

                // No categories, but a primary image: suggest the categories of that image.
                else ->
                    Observable
                        .just(primaryImage)
                        .flatMapIterable { image ->
                            categoryClient
                                .getCategoriesOfImage(
                                    image,
                                    SEARCH_CATS_LIMIT,
                                ).blockingGet()
                                .map { category ->
                                    CategoryItem(
                                        category.name,
                                        category.description,
                                        category.thumbnail,
                                        category.isSelected,
                                    )
                                }
                        }
            }
        }

    // Concatenate in selection order and emit one complete list. Collecting with toList()
    // gives every subscriber a fresh list and avoids emitting a shared mutable accumulator
    // once per intermediate state.
    return Observable
        .concat(categoriesPerDepiction)
        .toList()
        .toObservable()
}
/**
* Fetches details of every category by their name, converts them into

View file

@ -78,6 +78,24 @@ class CategoryClient
),
)
/**
 * Looks up the categories that an image file belongs to.
 *
 * The given name is prefixed with `File:` to form the page title that is queried.
 *
 * @param image file name of the image (the P18 value of some Wikidata entity), without the
 * `File:` prefix
 * @param itemLimit maximum number of categories to request
 * @return a [Single] emitting the categories mapped from the query response
 */
fun getCategoriesOfImage(
    image: String,
    itemLimit: Int,
): Single<List<CategoryItem>> {
    val fileTitle = "File:$image"
    val categoriesResponse = categoryInterface.getCategoriesByTitles(fileTitle, itemLimit)
    return responseMapper(categoriesResponse)
}
/**
* The method takes categoryName as input and returns a List of Subcategories
* It uses the generator query API to get the subcategories in a category, 500 at a time.

View file

@ -61,6 +61,21 @@ interface CategoryInterface {
@Query("gacoffset") offset: Int,
): Single<MwQueryResponse>
/**
 * Fetches the non-hidden categories that the given page titles belong to.
 *
 * The request uses the `categories` generator with `gclshow=!hidden`, and asks for the
 * `categoryinfo`, `description` and `pageimages` properties of every returned category,
 * with page-image thumbnails requested at a size of 70 (`pithumbsize=70`).
 *
 * @param titles titles to fetch categories for (e.g. File:<P18 of a wikidata entity>);
 * sent as the `titles` query parameter
 * @param itemLimit how many categories to return; sent as the `gcllimit` query parameter
 * @return a [Single] emitting the raw [MwQueryResponse] of the query
 */
@GET(
"w/api.php?action=query&format=json&formatversion=2&generator=categories&prop=categoryinfo|description|pageimages&piprop=thumbnail&pithumbsize=70&gclshow=!hidden",
)
fun getCategoriesByTitles(
@Query("titles") titles: String?,
@Query("gcllimit") itemLimit: Int,
): Single<MwQueryResponse>
@GET("w/api.php?action=query&format=json&formatversion=2&generator=categorymembers&gcmtype=subcat&prop=info&gcmlimit=50")
fun getSubCategoryList(
@Query("gcmtitle") categoryName: String,

View file

@ -68,6 +68,9 @@ data class DepictedItem constructor(
entity.id(),
)
/**
 * File name of the primary image, derived from [imageUrl]; `null` when there is no image URL.
 *
 * NOTE(review): assumes [imageUrl] ends in a path segment shaped like `<size>px-<file name>`
 * (a thumbnail URL) - confirm against the code that builds [imageUrl].
 */
val primaryImage: String?
    get() {
        val url = imageUrl ?: return null
        val lastSegment = url.substringAfterLast('/')
        val sizeMarker = "px-"
        return if (sizeMarker in lastSegment) {
            // Take everything after the size prefix so that file names which themselves
            // contain '-' (e.g. "Foo-bar.jpg") are not truncated to their last fragment.
            lastSegment.substringAfter(sizeMarker)
        } else {
            // No size prefix found: keep the previous derivation unchanged.
            url.split('-').lastOrNull()
        }
    }
override fun equals(other: Any?) =
when {
this === other -> true

View file

@ -1,7 +1,9 @@
package fr.free.nrw.commons.category
import categoryItem
import com.nhaarman.mockitokotlin2.any
import com.nhaarman.mockitokotlin2.mock
import com.nhaarman.mockitokotlin2.times
import com.nhaarman.mockitokotlin2.verify
import com.nhaarman.mockitokotlin2.whenever
import depictedItem
@ -90,14 +92,18 @@ class CategoriesModelTest {
val depictedItem =
depictedItem(
commonsCategories =
listOf(
CategoryItem(
"depictionCategory",
"",
"",
false,
),
listOf(
CategoryItem(
"depictionCategory",
"",
"",
false,
),
),
)
val depictedItemWithoutCategories =
depictedItem(
imageUrl = "testUrl"
)
whenever(gpsCategoryModel.categoriesFromLocation)
@ -159,6 +165,23 @@ class CategoriesModelTest {
),
),
)
whenever(
categoryClient.getCategoriesOfImage(
"testUrl",
25,
),
).thenReturn(
Single.just(
listOf(
CategoryItem(
"categoriesOfP18",
"",
"",
false,
),
),
),
)
val imageTitleList = listOf("Test")
CategoriesModel(categoryClient, categoryDao, gpsCategoryModel)
.searchAll("", imageTitleList, listOf(depictedItem))
@ -171,8 +194,21 @@ class CategoriesModelTest {
categoryItem("recentCategories"),
),
)
CategoriesModel(categoryClient, categoryDao, gpsCategoryModel)
.searchAll("", imageTitleList, listOf(depictedItemWithoutCategories))
.test()
.assertValue(
listOf(
categoryItem("categoriesOfP18"),
categoryItem("gpsCategory"),
categoryItem("titleSearch"),
categoryItem("recentCategories"),
),
)
imageTitleList.forEach {
verify(categoryClient).searchCategories(it, CategoriesModel.SEARCH_CATS_LIMIT)
verify(categoryClient, times(2)).searchCategories(it, CategoriesModel.SEARCH_CATS_LIMIT)
verify(categoryClient).getCategoriesByName(any(), any(), any(), any())
verify(categoryClient).getCategoriesOfImage(any(), any())
}
}

View file

@ -132,6 +132,45 @@ class CategoryClientTest {
)
}
@Test
fun getCategoriesByTitlesFound() {
    // Any title/limit combination answers with a response containing "Category:Test".
    val mockResponse = withMockResponse("Category:Test")
    whenever(categoryInterface.getCategoriesByTitles(anyString(), anyInt()))
        .thenReturn(Single.just(mockResponse))

    val expectedCategories = listOf(CategoryItem("Test", "", "", false))

    // The lookup is performed twice; each call must emit exactly the one expected list.
    repeat(2) {
        categoryClient
            .getCategoriesOfImage("tes", 10)
            .test()
            .assertValues(expectedCategories)
    }
}
@Test
fun getCategoriesByNameNull() {
val mockResponse = withNullPages()
@ -160,6 +199,29 @@ class CategoryClientTest {
.assertValues(emptyList())
}
@Test
fun getCategoriesByTitlesNull() {
    // Any title/limit combination answers with a response that has no pages.
    val mockResponse = withNullPages()
    whenever(categoryInterface.getCategoriesByTitles(anyString(), anyInt()))
        .thenReturn(Single.just(mockResponse))

    // The lookup is performed twice; each call must emit a single empty list.
    repeat(2) {
        categoryClient
            .getCategoriesOfImage("tes", 10)
            .test()
            .assertValues(emptyList())
    }
}
@Test
fun getParentCategoryListFound() {
val mockResponse = withMockResponse("Category:Test")

View file

@ -181,4 +181,20 @@ class DepictedItemTest {
fun `hashCode returns different values for objects with different name`() {
    // Two items that differ only in their name must not share a hash code.
    val hashOfA = depictedItem(name = "a").hashCode()
    val hashOfB = depictedItem(name = "b").hashCode()
    Assert.assertNotEquals(hashOfA, hashOfB)
}
@Test
fun `primaryImage is derived correctly from imageUrl`() {
    // Entity whose IMAGE statement carries the raw value "prefix: example_image name".
    val item =
        DepictedItem(
            entity(
                statements = mapOf(
                    WikidataProperties.IMAGE.propertyName to listOf(
                        statement(snak(dataValue = valueString("prefix: example_image name"))),
                    ),
                ),
            ),
        )

    // JUnit's assertEquals takes (expected, actual); the expected file name goes first so
    // that a failure message reports the two values the right way round.
    Assert.assertEquals("_example_image_name", item.primaryImage)
}
}