Backend overhaul fetch media by filename (#3081)

* Added class MwParseResponse and MwParseResult for receiving parse output

* Migrated fetchMediaByFilename to retrofit

* Removed unused code

* Added tests
This commit is contained in:
Ilgaz Er 2019-09-04 17:12:55 +03:00 committed by GitHub
parent 728ead7c38
commit c6794f3eb9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 94 additions and 96 deletions

View file

@ -66,8 +66,7 @@ public class MediaDataExtractor {
* @return
*/
private Single<String> getDiscussion(String filename) {
return mediaWikiApi.fetchMediaByFilename(filename.replace("File", "File talk"))
.flatMap(mediaResult -> mediaWikiApi.parseWikicode(mediaResult.getWikiSource()))
return mediaClient.getPageHtml(filename.replace("File", "File talk"))
.map(discussion -> HtmlCompat.fromHtml(discussion, HtmlCompat.FROM_HTML_MODE_LEGACY).toString())
.onErrorReturn(throwable -> {
Timber.e(throwable, "Error occurred while fetching discussion");

View file

@ -161,4 +161,13 @@ public class MediaClient {
.map(Media::from)
.single(Media.EMPTY);
}
/**
 * Requests the parse output of a page through {@code mediaInterface.getPageHtml}
 * and reduces it to the parsed text.
 *
 * Responses for which {@link MwParseResponse#success()} is false are dropped.
 *
 * @param title title forwarded to the parse request
 * @return a Single emitting the text of the first successful parse response,
 *         or an empty string when no successful response is emitted
 */
@NonNull
public Single<String> getPageHtml(String title) {
    return mediaInterface.getPageHtml(title)
            // Only keep responses that actually carry a parse payload.
            .filter(response -> response.success())
            .map(response -> response.parse().text())
            .first("");
}
}

View file

@ -14,6 +14,9 @@ import retrofit2.http.QueryMap;
* Interface for interacting with Commons media related APIs
*/
public interface MediaInterface {
/**
 * Query-string fragment shared by the media queries of this interface: it is
 * appended to their {@code @GET} URLs to request the {@code imageinfo} property
 * (url and extmetadata, 640px url width) limited to the listed extmetadata fields.
 */
String MEDIA_PARAMS="&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" +
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
"|Artist|LicenseShortName|LicenseUrl";
/**
* Checks if a page exists or not.
*
@ -42,9 +45,7 @@ public interface MediaInterface {
*/
@GET("w/api.php?action=query&format=json&formatversion=2" + //Basic parameters
"&generator=categorymembers&gcmtype=file&gcmsort=timestamp&gcmdir=desc" + //Category parameters
"&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" + //Media property parameters
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
"|Artist|LicenseShortName|LicenseUrl")
MEDIA_PARAMS)
Observable<MwQueryResponse> getMediaListFromCategory(@Query("gcmtitle") String category, @Query("gcmlimit") int itemLimit, @QueryMap Map<String, String> continuation);
/**
@ -57,9 +58,7 @@ public interface MediaInterface {
*/
@GET("w/api.php?action=query&format=json&formatversion=2" + //Basic parameters
"&generator=search&gsrwhat=text&gsrnamespace=6" + //Search parameters
"&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" + //Media property parameters
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
"|Artist|LicenseShortName|LicenseUrl")
MEDIA_PARAMS)
Observable<MwQueryResponse> getMediaListFromSearch(@Query("gsrsearch") String keyword, @Query("gsrlimit") int itemLimit, @QueryMap Map<String, String> continuation);
/**
@ -69,9 +68,7 @@ public interface MediaInterface {
* @return
*/
@GET("w/api.php?action=query&format=json&formatversion=2" +
"&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" +
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
"|Artist|LicenseShortName|LicenseUrl")
MEDIA_PARAMS)
Observable<MwQueryResponse> getMedia(@Query("titles") String title);
/**
@ -82,8 +79,9 @@ public interface MediaInterface {
* @return
*/
@GET("w/api.php?action=query&format=json&formatversion=2&generator=images" +
"&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" +
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
"|Artist|LicenseShortName|LicenseUrl")
MEDIA_PARAMS)
Observable<MwQueryResponse> getMediaWithGenerator(@Query("titles") String title);
/**
 * Requests the parse output of a page ({@code action=parse} with {@code prop=text}).
 *
 * @param title value sent as the {@code page} query parameter
 * @return an Observable emitting the {@link MwParseResponse} for the request
 */
@GET("w/api.php?format=json&action=parse&prop=text")
Observable<MwParseResponse> getPageHtml(@Query("page") String title);
}

View file

@ -0,0 +1,25 @@
package fr.free.nrw.commons.media;
import androidx.annotation.Nullable;
import androidx.annotation.VisibleForTesting;
import org.wikipedia.dataclient.mwapi.MwResponse;
/**
 * Response model for a parse request; wraps the optional {@code parse} payload.
 */
public class MwParseResponse extends MwResponse {
    /** Parse payload of the response; null when the response carried none. */
    @Nullable
    private MwParseResult parse;

    /**
     * @return true when this response holds a parse payload
     */
    public boolean success() {
        return parse != null;
    }

    /**
     * @return the parse payload, or null when there is none
     */
    @Nullable
    public MwParseResult parse() {
        return parse;
    }

    /**
     * Replaces the parse payload; exposed for tests.
     *
     * @param parseResult payload to store, may be null
     */
    @VisibleForTesting
    protected void setParse(@Nullable MwParseResult parseResult) {
        this.parse = parseResult;
    }
}

View file

@ -0,0 +1,20 @@
package fr.free.nrw.commons.media;
import androidx.annotation.Nullable;
import com.google.gson.annotations.SerializedName;
/**
 * Model of the {@code parse} object of a parse response.
 *
 * The parsed text is nested one level down, under the {@code *} key of the
 * {@code text} object.
 */
public class MwParseResult {
    @SuppressWarnings("unused") private int pageid;
    @SuppressWarnings("unused") private int index;
    /** Holder of the parsed text; null when the response had no {@code text} member. */
    @Nullable private MwParseText text;

    /**
     * Returns the parsed text of the page.
     *
     * @return the value stored under {@code text.*}, or an empty string when either
     *         the {@code text} object or its {@code *} value is missing, so callers
     *         never receive null and never hit a NullPointerException here
     */
    public String text() {
        if (text == null || text.text == null) {
            return "";
        }
        return text.text;
    }

    /**
     * Holder for the {@code text} object of the parse result.
     *
     * Declared static so that it can be instantiated without an enclosing
     * {@link MwParseResult} instance during deserialization.
     */
    public static class MwParseText {
        @SerializedName("*") @Nullable private String text;
    }
}

View file

@ -4,7 +4,6 @@ import android.text.TextUtils;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import com.google.gson.Gson;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
@ -26,10 +25,6 @@ import java.util.Date;
import fr.free.nrw.commons.BuildConfig;
import fr.free.nrw.commons.CommonsApplication;
import fr.free.nrw.commons.BuildConfig;
import fr.free.nrw.commons.CommonsApplication;
import io.reactivex.Single;
import timber.log.Timber;
/**
@ -54,35 +49,6 @@ public class ApacheHttpClientMediaWikiApi implements MediaWikiApi {
api = new CustomMwApi(apiURL, httpClient);
}
/**
 * Converts wikitext to HTML through the {@code flow-parsoid-utils} API action.
 *
 * The request is built and executed lazily, when the returned Single is subscribed to.
 *
 * @param source wikitext sent as the {@code content} parameter
 * @return a Single emitting the string found at
 *         {@code /api/flow-parsoid-utils/@content} in the API result
 */
@Override
public Single<String> parseWikicode(String source) {
    return Single.fromCallable(() -> {
        CustomApiResult conversion = api.action("flow-parsoid-utils")
                .param("from", "wikitext")
                .param("to", "html")
                .param("content", source)
                .param("title", "Main_page")
                .get();
        return conversion.getString("/api/flow-parsoid-utils/@content");
    });
}
/**
 * Fetches the latest revision of a page together with its generated parse tree.
 *
 * Runs a {@code query} action with {@code prop=revisions}, {@code rvprop=content},
 * {@code rvlimit=1} and {@code rvgeneratexml=1}; the request is executed when the
 * returned Single is subscribed to.
 *
 * @param filename value sent as the {@code titles} parameter
 * @return a Single emitting a {@link MediaResult} built from the revision text and
 *         its {@code parsetree} attribute
 */
@Override
@NonNull
public Single<MediaResult> fetchMediaByFilename(String filename) {
    return Single.fromCallable(() -> {
        CustomApiResult revisionQuery = api.action("query")
                .param("prop", "revisions")
                .param("titles", filename)
                .param("rvprop", "content")
                .param("rvlimit", 1)
                .param("rvgeneratexml", 1)
                .get();
        String wikiSource = revisionQuery.getString("/api/query/pages/page/revisions/rev");
        String parseTree = revisionQuery.getString("/api/query/pages/page/revisions/rev/@parsetree");
        return new MediaResult(wikiSource, parseTree);
    });
}
/**
* Checks to see if a user is currently blocked from Commons
*

View file

@ -1,33 +0,0 @@
package fr.free.nrw.commons.mwapi;
/**
 * Immutable pair of a page's wiki source and its parse tree in XML form.
 */
public class MediaResult {
    private final String wikiSource;
    private final String parseTreeXmlSource;

    /**
     * Creates a result holding both representations.
     *
     * @param source wiki source of the media page
     * @param parseTreeXml parse tree of the media page, as XML
     */
    MediaResult(String source, String parseTreeXml) {
        wikiSource = source;
        parseTreeXmlSource = parseTreeXml;
    }

    /**
     * @return the wiki source passed at construction
     */
    public String getWikiSource() {
        return this.wikiSource;
    }

    /**
     * @return the XML parse tree passed at construction
     */
    public String getParseTreeXmlSource() {
        return this.parseTreeXmlSource;
    }
}

View file

@ -9,11 +9,6 @@ import io.reactivex.Single;
public interface MediaWikiApi {
Single<String> parseWikicode(String source);
@NonNull
Single<MediaResult> fetchMediaByFilename(String filename);
boolean isUserBlockedFromCommons();
void logout();

View file

@ -35,7 +35,7 @@ public class ReviewHelper {
}
/**
* Fetches recent changes from MediaWiki AP
* Fetches recent changes from MediaWiki API
* Calls the API to get 10 changes in the last 1 hour
* Earlier we were getting changes for the last 30 days but as the API returns just 10 results
* it's best to fetch for just the last 1 hour.

View file

@ -1,9 +1,7 @@
package fr.free.nrw.commons
import fr.free.nrw.commons.media.MediaClient
import fr.free.nrw.commons.mwapi.MediaResult
import fr.free.nrw.commons.mwapi.MediaWikiApi
import fr.free.nrw.commons.mwapi.OkHttpJsonApiClient
import io.reactivex.Single
import org.junit.Assert.assertTrue
import org.junit.Before
@ -49,13 +47,8 @@ class MediaDataExtractorTest {
`when`(mediaClient?.checkPageExistsUsingTitle(ArgumentMatchers.anyString()))
.thenReturn(Single.just(true))
val mediaResult = mock(MediaResult::class.java)
`when`(mediaResult.wikiSource).thenReturn("some wiki source")
`when`(mwApi?.fetchMediaByFilename(ArgumentMatchers.anyString()))
.thenReturn(Single.just(mediaResult))
`when`(mwApi?.parseWikicode(ArgumentMatchers.anyString()))
.thenReturn(Single.just("discussion text"))
`when`(mediaClient?.getPageHtml(ArgumentMatchers.anyString()))
.thenReturn(Single.just("Test"))
val fetchMediaDetails = mediaDataExtractor?.fetchMediaDetails("test.jpg")?.blockingGet()

View file

@ -196,4 +196,30 @@ class MediaClientTest {
assertEquals(media1.filename, "Test")
assertEquals(media2.filename, "Test")
}
/**
 * A response carrying a parse result must yield that result's text.
 */
@Test
fun getPageHtmlTest() {
    // Arrange: a parse result whose text is "Test", wrapped in a response.
    val parseResult = mock(MwParseResult::class.java).also { stub ->
        `when`(stub.text()).thenReturn("Test")
    }
    val response = MwParseResponse()
    response.setParse(parseResult)
    `when`(mediaInterface!!.getPageHtml(ArgumentMatchers.anyString()))
        .thenReturn(Observable.just(response))

    // Act
    val html = mediaClient!!.getPageHtml("abcde").blockingGet()

    // Assert
    assertEquals("Test", html)
}
/**
 * A response without a parse result must yield the empty-string default.
 */
@Test
fun getPageHtmlTestNull() {
    // Arrange: a response whose parse payload is explicitly null.
    val response = MwParseResponse()
    response.setParse(null)
    `when`(mediaInterface!!.getPageHtml(ArgumentMatchers.anyString()))
        .thenReturn(Observable.just(response))

    // Act
    val html = mediaClient!!.getPageHtml("abcde").blockingGet()

    // Assert
    assertEquals("", html)
}
}