Add PageTitle class to canonicalize page names

This commit is contained in:
Yusuke Matsubara 2017-06-15 22:23:32 +09:00
parent 691acbf983
commit d241d36d66
12 changed files with 234 additions and 50 deletions

View file

@ -0,0 +1,23 @@
package fr.free.nrw.commons;
import android.support.test.runner.AndroidJUnit4;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import static org.hamcrest.CoreMatchers.is;
// TODO: use Robolectric and make it runnable without a connected device
@RunWith(AndroidJUnit4.class)
public class MediaTest {
@Test public void displayTitleShouldStripExtension() {
Media m = new Media("File:Example.jpg");
Assert.assertThat(m.getDisplayTitle(), is("Example"));
}
@Test public void displayTitleShouldUseSpaceForUnderscore() {
Media m = new Media("File:Example 1_2.jpg");
Assert.assertThat(m.getDisplayTitle(), is("Example 1 2"));
}
}

View file

@ -0,0 +1,61 @@
package fr.free.nrw.commons;
import android.support.test.runner.AndroidJUnit4;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import java.net.URLEncoder;
import static org.hamcrest.CoreMatchers.is;
// TODO: use Robolectric and make it runnable without a connected device
@RunWith(AndroidJUnit4.class)
public class PageTitleTest {
@Test public void displayTextShouldNotBeUnderscored() {
Assert.assertThat(new PageTitle("Ex_1 ").getDisplayText(),
is("Ex 1"));
}
@Test public void moreThanTwoColons() {
Assert.assertThat(new PageTitle("File:sample:a.jpg").getPrefixedText(),
is("File:Sample:a.jpg"));
}
@Test public void getTextShouldReturnWithoutNamespace() {
Assert.assertThat(new PageTitle("File:sample.jpg").getText(),
is("Sample.jpg"));
}
@Test public void capitalizeNameAfterNamespace() {
Assert.assertThat(new PageTitle("File:sample.jpg").getPrefixedText(),
is("File:Sample.jpg"));
}
@Test public void prefixedTextShouldBeUnderscored() {
Assert.assertThat(new PageTitle("Ex 1 ").getPrefixedText(),
is("Ex_1"));
}
@Test public void getMobileUriForTest() {
Assert.assertThat(new PageTitle("Test").getMobileUri().toString(),
is("https://commons.m.wikimedia.org/wiki/Test"));
}
@Test public void spaceBecomesUnderscoreInUri() {
Assert.assertThat(new PageTitle("File:Ex 1.jpg").getCanonicalUri().toString(),
is("https://commons.wikimedia.org/wiki/File:Ex_1.jpg"));
}
@Test public void leaveSubpageNamesUncapitalizedInUri() {
Assert.assertThat(new PageTitle("User:Ex/subpage").getCanonicalUri().toString(),
is("https://commons.wikimedia.org/wiki/User:Ex/subpage"));
}
@Test public void unicodeUri() throws Throwable {
Assert.assertThat(new PageTitle("User:例").getCanonicalUri().toString(),
is("https://commons.wikimedia.org/wiki/User:" + URLEncoder.encode("", "utf-8")));
}
}

View file

@ -2,6 +2,7 @@ package fr.free.nrw.commons;
import android.app.Activity;
import android.content.res.Resources;
import android.support.annotation.Nullable;
import org.xmlpull.v1.XmlPullParser;
@ -42,10 +43,11 @@ public class LicenseList {
return licenses.get(key);
}
@Nullable
public License licenseForTemplate(String template) {
String ucTemplate = Utils.capitalize(template);
String ucTemplate = new PageTitle(template).getDisplayText();
for (License license : values()) {
if (ucTemplate.equals(Utils.capitalize(license.getTemplate()))) {
if (ucTemplate.equals(new PageTitle(license.getTemplate()).getDisplayText())) {
return license;
}
}

View file

@ -47,7 +47,7 @@ public class Media implements Parcelable {
return "";
}
// FIXME: Gross hack bercause my regex skills suck maybe or I am too lazy who knows
String title = filename.replaceFirst("^File:", "");
String title = getFilePageTitle().getDisplayText().replaceFirst("^File:", "");
Matcher matcher = displayTitlePattern.matcher(title);
if(matcher.matches()) {
return matcher.group(1);
@ -56,13 +56,8 @@ public class Media implements Parcelable {
}
}
public String getDescriptionUrl() {
// HACK! Geez
return CommonsApplication.HOME_URL + "File:" + Utils.urlEncode(getFilename().replace("File:", "").replace(" ", "_"));
}
public String getMobileDescriptionUrl() {
return CommonsApplication.MOBILE_HOME_URL + "File:" + Utils.urlEncode(getFilename().replace("File:", "").replace(" ", "_"));
public PageTitle getFilePageTitle() {
return new PageTitle("File:" + getFilename().replaceFirst("^File:", ""));
}
public Uri getLocalUri() {

View file

@ -163,14 +163,14 @@ public class MediaDataExtractor {
}
}
private Node findTemplate(Element parentNode, String title) throws IOException {
String ucTitle= Utils.capitalize(title);
private Node findTemplate(Element parentNode, String title_) throws IOException {
String title= new PageTitle(title_).getDisplayText();
NodeList nodes = parentNode.getChildNodes();
for (int i = 0, length = nodes.getLength(); i < length; i++) {
Node node = nodes.item(i);
if (node.getNodeName().equals("template")) {
String foundTitle = getTemplateTitle(node);
if (Utils.capitalize(foundTitle).equals(ucTitle)) {
if (title.equals(new PageTitle(foundTitle).getDisplayText())) {
return node;
}
}

View file

@ -0,0 +1,91 @@
package fr.free.nrw.commons;
import android.net.Uri;
import android.support.annotation.NonNull;
public class PageTitle {
private final String namespace;
private final String titleKey;
/**
* Construct from a namespace-prefixed page name.
* @param prefixedText namespace-prefixed page name
*/
public PageTitle(@NonNull String prefixedText) {
String[] segments = prefixedText.trim().replace(" ", "_").split(":", 2);
// canonicalize and capitalize page title as done by MediaWiki
if (segments.length == 2) {
// TODO: canonicalize and capitalize namespace as well
// see https://www.mediawiki.org/wiki/Manual:Title.php#Canonical_forms
namespace = segments[0];
titleKey = Utils.capitalize(segments[1]);
} else {
namespace = "";
titleKey = Utils.capitalize(segments[0]);
}
}
/**
* Get the canonicalized title for displaying (such as "File:My example.jpg").
*
* @return canonical title
*/
@NonNull
public String getPrefixedText() {
if (namespace.isEmpty()) {
return titleKey;
} else {
return namespace + ":" + titleKey;
}
}
/**
* Get the canonical title for DB and URLs (such as "File:My_example.jpg").
*
* @return canonical title
*/
@NonNull
public String getDisplayText() {
return getPrefixedText().replace("_", " ");
}
/**
* Convert to a URI
* (such as "https://commons.wikimedia.org/wiki/File:My_example.jpg").
*
* @return URI
*/
@NonNull
public Uri getCanonicalUri() {
String uriStr = CommonsApplication.HOME_URL + Uri.encode(getPrefixedText(), ":/");
return Uri.parse(uriStr);
}
/**
* Convert to a mobile URI
* (such as "https://commons.m.wikimedia.org/wiki/File:My_example.jpg").
*
* @return URI
*/
@NonNull
public Uri getMobileUri() {
String uriStr = CommonsApplication.MOBILE_HOME_URL + Uri.encode(getPrefixedText(), ":/");
return Uri.parse(uriStr);
}
/**
* Get the canonical title without namespace.
* @return title
*/
@NonNull
public String getText() {
return titleKey;
}
@Override
public String toString() {
return getPrefixedText();
}
}

View file

@ -1,14 +1,16 @@
package fr.free.nrw.commons;
import android.content.Context;
import android.net.Uri;
import android.os.Build;
import android.preference.PreferenceManager;
import android.text.Html;
import android.text.Spanned;
import fr.free.nrw.commons.settings.Prefs;
import timber.log.Timber;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.w3c.dom.Node;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import java.io.BufferedInputStream;
import java.io.IOException;
@ -16,6 +18,7 @@ import java.io.InputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.net.URLEncoder;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
@ -34,12 +37,8 @@ import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.codec.net.URLCodec;
import org.w3c.dom.Node;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import fr.free.nrw.commons.settings.Prefs;
import timber.log.Timber;
public class Utils {
@ -126,7 +125,7 @@ public class Utils {
}
public static String makeThumbBaseUrl(String filename) {
String name = filename.replaceFirst("File:", "").replace(" ", "_");
String name = new PageTitle(filename).getPrefixedText();
String sha = new String(Hex.encodeHex(DigestUtils.md5(name)));
return String.format("%s/%s/%s/%s", CommonsApplication.IMAGE_URL_BASE, sha.substring(0, 1), sha.substring(0, 2), urlEncode(name));
}
@ -153,11 +152,9 @@ public class Utils {
return outputStream.toString();
}
private static final URLCodec urlCodec = new URLCodec();
public static String urlEncode(String url) {
try {
return urlCodec.encode(url, "utf-8");
return URLEncoder.encode(url, "utf-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
@ -232,12 +229,6 @@ public class Utils {
throw new RuntimeException("Unrecognized license value: " + license);
}
public static Uri uriForWikiPage(String name) {
String underscored = name.trim().replace(" ", "_");
String uriStr = CommonsApplication.HOME_URL + urlEncode(underscored);
return Uri.parse(uriStr);
}
/**
* Fast-forward an XmlPullParser to the next instance of the given element
* in the input stream (namespaced).

View file

@ -15,9 +15,13 @@ import android.view.inputmethod.EditorInfo;
import android.widget.Button;
import android.widget.EditText;
import android.widget.TextView;
import android.widget.Toast;
import fr.free.nrw.commons.*;
import fr.free.nrw.commons.BuildConfig;
import fr.free.nrw.commons.CommonsApplication;
import fr.free.nrw.commons.PageTitle;
import fr.free.nrw.commons.R;
import fr.free.nrw.commons.WelcomeActivity;
import fr.free.nrw.commons.contributions.ContributionsActivity;
import timber.log.Timber;
@ -158,7 +162,7 @@ public class LoginActivity extends AccountAuthenticatorActivity {
* @return String canonicial username
*/
private String canonicializeUsername( String username ) {
return Utils.capitalize(username.substring(0,1)) + username.substring(1);
return new PageTitle(username).getText();
}
@Override

View file

@ -9,7 +9,7 @@ import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Locale;
import fr.free.nrw.commons.CommonsApplication;
import fr.free.nrw.commons.PageTitle;
import fr.free.nrw.commons.concurrency.BackgroundPoolExceptionHandler;
import fr.free.nrw.commons.concurrency.ThreadPoolExecutorService;
import timber.log.Timber;
@ -34,7 +34,8 @@ public class UploadCountClient {
public void run() {
URL url;
try {
url = new URL(String.format(Locale.ENGLISH, UPLOAD_COUNT_URL_TEMPLATE, userName));
url = new URL(String.format(Locale.ENGLISH, UPLOAD_COUNT_URL_TEMPLATE,
new PageTitle(userName).getText()));
HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
try {
BufferedReader bufferedReader = new BufferedReader(new

View file

@ -24,8 +24,8 @@ import fr.free.nrw.commons.LicenseList;
import fr.free.nrw.commons.Media;
import fr.free.nrw.commons.MediaDataExtractor;
import fr.free.nrw.commons.MediaWikiImageView;
import fr.free.nrw.commons.PageTitle;
import fr.free.nrw.commons.R;
import fr.free.nrw.commons.Utils;
import timber.log.Timber;
public class MediaDetailFragment extends Fragment {
@ -86,7 +86,7 @@ public class MediaDetailFragment extends Fragment {
public View onCreateView(LayoutInflater inflater, ViewGroup container, Bundle savedInstanceState) {
detailProvider = (MediaDetailPagerFragment.MediaDetailProvider)getActivity();
if(savedInstanceState != null) {
if (savedInstanceState != null) {
editable = savedInstanceState.getBoolean("editable");
index = savedInstanceState.getInt("index");
initialListTop = savedInstanceState.getInt("listTop");
@ -269,12 +269,11 @@ public class MediaDetailFragment extends Fragment {
}
}
private View buildCatLabel(String cat) {
final String catName = cat;
private View buildCatLabel(final String catName) {
final View item = getLayoutInflater(null).inflate(R.layout.detail_category_item, null, false);
final TextView textView = (TextView)item.findViewById(R.id.mediaDetailCategoryItemText);
textView.setText(cat);
textView.setText(catName);
if (categoriesLoaded && categoriesPresent) {
textView.setOnClickListener(new View.OnClickListener() {
@Override
@ -282,7 +281,7 @@ public class MediaDetailFragment extends Fragment {
String selectedCategoryTitle = "Category:" + catName;
Intent viewIntent = new Intent();
viewIntent.setAction(Intent.ACTION_VIEW);
viewIntent.setData(Utils.uriForWikiPage(selectedCategoryTitle));
viewIntent.setData(new PageTitle(selectedCategoryTitle).getCanonicalUri());
startActivity(viewIntent);
}
});

View file

@ -91,7 +91,7 @@ public class MediaDetailPagerFragment extends Fragment implements ViewPager.OnPa
final MediaDetailAdapter adapter = new MediaDetailAdapter(getChildFragmentManager());
if(savedInstanceState != null) {
if (savedInstanceState != null) {
final int pageNumber = savedInstanceState.getInt("current-page");
// Adapter doesn't seem to be loading immediately.
// Dear God, please forgive us for our sins
@ -143,7 +143,7 @@ public class MediaDetailPagerFragment extends Fragment implements ViewPager.OnPa
// View in browser
Intent viewIntent = new Intent();
viewIntent.setAction(Intent.ACTION_VIEW);
viewIntent.setData(Uri.parse(m.getMobileDescriptionUrl()));
viewIntent.setData(m.getFilePageTitle().getMobileUri());
startActivity(viewIntent);
return true;
case R.id.menu_download_current_image:
@ -224,14 +224,15 @@ public class MediaDetailPagerFragment extends Fragment implements ViewPager.OnPa
ShareActionProvider mShareActionProvider = (ShareActionProvider) MenuItemCompat.getActionProvider(menu.findItem(R.id.menu_share_current_image));
// On some phones null is returned for some reason:
// https://github.com/commons-app/apps-android-commons/issues/413
if(mShareActionProvider != null) {
if (mShareActionProvider != null) {
Intent shareIntent = new Intent(Intent.ACTION_SEND);
shareIntent.setType("text/plain");
shareIntent.putExtra(Intent.EXTRA_TEXT, m.getDisplayTitle() + " \n" + m.getDescriptionUrl());
shareIntent.putExtra(Intent.EXTRA_TEXT,
m.getDisplayTitle() + " \n" + m.getFilePageTitle().getCanonicalUri());
mShareActionProvider.setShareIntent(shareIntent);
}
if(m instanceof Contribution) {
if (m instanceof Contribution) {
Contribution c = (Contribution)m;
switch(c.getState()) {
case Contribution.STATE_FAILED:

View file

@ -1,10 +1,10 @@
package fr.free.nrw.commons;
import static org.hamcrest.CoreMatchers.is;
import org.junit.Assert;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.is;
public class UtilsTest {
@Test public void stripLocalizedStringPass() {
Assert.assertThat(Utils.stripLocalizedString("Hello"), is("Hello"));
@ -13,4 +13,20 @@ public class UtilsTest {
@Test public void stripLocalizedStringJa() {
Assert.assertThat(Utils.stripLocalizedString("\"こんにちは\"@ja"), is("こんにちは"));
}
@Test public void capitalizeLowercase() {
Assert.assertThat(Utils.capitalize("hello"), is("Hello"));
}
@Test public void capitalizeFullCaps() {
Assert.assertThat(Utils.capitalize("HELLO"), is("HELLO"));
}
@Test public void capitalizeNumbersPass() {
Assert.assertThat(Utils.capitalize("12x"), is("12x"));
}
@Test public void capitalizeJaPass() {
Assert.assertThat(Utils.capitalize("こんにちは"), is("こんにちは"));
}
}