More random recent image selection (#1542)

Time-based randomness is biased: if someone uploaded 100 images in an
hour one week ago, and I select a random point in time, their last
image is far more likely to come up than anything else.

With this change, there is still a bias towards choosing one of the last
N images in any burst of uploads (where N is the number of recent changes
fetched), but it is somewhat better than before.
This commit is contained in:
Elliott Eggleston 2018-05-20 05:45:44 -05:00 committed by maskara
parent db25928aa7
commit 44567627d0

View file

@ -3,6 +3,8 @@ package fr.free.nrw.commons.media;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import java.util.Random;
import javax.annotation.Nullable;
public class RecentChangesImageUtils {
@ -13,8 +15,23 @@ public class RecentChangesImageUtils {
@Nullable
public static String findImageInRecentChanges(NodeList childNodes) {
String imageTitle;
for (int i = 0; i < childNodes.getLength(); i++) {
Element e = (Element)childNodes.item(i);
Random r = new Random();
int count = childNodes.getLength();
// Build a range array
int[] randomIndexes = new int[count];
for (int i = 0; i < count; i++) {
randomIndexes[i] = i;
}
// Then shuffle it
for (int i = 0; i < count; i++) {
int swapIndex = r.nextInt(count);
int temp = randomIndexes[i];
randomIndexes[i] = randomIndexes[swapIndex];
randomIndexes[swapIndex] = temp;
}
for (int i = 0; i < count; i++) {
int randomIndex = randomIndexes[i];
Element e = (Element) childNodes.item(randomIndex);
if (e.getAttribute("type").equals("log") && !e.getAttribute("old_revid").equals("0")) {
// For log entries, we only want ones where old_revid is zero, indicating a new file
continue;