#!/usr/bin/local import time import random def randomLine(file): """ Selects a line at random from a file. Uses only one pass through the file, and still gives a uniform chance of each line being picked. The math is basically: Base case: If you have 1 line, you return it, giving every line a 1/1 chance. Induction: After N lines, each line has a 1/N chance of being selected. Therefore, there is a 1/N * N/(N+1) chance of it still being selected (a 1 - 1/(N+1) chance of the new item not being selected). The N's reduce, leaving an 1/(N+1) of any previous line being selected, and since we took the new line with probability 1/(N+1), all lines have a 1/(N+1) probability of being selected at this point. import boilerplate_induction_verbosity_here """ current = 0 count = 0 random.seed( time.time() ) f = open(file, "r") for line in f.readlines(): if count == 0: current = line else: chooseit = random.random() threshold = 1.0 / (count + 1.0) if chooseit < threshold: current = line count = count + 1 f.close() return current chose = randomLine("data") print "Chose line: " + chose