#!/usr/bin/local
import time
import random
def randomLine(file):
""" Selects a line at random from a file.
Uses only one pass through the file, and still gives a uniform chance of each line
being picked. The math is basically:
Base case:
If you have 1 line, you return it, giving every line a 1/1 chance.
Induction:
After N lines, each line has a 1/N chance of being selected. Therefore, there is a
1/N * N/(N+1) chance of it still being selected (a 1 - 1/(N+1) chance of the new
item not being selected). The N's reduce, leaving an 1/(N+1) of any previous line being
selected, and since we took the new line with probability 1/(N+1), all lines have a
1/(N+1) probability of being selected at this point.
import boilerplate_induction_verbosity_here
"""
current = 0
count = 0
random.seed( time.time() )
f = open(file, "r")
for line in f.readlines():
if count == 0:
current = line
else:
chooseit = random.random()
threshold = 1.0 / (count + 1.0)
if chooseit < threshold:
current = line
count = count + 1
f.close()
return current
chose = randomLine("data")
print "Chose line: " + chose