#!/usr/bin/python

# Processes a file of the format: 
# customer identifier \t URL
# and determines the most common sequences of 3-URLs hit 

import re

# Maps customer ID to the last few URLs.
customerToURLs = {}

# Maps the URL-triplets to the number of times we've seen it. 
tripletCounts = {}

# Keeps track of the best N entries. 
best = []

def insertSortedToBest( key, count ):
	""" Inserts a key/count pair into a list.
	
	Uses the count (in the second slot of the tuple) to determine which 
	position in the list (if any) this item should go, and inserts it. 
	Maintains the list at 10 elements.""" 	

	pos = findPositionInBest(count)
	if count == -1 :
		return
	old = [key, count - 1]
	new = [key, count]

	best.insert(pos, new)

	if best.count(old) > 0: 	
		best.remove(old)


	if len(best) == 11: 
		best.remove( best[10] )

	

def findPositionInBest( count ): 
	""" Determines the appropriate position for an element to be inserted
	into our best list."""

	if len(best) == 0:
		return 0 

	i = 0 
	while i < len(best) and count < best[i][1]:
		i = i + 1
	if i > 9:
		return -1
	else:
		return i
	

def processEntry( customer, url ):
	""" Entirely processes a customer/URL pair. """

	if customerToURLs.has_key(customer):
		sequence = customerToURLs[customer]	
		sequence.append(url)	

		# If we have enough elements already, 
		# remove the earliest one from the list
		if len(sequence) == 4:
			sequence.pop(0)

		# If we have enough elements, create a triplet
		# and increment or set it in the triplet counts
		if len(sequence) == 3: 
			a, b,c = sequence
			key = a + ":" + b + ":" + c
			if tripletCounts.has_key(key):
				tripletCounts[key] = tripletCounts[key] + 1
				insertSortedToBest(key, tripletCounts[key])
			else:
				tripletCounts[key] = 1			
				insertSortedToBest(key, 1) 


	else:
		customerToURLs[customer] = [url]
			
r = re.compile(r'\t');

f = open("c:\\data", "r")
for line in f:
	customer, url = r.split(line)
	processEntry(customer, url)
f.close()

print "====\nSolution:"
print best