#!/usr/bin/python
# -*- coding: utf-8 -*-

import csv
import collections
import json
import itertools
import re

from distutils.version import LooseVersion


def get_data(filename):
    '''Read a comma-separated file and return all of its rows.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list with one entry per CSV record; each entry is the list of
        that record's fields as strings.  An empty file gives [].

    Raises:
        IOError / OSError: if the file cannot be opened.  Nothing is
        returned in that case; the exception reaches the caller.
    '''
    import sys  # local import: only needed to choose the open() mode

    # The csv module expects a binary-mode file on Python 2, but a
    # text-mode file opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        f = open(filename, 'rb')
    else:
        f = open(filename, newline='')

    with f:
        # Materialise every row while the file is still open; the with
        # block closes it afterwards.
        return list(csv.reader(f, delimiter=','))

# Collect the rows of every input CSV into one flat list, file after file
# in the order given here.
your_list = []
for csv_path in (
        './scripts/anselm.csv',
        './scripts/salomon.csv',
        './scripts/uv2000.csv',
        './scripts/phi.csv',
        './scripts/dgx.csv',
        './scripts/barbora.csv',
):
    your_list.extend(get_data(csv_path))

# Sum the integer in the second column for every distinct value of the
# first column, across all rows in your_list.
counts = dict()
for row in your_list:
    if not row:
        # csv.reader produces an empty list for a blank line; there is
        # nothing to count, so skip it instead of failing on row[0].
        continue
    # int() raises ValueError if the second column is not an integer.
    counts[row[0]] = counts.get(row[0], 0) + int(row[1])

# One letter per bit, lowest bit first (presumably the initials of the
# clusters whose CSV files are loaded by this script - confirm):
#     1    2    4    8    16   32
l = ['A', 'S', 'U', 'P', 'D', 'B']
# Same letters with the highest bit first, so they line up with the bit
# tuples produced below.
mask = ''.join(l[::-1])
from itertools import product
# c[n] is the label for bitmask n: product() yields the bit tuples in
# counting order, and each set bit shows its letter while each clear bit
# shows '-'.
c = [
    ''.join(letter if bit else '-' for letter, bit in zip(mask, flags))
    for flags in product([0, 1], repeat=len(l))
]

software = dict()
# Not read anywhere in this section; left in place as module-level names.
versions = ''
clusters = ''

# Group the summed values by the first two '/'-separated parts of each key:
#     software[first][second] = '`' + c[total] + '`'
# Keys without a '/' are ignored.  The total is used directly as an index
# into the label table c, so a total outside that table raises IndexError.
for module, total in sorted(counts.items()):
    parts = module.split('/')
    if len(parts) > 1:
        name, version = parts[0], parts[1]
        # setdefault creates the inner dict the first time a name is seen,
        # so the grouping does not depend on the sort order and also works
        # when the first part is an empty string.
        software.setdefault(name, {})[version] = '`' + c[total] + '`'


packages = {}

# For each entry in software keep a single version: the one that sorts
# last under LooseVersion ordering.  Entries are visited in
# case-insensitive name order.
for name, by_version in sorted(software.items(),
                               key=lambda item: item[0].lower()):
    packages[name] = sorted(by_version, key=LooseVersion)[-1]

data = {'total': len(packages), 'projects': packages}
# Parenthesised single-argument print: same output as the print statement
# on Python 2, and valid syntax on Python 3.
print(json.dumps(data))