#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print a JSON summary of the newest version of every software module.

Each input CSV row is ``<name>/<version>,<integer flag>``.  The flags of
identical module names are summed over all input files, the modules are
grouped by ``<name>``, and for every name the highest version is reported::

    {"total": <number of names>, "projects": {<name>: <newest version>, ...}}

The script runs unchanged on Python 2 and Python 3.
"""

import csv
import collections
import json
import itertools
import re
import sys

# One letter per input file, in increasing flag value: 1 2 4 8 16 32.
# NOTE(review): presumably every CSV carries its own cluster's flag value in
# the second column -- confirm against the generator of these files.
CLUSTER_LETTERS = ('A', 'S', 'U', 'P', 'D', 'B')

# Input files, read in this order.
CSV_FILES = (
    './scripts/anselm.csv',
    './scripts/salomon.csv',
    './scripts/uv2000.csv',
    './scripts/phi.csv',
    './scripts/dgx.csv',
    './scripts/barbora.csv',
)

# Same tokenisation as distutils' LooseVersion: runs of digits, runs of
# lowercase letters and single dots are the separators/components.
_COMPONENT_RE = re.compile(r'(\d+|[a-z]+|\.)')


def get_data(filename):
    """Read the CSV file ``filename`` and return all of its rows.

    Args:
        filename: path of a comma-delimited file.

    Returns:
        A list with one list of column strings per row.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        handle = open(filename, 'r', newline='')
    else:
        handle = open(filename, 'rb')
    with handle as f:
        # Materialise the rows before the file is closed.
        return list(csv.reader(f, delimiter=','))


def _mask_table(letters):
    """Return the list mapping a flag sum to its presence mask string.

    Entry ``n`` has one character per letter, most significant flag first;
    a letter is shown where the corresponding bit of ``n`` is set and ``-``
    otherwise (for the default letters, entry 1 is ``-----A``).
    """
    mask = ''.join(reversed(letters))
    return [
        ''.join(letter if bit else '-' for letter, bit in zip(mask, bits))
        for bits in itertools.product((0, 1), repeat=len(letters))
    ]


def _version_key(version):
    """Return a sort key ordering version strings like LooseVersion did.

    The string is split into numeric and non-numeric components.  Numeric
    components compare as integers and always sort before textual ones,
    which is the Python 2 ordering of mixed int/str lists; unlike
    LooseVersion on Python 3, this never raises ``TypeError``.
    """
    key = []
    for part in _COMPONENT_RE.split(version):
        if not part or part == '.':
            continue
        try:
            key.append((0, int(part)))
        except ValueError:
            key.append((1, part))
    return tuple(key)


def build_report(rows, letters=CLUSTER_LETTERS):
    """Aggregate CSV rows into the ``total``/``projects`` summary.

    Args:
        rows: iterable of ``[module, flag]`` rows as returned by
            ``get_data``; empty rows are ignored.
        letters: one letter per flag bit, lowest bit first.

    Returns:
        An ordered mapping with ``total`` (number of distinct module names)
        and ``projects`` (module name -> newest version), the latter sorted
        case-insensitively by name.

    Raises:
        ValueError: if a flag column is not an integer.
        IndexError: if a non-empty row has fewer than two columns, or a
            summed flag value falls outside the mask table.
    """
    # Sum the flags per module.  This assumes a module occurs at most once
    # per flag value; duplicates would add up to a wrong mask.
    counts = collections.defaultdict(int)
    for row in rows:
        if not row:  # csv.reader yields [] for blank lines
            continue
        counts[row[0]] += int(row[1])

    masks = _mask_table(letters)

    # software[name][version] = mask string.  Only the version keys reach
    # the output; the mask lookup is kept because it rejects out-of-range
    # flag sums exactly as before.
    software = {}
    for module, flags in sorted(counts.items()):
        parts = module.split('/')
        if len(parts) < 2:  # not in "<name>/<version>" form
            continue
        name, version = parts[0], parts[1]
        software.setdefault(name, {})[version] = '`' + masks[flags] + '`'

    packages = collections.OrderedDict()
    for name in sorted(software, key=lambda n: n.lower()):
        packages[name] = max(software[name], key=_version_key)

    return collections.OrderedDict(
        [('total', len(packages)), ('projects', packages)]
    )


def main():
    """Read every file in ``CSV_FILES`` and print the JSON summary."""
    rows = []
    for path in CSV_FILES:
        rows += get_data(path)
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(json.dumps(build_report(rows)))


if __name__ == '__main__':
    main()