# modules-json.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

import collections
import csv
import itertools
import json
import re
import sys

from distutils.version import LooseVersion

def get_data(filename):
    '''Read a comma-delimited CSV file and return all of its rows.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list with one entry per CSV row; each entry is the list of the
        row's fields as strings.

    Raises:
        IOError / OSError: propagated unchanged when the file cannot be
            opened -- there is no empty-list fallback.
    '''
    # The csv module expects a binary file on Python 2 but a text file opened
    # with newline='' on Python 3, so choose the mode for the running
    # interpreter.
    if sys.version_info[0] < 3:
        handle = open(filename, 'rb')
    else:
        handle = open(filename, 'r', newline='')
    # csv.reader is lazy: the rows have to be materialised into a list before
    # the with-block closes the file.
    with handle:
        return list(csv.reader(handle, delimiter=','))

# Collect the rows of every CSV export into one flat list, keeping the order
# in which the files are listed here.
your_list = []
for csv_path in (
    './scripts/anselm.csv',
    './scripts/salomon.csv',
    './scripts/uv2000.csv',
    './scripts/phi.csv',
    './scripts/dgx.csv',
):
    your_list.extend(get_data(csv_path))

# For every distinct first-column value, add up the integer found in the
# second column across all collected rows.
counts = {}
for row in your_list:
    key = row[0]
    counts[key] = counts.get(key, 0) + int(row[1])

# One letter per position; `mask` holds the same letters in reverse order.
l = ['A', 'S', 'U', 'P', 'D']
mask = ''.join(reversed(l))

# c[k] is the k-th 0/1 combination produced by itertools.product (i.e. k
# written as a len(l)-digit binary number), rendered with the letter from
# `mask` wherever the digit is 1 and '-' wherever it is 0.
c = [
    ''.join(letter if bit else '-' for letter, bit in zip(mask, combo))
    for combo in itertools.product([0, 1], repeat=len(l))
]

# software maps the part of each key before the first '/' to a dict of
# {part after it: pattern string wrapped in backticks}.
software = dict()
versions = ''  # NOTE(review): not referenced anywhere else in this script
clusters = ''  # NOTE(review): not referenced anywhere else in this script
prev = ''

# Keys are visited in sorted order, so all "<name>/<version>" entries sharing
# a name are adjacent; a change of name therefore starts a fresh inner dict.
for module, total in sorted(counts.items()):
    parts = module.split('/')
    # Keys without a '/' have no second component and are skipped.
    if len(parts) < 2:
        continue
    name, version = parts[0], parts[1]
    # `!=` replaces the Python-2-only `<>` operator (same comparison).
    if name != prev:
        software[name] = {}
    # The summed value selects the entry of the pattern table `c` built
    # earlier in this script.
    # NOTE(review): assumes every sum is a valid index into c -- confirm
    # against the CSV data.
    software[name][version] = '`' + c[total] + '`'
    prev = name


# packages maps each software name to its highest version string.
# An OrderedDict keeps the case-insensitive alphabetical order produced by
# sorted() below; a plain dict on Python 2 would discard that order, making
# the sort pointless by the time the data is serialised.
packages = collections.OrderedDict()

for name, by_version in sorted(software.items(), key=lambda item: item[0].lower()):
    # Iterating the inner dict yields its version keys; after an ascending
    # LooseVersion sort the last element is the highest version.
    packages[name] = sorted(by_version, key=LooseVersion)[-1]

# Emit the result as a single JSON object on stdout: the number of packages
# and the name -> version mapping.
data = {'total': len(packages), 'projects': packages}
# Parenthesised form prints the same text on Python 2 and is also valid
# syntax on Python 3 (the bare print statement is not).
print(json.dumps(data))