#!/usr/bin/python
#
# Parse out as much licensing information as we can from our vendored
# directories to create a license report.
# You may need to edit this afterwards to replace any 'UNKNOWN' with actual
# data.
#
# Usage: license-audit.py TOWER_PATH LICENSE_CSV [OUTPUT_CSV]
#
# The CSV columns are: name, license, url, source ('pip' or 'js').
# print() is always called with a single pre-formatted string so that the
# script behaves the same under Python 2 and Python 3.

import csv
import fnmatch
import json
import os
import re
import sys


# Exact license strings (after stripping double quotes) -> canonical name.
_LICENSE_ALIASES = {
    'None': 'UNKNOWN',
    'Apache License, Version 2.0': 'Apache 2.0',
    'Apache License (2.0)': 'Apache 2.0',
    'Apache License 2.0': 'Apache 2.0',
    'Apache-2.0': 'Apache 2.0',
    'Apache License, v2.0': 'Apache 2.0',
    'APL2': 'Apache 2.0',
    'ISC license': 'ISC',
    'MIT License': 'MIT',
    'MIT license': 'MIT',
    'BSD License': 'BSD',
    'Simplified BSD': 'BSD',
    'BSD-derived (http://www.repoze.org/LICENSE.txt)': 'BSD',
    'BSD-like': 'BSD',
    'Modified BSD License': 'BSD',
    'LGPL': 'LGPL 2.1',
    'BSD or Apache License, Version 2.0': 'BSD or Apache 2.0',
    'Python Software Foundation License': 'PSF',
}

# Don't embed YOUR ENTIRE LICENSE in your metadata!
# Packages that do are recognised by a distinctive substring instead.
_LICENSE_FRAGMENTS = (
    ('Copyright 2011-2013 Jeffrey Gelens', 'Apache 2.0'),
    ('https://github.com/umutbozkurt/django-rest-framework-mongoengine'
     '/blob/master/LICENSE', 'MIT'),
)


def usage():
    """Print the command-line synopsis and exit with status 1."""
    print("usage: license-audit.py TOWER_PATH LICENSE_CSV [OUTPUT_CSV]")
    sys.exit(1)


def read_requirements(towerpath):
    """Parse ``<towerpath>/requirements/requirements.txt``.

    Three kinds of lines are recognised:

    * ``name==version`` pins,
    * ``git+https://...`` URLs of the form ``<name>.git@<ref>#egg=...``,
    * anything else, which is recorded verbatim with an empty version.

    Comment lines (starting with ``#``) and blank lines are ignored.

    Returns a dict mapping package name to ``{'name': ..., 'version': ...}``,
    or ``None`` (after printing a message) if the file cannot be opened.
    """
    filename = '%s/requirements/requirements.txt' % (towerpath,)
    try:
        f = open(filename)
    except (IOError, OSError):
        print("failed to open %s" % (filename,))
        return None
    with f:
        lines = f.readlines()

    ret = {}
    for raw in lines:
        # Strip once so a missing trailing newline (or a CRLF ending) on a
        # line never eats a character of the name or version.
        line = raw.strip()
        if not line or line.startswith('#'):
            continue

        if '==' in line:
            m = re.match(r"(\S+)==(\S+)", line)
            if m:
                name = m.group(1)
                ret[name] = {'name': name, 'version': m.group(2)}
            continue

        if line.startswith("git+https"):
            tail = line.rsplit('/', 1)[1]
            m = re.match(r"(\S+).git@(\S+)#", tail)
            if m:
                name = m.group(1)
                version = m.group(2)
                if version.startswith('tower_'):
                    version = version[6:]
                if name == 'python-ipy':
                    name = 'ipy'
                if len(version) > 20:
                    # it's a sha1sum, read the version off the egg spec,
                    # which is separated from the name by '-' or '_'.
                    parts = tail.rsplit('-', 1)
                    if len(parts) < 2:
                        parts = tail.rsplit('_', 1)
                    if len(parts) == 2:
                        version = parts[1]
                ret[name] = {'name': name, 'version': version}
            continue

        # Unversioned requirement: key it by its own text rather than by
        # whatever name an earlier line happened to leave behind.
        ret[line] = {'name': line, 'version': ''}
    return ret


def get_js(towerpath):
    """Collect license info for everything under ``awx/ui/static/lib``.

    Each directory entry becomes one package.  When the entry has a readable
    ``bower.json`` its ``license`` and ``homepage`` (or ``url``) fields are
    used; otherwise both are reported as ``'UNKNOWN'``.

    Returns a dict mapping entry name to
    ``{'name': ..., 'license': ..., 'url': ...}``.
    """
    excludes = []
    directory = '%s/awx/ui/static/lib' % (towerpath,)
    ret = {}
    for item in os.listdir(directory):
        if any(fnmatch.fnmatch(item, exclude) for exclude in excludes):
            continue

        pkg = {'name': item, 'license': 'UNKNOWN', 'url': 'UNKNOWN'}
        ret[item] = pkg
        try:
            bowerfile = open('%s/%s/bower.json' % (directory, item))
        except (IOError, OSError):
            # keep the dummy entry (should read package.json if it exists)
            continue
        with bowerfile:
            pkginfo = json.load(bowerfile)

        if 'license' in pkginfo:
            pkg['license'] = normalize_license(pkginfo['license'])
        if 'homepage' in pkginfo:
            pkg['url'] = pkginfo['homepage']
        elif 'url' in pkginfo:
            pkg['url'] = pkginfo['url']
    return ret


def search_requirements(requirements_dict, path):
    """Return True if any requirement entry has ``'path'`` equal to *path*.

    Entries without a ``'path'`` key (which is what read_requirements()
    produces) simply do not match.
    """
    for item in requirements_dict.values():
        if item.get('path') == path:
            return True
    return False


def normalize_license(license):
    """Map a free-form license string to a short canonical name.

    Empty values and the literal string ``'None'`` become ``'UNKNOWN'``.
    Double quotes are stripped before matching.  Strings that are not
    recognised are returned as-is (minus the quotes).
    """
    if not license:
        return 'UNKNOWN'
    license = license.replace('"', '')
    if license in _LICENSE_ALIASES:
        return _LICENSE_ALIASES[license]
    for fragment, canonical in _LICENSE_FRAGMENTS:
        if fragment in license:
            return canonical
    return license


def _open_csv(filename, mode):
    """Open *filename* the way the csv module wants on this interpreter."""
    if sys.version_info[0] >= 3:
        return open(filename, mode, newline='')
    return open(filename, mode + 'b')


def read_csv(filename):
    """Load an existing license CSV.

    Returns a dict mapping package name to
    ``{'name': ..., 'license': ..., 'url': ..., 'source': ...}``, or ``None``
    (after printing a message) if the file cannot be opened.  Blank rows are
    skipped.
    """
    try:
        f = _open_csv(filename, 'r')
    except (IOError, OSError):
        print("failed to open %s" % (filename,))
        return None

    ret = {}
    with f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                continue
            ret[row[0]] = {
                'name': row[0],
                'license': row[1],
                'url': row[2],
                'source': row[3],
            }
    return ret


def write_csv(filename, data):
    """Write *data* as CSV rows sorted by key.

    *filename* may be a path, or an already-open writable file object such
    as ``sys.stdout`` (which is left open).
    """
    if hasattr(filename, 'write'):
        csvfile = filename
        close_when_done = False
    else:
        csvfile = _open_csv(filename, 'w')
        close_when_done = True
    try:
        writer = csv.writer(csvfile, delimiter=',', lineterminator='\n')
        for key in sorted(data):
            item = data[key]
            writer.writerow((item['name'], item['license'],
                             item['url'], item['source']))
    finally:
        if close_when_done:
            csvfile.close()


def main(argv=None):
    """Run the audit: merge fresh pip/JS license data into the old CSV."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        usage()
    outputfile = argv[3] if len(argv) > 3 else sys.stdout
    tower_path = argv[1]

    # Imported here so the parsing helpers above stay importable on machines
    # that do not have the PyPI client installed.
    import yolk.pypi

    # Read old license CSV
    olddata = read_csv(argv[2])

    # Read python site-packages README requirements file
    requirements = read_requirements(tower_path)

    if not olddata or not requirements:
        print("No starting data")
        sys.exit(1)

    # See if there's pip things in our current license list that we don't
    # have now
    for item in olddata.values():
        if item['source'] == 'pip' and item['name'] not in requirements:
            print("Potentially no longer vendoring %s" % (item['name'],))

    # Get directory of vendored JS things from the js dir
    js_packages = get_js(tower_path)

    # See if there's JS things in our current license list that we don't
    # have now
    for item in olddata.values():
        if item['source'] == 'js' and item['name'] not in js_packages:
            print("No longer vendoring %s" % (item['name'],))

    # Take the requirements file, and get license information where necessary
    cs = yolk.pypi.CheeseShop()
    for req in requirements.values():
        name = req['name']
        # name sanitization
        (pname, pvers) = cs.query_versions_pypi(name)
        cs_info = cs.release_data(pname, req['version'])
        if not cs_info:
            print("Couldn't find '%s==%s'" % (name, req['version']))
            if name not in olddata:
                print("... and it's not in the current data. "
                      "This needs fixed!")
                sys.exit(1)
            continue

        license = normalize_license(cs_info.get('license'))
        # A missing/empty home page must not clobber a URL we already know.
        url = cs_info.get('home_page') or 'UNKNOWN'

        if name not in olddata:
            print("New item %s" % (name,))
            olddata[name] = {
                'name': name,
                'license': license,
                'url': url,
                'source': 'pip',
            }
            continue

        data = olddata[name]
        if license != 'UNKNOWN':
            data['license'] = license
        if url != 'UNKNOWN':
            data['url'] = url

    # Update JS package info
    for pkg, new in js_packages.items():
        if pkg in olddata:
            data = olddata[pkg]
            if new['license'] != 'UNKNOWN':
                data['license'] = new['license']
            if new['url'] != 'UNKNOWN':
                data['url'] = new['url']
        else:
            olddata[pkg] = {
                'name': pkg,
                'license': new['license'],
                'url': new['url'],
                'source': 'js',
            }

    write_csv(outputfile, olddata)


if __name__ == '__main__':
    main()