#!/usr/bin/python
"""Inspect a HipChat export archive from the command line.

Usage: SCRIPT COMMAND ARCHIVE [ARGS...]

Commands that read a tar archive (any ARCHIVE not ending in ``.zip``):

    list                    print the id and name of every room
    room ROOM               print the user messages of one room

Commands that still read the older ``.zip`` export layout:

    hist ROOM               per-hour, per-user message counts for a room
    users                   print the user list, sorted by name
    user USER [ROOM...]     hourly activity report for one user id
    chart USER [ROOM...]    same data, reduced to "week hours" rows

The script originally worked with zip files only; some commands have since
been converted to the tar layout, which is why the two groups differ.
"""

import contextlib
import datetime
import json
import re
import sys
import tarfile
import zipfile

# Paths inside the zip-style export.
ROOMS = 'hipchat_export/rooms/'
USERLIST = 'hipchat_export/users/list.json'

ONEDAY = datetime.timedelta(1)
HOURSECS = 3600

# Activity before this moment is ignored by histogram_user().
# Set to None to disable the cutoff.
MINDATE = datetime.datetime(2017, 1, 1)

# Path inside the tar-style export.
T_ROOMS = 'rooms.json'

_RE_STRIP_HTML = re.compile(r'<.*?>')

_USAGE = 'usage: %s list|room|hist|users|user|chart ARCHIVE [ARGS...]\n'

# Which archive flavour each command reads, and how many arguments it
# requires after the archive filename.
_TAR_COMMANDS = ('list', 'room')
_ZIP_COMMANDS = ('hist', 'users', 'user', 'chart')
_MIN_ARGS = {
    'list': 0,
    'room': 1,
    'hist': 1,
    'users': 0,
    'user': 1,
    'chart': 1,
}


class Rooms(object):
    """The room list stored in the ``rooms.json`` member of a tar export."""

    def __init__(self, tar):
        """Load the room records from the open ``tarfile.TarFile`` *tar*."""
        with contextlib.closing(tar.extractfile(T_ROOMS)) as fp:
            self.rooms = [d['Room'] for d in json.load(fp)]

    def print_list(self):
        """Print one "<id> <name>" line per room."""
        for room in self.rooms:
            print('%s %s' % (room['id'], room['name']))


def print_room(tar, room):
    """Print every user message of *room* as "[time] id:name: text"."""
    for line in _get_messages(tar, room):
        print('[%s] %s:%s: %s' % line)


def histogram_room(zf, room):
    """Print a tab-separated table of per-hour message counts for *room*.

    First lists each user id with the set of names it posted under, then a
    header row of user ids, then one row per hour bucket (the date string
    up to its first ':') holding each user's message count in that hour.
    """
    users = {}   # uid -> set of display names seen for that uid
    data = {}    # hour bucket -> {uid: message count}
    for date, uid, name, _msg in _get_chats(zf, room):
        users.setdefault(uid, set()).add(name)
        hour = date[:date.index(':')]
        counts = data.setdefault(hour, {})
        counts[uid] = counts.get(uid, 0) + 1

    for uid in users:
        print('%s %s' % (uid, users[uid]))

    # list() is required on Python 3, where keys() is a view that cannot be
    # concatenated to a list.
    uids = list(users)
    print('\t'.join([''] + uids))
    for hour in sorted(data):
        columns = [hour] + [str(data[hour].get(uid, '')) for uid in uids]
        print('\t'.join(columns))


def print_users(zf):
    """Print "<id> <name> <email>" for every user, sorted by name."""
    users = _get_users(zf)
    for uid, (name, email) in sorted(users.items(), key=lambda x: x[1][0]):
        print('%s %s %s' % (uid, name, email))


def histogram_user(zf, user, rooms, for_chart=False):
    """Report the hours in which *user* posted in any of *rooms*.

    Messages are bucketed into one-hour slots.  Consecutive active slots at
    most three hours apart form one block of work; a larger gap closes the
    block (adding one hour for its final slot).  When a new block starts in
    a later ISO week than the previous one, the finished week's hours are
    printed and added to the grand total.

    zf        -- open zip archive of the export
    user      -- user id to report on, as a string
    rooms     -- iterable of room names to scan
    for_chart -- if true, print only "<iso week number> <hours>" rows
                 instead of the detailed per-slot listing
    """
    data = {}    # datetime of the hour slot -> message count
    for room in rooms:
        for date, uid, _name, _msg in _get_chats(zf, room):
            if uid != user:
                continue
            year, month, day = [int(x) for x in date[:10].split('-')]
            slot = datetime.datetime(year, month, day, int(date[11:13]))
            if MINDATE and slot < MINDATE:
                continue
            data[slot] = data.get(slot, 0) + 1

    totalhours = 0
    totalcount = 0
    weekhours = 0
    startblock = None
    lasthour = None
    for hour in sorted(data):
        if startblock is None:
            startblock = hour
        if lasthour is not None:
            hours = (hour - lasthour).total_seconds() / HOURSECS
            if hours > 3:
                # a block of interaction has completed. add 1 hour for the
                # LASTHOUR that was observed.
                weekhours += 1

                # Compare (ISO year, ISO week) pairs: the bare week number
                # wraps from 52/53 back to 1 at the turn of the year, which
                # would hide the rollover and lose that week's hours.
                startweek = tuple(startblock.isocalendar())[:2]
                thisweek = tuple(hour.isocalendar())[:2]
                new_week = thisweek > startweek

                # if the work block was started on/before Friday, and this
                # new block starts after that, note the workweek hours
                if startblock.weekday() <= 4 and (hour.weekday() >= 5
                                                  or new_week):
                    if not for_chart:
                        print('Week Hours: %s' % (weekhours,))

                # if the work block was started in the week prior to this
                # block, then wrap up the prior week.
                if new_week:
                    if for_chart:
                        print('%s %s' % (startweek[1], weekhours))
                    else:
                        print('Week+end Hours: %s' % (weekhours,))
                    totalhours += weekhours
                    weekhours = 0

                # start/label the next block
                startblock = hour
                if not for_chart:
                    print('...')
                    print(hour.strftime('%A:'))
            else:
                weekhours += hours
        lasthour = hour
        if not for_chart:
            print('%s %d' % (hour, data[hour]))
        totalcount += data[hour]

    # NOTE(review): hours accumulated after the last week rollover are not
    # added to the total below -- confirm whether that is intended.
    print('')
    print('---')
    print('TOTAL (hours): %s' % (totalhours,))
    print('TOTAL (count): %s' % (totalcount,))


def _native(text):
    """Return *text* as the running interpreter's native ``str``.

    On Python 2 a ``unicode`` value is encoded to UTF-8 bytes, as this
    script has always done before printing.  On Python 3 the value is
    already ``str`` and is returned unchanged; encoding it there would
    produce ``bytes``, which the str-based HTML regex cannot process.
    """
    if isinstance(text, str):
        return text
    return text.encode('utf8')


def _get_users(zf):
    """Return {user_id: (name, email)} read from the zip's user list."""
    with contextlib.closing(zf.open(USERLIST)) as fp:
        listing = json.load(fp)
    return dict((user['user_id'], (user['name'], user['email']))
                for user in listing['users'])


def _get_chats(zf, room):
    """Return a list of (date, uid, user name, message) for *room*.

    Reads every member of the zip whose path starts with the room's
    directory followed by '2', in sorted name order.  HTML tags are
    stripped from the message text.
    """
    chats = []
    # Little hack: only get files like $room/2nnn-* and avoid $room/
    prefix = ROOMS + room + '/2'
    files = [f for f in zf.namelist() if f.startswith(prefix)]
    for fname in sorted(files):
        with contextlib.closing(zf.open(fname)) as fp:
            lines = json.load(fp)
        for line in lines:
            if not line:
                continue
            sender = line['from']
            if sender['user_id'] == 'api':
                uid = 'api'
            else:
                uid = str(sender['user_id'])
            chats.append((_native(line['date']),
                          uid,
                          _native(sender['name']),
                          _clean(_native(line['message']))))
    return chats


def _get_messages(tar, room, only_user=True):
    """Return a list of (timestamp, sender id, sender name, text) tuples.

    Reads ``rooms/<room>/history.json`` from the tar archive.  Entries
    without a 'UserMessage' key are skipped silently when *only_user* is
    true; otherwise they are reported with an 'ERROR:' line and skipped.
    """
    messages = []
    member = 'rooms/%s/history.json' % (room,)
    with contextlib.closing(tar.extractfile(member)) as fp:
        history = json.load(fp)
    for line in history:
        if 'UserMessage' not in line:
            if not only_user:
                print('ERROR: %s' % (line,))
            continue
        msg = line['UserMessage']
        messages.append((_native(msg['timestamp']),
                         msg['sender']['id'],
                         _native(msg['sender']['name']),
                         _clean(_native(msg['message']))))
    return messages


def _clean(msg):
    """Return *msg* with anything that looks like an HTML tag removed."""
    return _RE_STRIP_HTML.sub('', msg)


def main(argv=None):
    """Run the command named in *argv* (default ``sys.argv``).

    Returns the process exit status: 0 on success, 1 for an unknown
    command or a command given the wrong kind of archive, 2 for missing
    arguments.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.stderr.write(_USAGE % (argv[0] if argv else 'hipchat',))
        return 2

    cmd = argv[1]
    fname = argv[2]
    args = argv[3:]

    if cmd not in _MIN_ARGS:
        print('UNKNOWN COMMAND: %s' % (cmd,))
        return 1
    if len(args) < _MIN_ARGS[cmd]:
        sys.stderr.write('%s: missing argument\n' % (cmd,))
        sys.stderr.write(_USAGE % (argv[0],))
        return 2

    # this used to work with zipfiles. HipChat now downloads gzips.
    # some commands have been converted.
    is_zip = fname.endswith('.zip')
    if cmd in _TAR_COMMANDS and is_zip:
        sys.stderr.write('%s: requires a tar archive, not a .zip\n' % (cmd,))
        return 1
    if cmd in _ZIP_COMMANDS and not is_zip:
        sys.stderr.write('%s: requires a .zip archive\n' % (cmd,))
        return 1

    if is_zip:
        archive = zipfile.ZipFile(fname)
    else:
        archive = tarfile.open(fname)
    try:
        if cmd == 'list':
            Rooms(archive).print_list()
        elif cmd == 'room':
            print_room(archive, args[0])
        elif cmd == 'hist':
            histogram_room(archive, args[0])
        elif cmd == 'users':
            print_users(archive)
        elif cmd == 'user':
            histogram_user(archive, args[0], args[1:])
        elif cmd == 'chart':
            histogram_user(archive, args[0], args[1:], True)
    finally:
        archive.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())