diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..83d5114 --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# Configuration file: +/conf/config.yaml + +# Distribution / packaging +build/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +*.egg-info/ +/dist/ +MANIFEST + +# Python cache / compiled files: +__pycache__/ +*.py[cod] + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Environments +/.venv*/ +/venv*/ +/.env*/ +/env*/ + + +# JetBrains IDE +/.idea/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..88a4998 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +--- +default_stages: [pre-commit, pre-push] +default_language_version: + # force all unspecified python hooks to run python3 + python: python3 +minimum_pre_commit_version: "3.4.0" + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace diff --git a/README.md b/README.md index f2c99cc..9883b1e 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,20 @@ The Kibble Scanners collect information for the Kibble Suite. - Edit conf/config.yaml to match your Kibble service +### Dependencies + + - a running Elasticsearch server + ## How to run: - - On a daily/weekly/whatever basis, run: `python3 src/kibble-scanner.py`. + - On a daily/weekly/whatever basis, run in folder src: `python3 kibble-scanner.py`. ### Command line options: usage: kibble-scanner.py [-h] [-o ORG] [-f CONFIG] [-a AGE] [-s SOURCE] [-n NODES] [-t TYPE] [-e EXCLUDE [EXCLUDE ...]] [-v VIEW] - + optional arguments: -h, --help show this help message and exit -o ORG, --org ORG The organisation to gather stats for. If left out, all @@ -75,7 +79,44 @@ The Kibble Scanners collect information for the Kibble Suite. - python3-certifi - python3-yaml - +### Testing + + TBD + +### Pre-commit + + After running + + pip install -r requirements.txt + + Run + + pre-commit install + + to install + + Run it explicitly by + + pre-commit + + to run the checks in .pre-commit-config.yaml + + Once installed, pre-commit reads the configuration and runs the configured hooks, currently at the pre-commit and pre-push stages. + If any check fails, fix the reported issues and rerun the commit command until all hook checks pass. + + +### Project build + + After installation of the build tool + + pip install -q build + + build the project by running + + python -m build + + Find more information in the pyproject.toml file and [Setuptools](https://setuptools.pypa.io/). + # Get involved + TBD. Please see https://kibble.apache.org/ for details! 
- diff --git a/conf/config.yaml b/conf/config.yaml.sample similarity index 100% rename from conf/config.yaml rename to conf/config.yaml.sample diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..708dca3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,60 @@ +############################## +# Python packaging settings: # + +[build-system] +requires = ["setuptools", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[project] +name = "kibble-scanners" +description = "The Kibble Scanners collect information for the Kibble Suite. Apache Kibble is a tool to collect, aggregate and visualize data about any software project that uses commonly known tools." +version = "1.0.0" + +dependencies = [ + "python-dateutil", + "certifi", + "requests", + "psutil", + "elasticsearch", + "PyYAML>=5.2", +] + +requires-python = ">=3.9, <4.0" + +authors = [ + { name = "Apache Software Foundation", email = "dev@kibble.apache.org" }, +] +maintainers = [ + { name = "Apache Software Foundation", email="dev@kibble.apache.org" }, +] +keywords = [ + "kibble-scanners", "data" ] + +license = { text = "Apache License, Version 2.0" } +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Environment :: Console", + "Framework :: Apache Kibble-Scanners", + "License :: OSI Approved :: Apache Software License", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Topic :: System :: Monitoring" +] + +dynamic = [ + "readme" +] + +[project.urls] +repository = "https://github.com/apache/kibble-scanners.git" +"Bug Tracker" = "https://github.com/apache/kibble-scanners/issues" + + +[tool.setuptools] +dynamic = { readme = { file = ["README.md"] } } +packages.find = { where = ["src"] } diff --git a/requirements.txt 
b/requirements.txt index 7db5a42..6755b2d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ psutil python-dateutil requests pyyaml +pre_commit diff --git a/src/kibble-scanner.py b/src/kibble-scanner.py index ac43477..7b441bd 100644 --- a/src/kibble-scanner.py +++ b/src/kibble-scanner.py @@ -28,8 +28,8 @@ import plugins.brokers.kibbleES #import plugins.kibbleJSON -VERSION = "0.1.0" -CONFIG_FILE = "conf/config.yaml" +VERSION = "0.2.0" +CONFIG_FILE = "../conf/config.yaml" PENDING_OBJECTS = [] BIG_LOCK = threading.Lock() @@ -43,15 +43,16 @@ def base_parser(): arg_parser.add_argument("-t", "--type", help="Specific type of scanner to run (default is run all scanners)") arg_parser.add_argument("-e", "--exclude", nargs = '+', help="Specific type of scanner(s) to exclude") arg_parser.add_argument("-v", "--view", help="Specific source view to scan (default is scan all sources)") + arg_parser.add_argument("-j", "--filter", nargs='+', help="Jenkins-only: Filter the list of jobs (e.g. for debugging). To drill down to the target jobs, all nodes to the leaf node(s) are required, e.g --filter . Type is set to jenkins implicitely.") return arg_parser - + def pprint(string, err = False): line = "[core]: %s" % (string) if err: sys.stderr.write(line + "\n") else: print(line) - + def isMine(ID, config): if config['scanner'].get('balance', None): @@ -65,11 +66,11 @@ def isMine(ID, config): return True return False return True - + class scanThread(threading.Thread): """ A thread object that grabs an item from the queue and processes it, using whatever plugins will come out to play. 
""" - def __init__(self, broker, org, i, t = None, e = None): + def __init__(self, broker, org, i, t = None, e = None, f= None): super(scanThread, self).__init__() self.broker = broker self.org = org @@ -77,8 +78,12 @@ def __init__(self, broker, org, i, t = None, e = None): self.bit = self.broker.bitClass(self.broker, self.org, i) self.stype = t self.exclude = e + self.filter = f + # override + if self.filter: + self.stype = "jenkins" pprint("Initialized thread %i" % i) - + def run(self): global BIG_LOCK, PENDING_OBJECTS time.sleep(0.5) # Primarily to align printouts. @@ -89,6 +94,7 @@ def run(self): try: # Try grabbing an object (might not be any left!) obj = PENDING_OBJECTS.pop(0) + #print("object: %s" %(obj)) except: pass BIG_LOCK.release() @@ -97,14 +103,17 @@ def run(self): if isMine(obj['sourceID'], self.broker.config): # Run through list of scanners in order, apply when useful for sid, scanner in plugins.scanners.enumerate(): - + if scanner.accepts(obj): self.bit.pluginname = "plugins/scanners/" + sid # Excluded scanner type? if self.exclude and sid in self.exclude: continue + # specific jenkins filter + if self.stype and self.stype == sid and self.filter and sid == "jenkins": + scanner.scan(self.bit, obj, self.filter) # Specific scanner type or no types mentioned? - if not self.stype or self.stype == sid: + elif not self.stype or self.stype == sid: scanner.scan(self.bit, obj) else: break @@ -115,13 +124,13 @@ def main(): pprint("Kibble Scanner v/%s starting" % VERSION) global CONFIG_FILE, PENDING_OBJECTS args = base_parser().parse_args() - + # Load config yaml if args.config: CONFIG_FILE = args.config config = yaml.load(open(CONFIG_FILE), Loader=yaml.Loader) pprint("Loaded YAML config from %s" % CONFIG_FILE) - + # Which broker type do we use here? 
broker = None if 'elasticsearch' in config and config['elasticsearch'].get('enabled', False): @@ -130,14 +139,14 @@ def main(): else: pprint("Using HTTP JSON broker model") broker = plugins.brokers.kibbleJSON.Broker(config) - + orgNo = 0 sourceNo = 0 for org in broker.organisations(): if not args.org or args.org == org.id: pprint("Processing organisation %s" % org.id) orgNo += 1 - + # Compile source list # If --age is passed, only append source that either # have never been scanned, or have been scanned more than @@ -161,21 +170,21 @@ def main(): if not args.source or (args.source == source['sourceID']) or (args.source == source['sourceURL']): PENDING_OBJECTS.append(source) sourceNo += len(PENDING_OBJECTS) - + # Start up some threads equal to number of cores on the box, # but no more than 4. We don't want an IOWait nightmare. threads = [] core_count = min((4, int( multiprocessing.cpu_count() ))) for i in range(0, core_count): - sThread = scanThread(broker, org, i+1, args.type, args.exclude) + sThread = scanThread(broker, org, i+1, args.type, args.exclude, args.filter) sThread.start() threads.append(sThread) - + # Wait for them all to finish. for t in threads: t.join() - + pprint("All done scanning for now, found %i organisations and %i sources to process." % (orgNo, sourceNo)) - + if __name__ == '__main__': main() diff --git a/src/plugins/brokers/kibbleES.py b/src/plugins/brokers/kibbleES.py index a581f30..8ad1330 100644 --- a/src/plugins/brokers/kibbleES.py +++ b/src/plugins/brokers/kibbleES.py @@ -20,6 +20,7 @@ import elasticsearch.helpers import threading import sys +import traceback KIBBLE_DB_VERSION = 2 # Current DB struct version ACCEPTED_DB_VERSIONS = [1,2] # Versions we know how to work with. 
@@ -33,7 +34,7 @@ class _KibbleESWrapper(object): def __init__(self, ES): self.ES = ES self.indices = self.indicesClass(ES) - + def get(self, index, doc_type, id): return self.ES.get(index = index+'_'+doc_type, doc_type = '_doc', id = id) def exists(self, index, doc_type, id): @@ -57,12 +58,12 @@ def count(self, index, doc_type, body = None): doc_type = '_doc', body = body ) - + class indicesClass(object): """ Indices helper class """ def __init__(self, ES): self.ES = ES - + def exists(self, index): return self.ES.indices.exists(index = index) @@ -76,9 +77,9 @@ def __init__(self, ES, auth): self.ES = ES.options(basic_auth=auth) else: self.ES = ES - + self.indices = self.indicesClass(ES) - + def get(self, index, doc_type, id): return self.ES.get(index = index+'_'+doc_type, id = id) def exists(self, index, doc_type, id): @@ -100,17 +101,18 @@ def count(self, index, doc_type, body = None): index = index+'_'+doc_type, body = body ) - + class indicesClass(object): """ Indices helper class """ def __init__(self, ES): self.ES = ES - + def exists(self, index): return self.ES.indices.exists(index = index) + # This is redundant, refactor later? 
def pprint(string, err = False): line = "[core]: %s" % (string) @@ -121,7 +123,7 @@ def pprint(string, err = False): class KibbleBit: """ KibbleBit class with direct ElasticSearch access """ - + def __init__(self, broker, organisation, tid): self.config = broker.config self.organisation = organisation @@ -131,20 +133,20 @@ def __init__(self, broker, organisation, tid): self.pluginname = "" self.tid = tid self.dbname = self.broker.config['elasticsearch']['database'] - + def __del__(self): """ On unload/delete, push the last chunks of data to ES """ if self.json_queue: print("Pushing stragglers") self.bulk() - + def pprint(self, string, err = False): line = "[thread#%i:%s]: %s" % (self.tid, self.pluginname, string) if err: sys.stderr.write(line + "\n") else: print(line) - + def updateSource(self, source): """ Updates a source document, usually with a status update """ self.broker.DB.index(index=self.broker.config['elasticsearch']['database'], @@ -152,23 +154,23 @@ def updateSource(self, source): id=source['sourceID'], body = source ) - + def get(self, doctype, docid): """ Fetches a document from the DB """ doc = self.broker.DB.get(index=self.broker.config['elasticsearch']['database'], doc_type=doctype, id = docid) if doc: return doc['_source'] return None - + def exists(self, doctype, docid): """ Checks whether a document already exists or not """ return self.broker.DB.exists(index=self.broker.config['elasticsearch']['database'], doc_type=doctype, id = docid) - + def index(self, doctype, docid, document): """ Adds a new document to the index """ dbname = self.broker.config['elasticsearch']['database'] - self.broker.DB.index(index=dbname, doc_type = doctype, id = docid, body = document) - + self.broker.DB.index(index=dbname, doc_type = doctype, id = docid, body = document) + def append(self, t, doc): """ Append a document to the bulk push queue """ if not 'id' in doc: @@ -180,7 +182,7 @@ def append(self, t, doc): if len(self.json_queue) > self.queueMax: pprint("Bulk 
push forced") self.bulk() - + def bulk(self): """ Push pending JSON objects in the queue to ES""" xjson = self.json_queue @@ -193,6 +195,7 @@ def bulk(self): dbname = self.broker.config['elasticsearch']['database'] if self.broker.noTypes: dbname += "_%s" % js['doctype'] + #del doc['doctype'] defaultJSON = { '_op_type': 'update' if js.get('upsert') else 'index', '_index': dbname, @@ -213,28 +216,30 @@ def bulk(self): 'doc_as_upsert': True, }) try: - elasticsearch.helpers.bulk(self.broker.oDB, js_arr) + res = elasticsearch.helpers.bulk(self.broker.oDB, js_arr) + print("Result (success,failed): ", res) except Exception as err: + print("Error for INPUT JSON %s." % js_arr) pprint("Warning: Could not bulk insert: %s" % err) self.traceBack() - + def traceBack(self): err_type, err_value, tb = sys.exc_info() traceback_output = ['API traceback:'] traceback_output += traceback.format_tb(tb) traceback_output.append('%s: %s' % (err_type.__name__, err_value)) - pprint("Error: traceback_output: %s" % (traceback_output)) + print("Traceback: ", traceback_output ) return traceback_output - + class KibbleOrganisation: """ KibbleOrg with direct ElasticSearch access """ def __init__(self, broker, org): """ Init an org, set up ElasticSearch for KibbleBits later on """ - + self.broker = broker self.id = org - + def sources(self, sourceType = None, view = None): """ Get all sources or sources of a specific type for an org """ s = [] @@ -280,7 +285,7 @@ def sources(self, sourceType = None, view = None): } } ) - + for hit in res['hits']['hits']: if sourceType == None or hit['_source']['type'] == sourceType: s.append(hit['_source']) @@ -294,7 +299,7 @@ def __init__(self, config): if 'user' in es_config: auth = (es_config['user'], es_config['password']) pprint("Connecting to ElasticSearch database at %s:%i..." 
% (es_config['hostname'], es_config.get('port', 9200))) - + defaultELConfig = { 'host': es_config['hostname'], 'port': int(es_config.get('port', 9200)) @@ -309,7 +314,7 @@ def __init__(self, config): defaultELConfig['verify_certs'] = False defaultELConfig['url_prefix'] = es_config.get('uri', '') defaultELConfig['http_auth'] = auth - + es = elasticsearch.Elasticsearch([ defaultELConfig ], max_retries=5, retry_on_timeout=True @@ -352,11 +357,11 @@ def __init__(self, config): if apidoc['dbversion'] < KIBBLE_DB_VERSION: sys.stderr.write("The database '%s' uses an older structure format (version %u) than the scanners (version %u). Please upgrade your main Kibble server.\n" % (es_config['database'], apidoc['dbversion'], KIBBLE_DB_VERSION)) sys.exit(-1) - + def organisations(self): """ Return a list of all organisations """ orgs = [] - + # Run the search, fetch all orgs, 9999 max. TODO: Scroll??? res = self.DB.search( index=self.config['elasticsearch']['database'], @@ -368,10 +373,8 @@ def organisations(self): } } ) - + for hit in res['hits']['hits']: org = hit['_source']['id'] orgClass = KibbleOrganisation(self, org) yield orgClass - - diff --git a/src/plugins/scanners/bugzilla.py b/src/plugins/scanners/bugzilla.py index 4d9ca37..447af26 100644 --- a/src/plugins/scanners/bugzilla.py +++ b/src/plugins/scanners/bugzilla.py @@ -120,10 +120,10 @@ def scanTicket(bug, KibbleBit, source, openTickets, u, dom): else: pass #print("Ticket hasn't changed, ignoring...") - + if parseIt: KibbleBit.pprint("Parsing data from BugZilla for #%s" % key) - + params = { 'ids': [int(key)], 'limit': 0 @@ -132,7 +132,7 @@ def scanTicket(bug, KibbleBit, source, openTickets, u, dom): params['Bugzilla_login'] = source['creds']['username'] params['Bugzilla_password'] = source['creds']['password'] ticketsURL = "%s?method=Bug.get¶ms=[%s]" % (u, urllib.parse.quote(json.dumps(params))) - + js = plugins.utils.jsonapi.get(ticketsURL) js= js['result']['bugs'][0] creator = { @@ -162,17 +162,17 @@ def 
scanTicket(bug, KibbleBit, source, openTickets, u, dom): ticketsURL = "%s?method=Bug.comments¶ms=[%s]" % (u, urllib.parse.quote(json.dumps(params))) hjs = plugins.utils.jsonapi.get(ticketsURL) comments = len(hjs['result']['bugs'][str(key)]['comments']) - + title = bug['summary'] del params['ids'] if closer: - + pid = hashlib.sha1( ("%s%s" % (source['organisation'], closer['email'])).encode('ascii', errors='replace')).hexdigest() found = KibbleBit.exists('person', pid) if not found: params['names'] = [closer['email']] ticketsURL = "%s?method=User.get¶ms=[%s]" % (u, urllib.parse.quote(json.dumps(params))) - + try: ujs = plugins.utils.jsonapi.get(ticketsURL) displayName = ujs['result']['users'][0]['real_name'] @@ -180,7 +180,7 @@ def scanTicket(bug, KibbleBit, source, openTickets, u, dom): displayName = closer['email'] if displayName and len(displayName) > 0: # Add to people db - + jsp = { 'name': displayName, 'email': closer['email'], @@ -189,7 +189,7 @@ def scanTicket(bug, KibbleBit, source, openTickets, u, dom): } #print("Updating person DB for closer: %s (%s)" % (displayName, closerEmail)) KibbleBit.index('person', pid, jsp) - + if creator: pid = hashlib.sha1( ("%s%s" % (source['organisation'], creator['email'])).encode('ascii', errors='replace')).hexdigest() found = KibbleBit.exists('person', pid) @@ -204,7 +204,7 @@ def scanTicket(bug, KibbleBit, source, openTickets, u, dom): creator['name'] = creator['email'] if creator['name'] and len(creator['name']) > 0: # Add to people db - + jsp = { 'name': creator['name'], 'email': creator['email'], @@ -212,7 +212,7 @@ def scanTicket(bug, KibbleBit, source, openTickets, u, dom): 'id' :pid } KibbleBit.index('person', pid, jsp) - + jso = { 'id': dhash, 'key': key, @@ -223,7 +223,7 @@ def scanTicket(bug, KibbleBit, source, openTickets, u, dom): 'created': cd, 'closed': rd, 'issuetype': 'issue', - 'issueCloser': closer['email'] if 'email' in closer else None, + 'issueCloser': closer['email'] if 'email' in closer else None, 
'createdDate': time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(cd)), 'closedDate': time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd)) if rd else None, 'changeDate': time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd if rd else cd)), @@ -238,8 +238,8 @@ def scanTicket(bug, KibbleBit, source, openTickets, u, dom): except Exception as err: KibbleBit.pprint(err) return False - - + + class bzThread(Thread): @@ -252,10 +252,10 @@ def __init__(self, KibbleBit, source, block, pt, ot, u, dom): self.openTickets = ot self.u = u self.dom = dom - + def run(self): badOnes = 0 - + while len(self.pendingTickets) > 0 and badOnes <= 50: if len(self.pendingTickets) % 10 == 0: self.KibbleBit.pprint("%u elements left to count" % len(self.pendingTickets)) @@ -284,13 +284,16 @@ def run(self): return else: badOnes = 0 - + def scan(KibbleBit, source): path = source['sourceID'] url = source['sourceURL'] - + + if not 'steps' in source: + source['steps'] = {} + source['steps']['issues'] = { 'time': time.time(), 'status': 'Parsing BugZilla changes...', @@ -298,7 +301,7 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + bz = re.match(r"(https?://\S+?)(/jsonrpc\.cgi)?[\s:?]+(.+)", url) if bz: if source['creds'] and 'username' in source['creds'] and source['creds']['username'] and len(source['creds']['username']) > 0: @@ -313,10 +316,10 @@ def scan(KibbleBit, source): u = "%s/jsonrpc.cgi" % dom instance = bz.group(3) lastTicket = 0 - + params = { 'product': [instance], - 'status': ["RESOLVED", "CLOSED", "NEW","UNCOMFIRMED","ASSIGNED","REOPENED","VERIFIED"], + 'status': ["RESOLVED", "CLOSED", "NEW","UNCOMFIRMED","ASSIGNED","REOPENED","VERIFIED"], 'include_fields': ['id', 'creation_time', 'status', 'summary', 'creator'], 'limit': 10000, 'offset': 1 @@ -324,12 +327,12 @@ def scan(KibbleBit, source): # If * is requested, just omit the product name if instance == '*': params = { - 'status': ["RESOLVED", "CLOSED", "NEW","UNCOMFIRMED","ASSIGNED","REOPENED","VERIFIED"], + 
'status': ["RESOLVED", "CLOSED", "NEW","UNCOMFIRMED","ASSIGNED","REOPENED","VERIFIED"], 'include_fields': ['id', 'creation_time', 'status', 'summary', 'creator'], 'limit': 10000, 'offset': 1 } - + ticketsURL = "%s?method=Bug.search¶ms=[%s]" % (u, urllib.parse.quote(json.dumps(params))) while True: @@ -338,7 +341,7 @@ def scan(KibbleBit, source): except: KibbleBit.pprint("Couldn't fetch more tickets, bailing") break - + if len(js['result']['bugs']) > 0: KibbleBit.pprint("%s: Found %u tickets..." % (source['sourceURL'], ((params.get('offset', 1)-1) + len(js['result']['bugs'])))) for bug in js['result']['bugs']: @@ -350,7 +353,7 @@ def scan(KibbleBit, source): else: KibbleBit.pprint("No more tickets left to scan") break - + KibbleBit.pprint("Found %u open tickets, %u closed." % (len(openTickets), len(pendingTickets) - len(openTickets))) badOnes = 0 @@ -360,10 +363,10 @@ def scan(KibbleBit, source): t = bzThread(KibbleBit, source, block, pendingTickets, openTickets, u, dom) threads.append(t) t.start() - + for t in threads: t.join() - + source['steps']['issues'] = { 'time': time.time(), diff --git a/src/plugins/scanners/buildbot.py b/src/plugins/scanners/buildbot.py index b99f5bf..566e86c 100644 --- a/src/plugins/scanners/buildbot.py +++ b/src/plugins/scanners/buildbot.py @@ -30,7 +30,7 @@ """ title = "Scanner for Buildbot" -version = "0.1.0" +version = "0.1.1" def accepts(source): """ Determines whether we want to handle this source """ @@ -41,17 +41,17 @@ def accepts(source): def scanJob(KibbleBit, source, job, creds): """ Scans a single job for activity """ - NOW = int(datetime.datetime.utcnow().timestamp()) + NOW = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) dhash = hashlib.sha224( ("%s-%s-%s" % (source['organisation'], source['sourceID'], job) ).encode('ascii', errors='replace')).hexdigest() found = True doc= None parseIt = False found = KibbleBit.exists('cijob', dhash) - + jobURL = "%s/api/v2/builders/%s/builds" % (source['sourceURL'], job) 
KibbleBit.pprint(jobURL) jobjson = plugins.utils.jsonapi.get(jobURL, auth = creds) - + # If valid JSON, ... if jobjson: for buildno, data in jobjson.items(): @@ -61,16 +61,16 @@ def scanJob(KibbleBit, source, job, creds): builddoc = KibbleBit.get('ci_build', buildhash) except: pass - + # If this build already completed, no need to parse it again if builddoc and builddoc.get('completed', False): continue - + KibbleBit.pprint("[%s-%s] This is new or pending, analyzing..." % (job, buildno)) - + completed = True if 'currentStep' in data else False - - + + # Get build status (success, failed, canceled etc) status = 'building' if 'successful' in data.get('text', []): @@ -79,7 +79,7 @@ def scanJob(KibbleBit, source, job, creds): status = 'failed' if 'exception' in data.get('text', []): status = 'aborted' - + DUR = 0 # Calc when the build finished if completed and len(data.get('times', [])) == 2 and data['times'][1]: @@ -87,7 +87,7 @@ def scanJob(KibbleBit, source, job, creds): DUR = FIN - data['times'][0] else: FIN = 0 - + doc = { # Build specific data 'id': buildhash, @@ -100,7 +100,7 @@ def scanJob(KibbleBit, source, job, creds): 'status': status, 'started': int(data['times'][0]), 'ci': 'buildbot', - + # Standard docs values 'sourceID': source['sourceID'], 'organisation': source['organisation'], @@ -109,7 +109,7 @@ def scanJob(KibbleBit, source, job, creds): KibbleBit.append('ci_build', doc) # Yay, it worked! return True - + # Boo, it failed! 
KibbleBit.pprint("Fetching job data failed!") return False @@ -124,7 +124,7 @@ def __init__(self, block, KibbleBit, source, creds, jobs): self.creds = creds self.source = source self.jobs = jobs - + def run(self): badOnes = 0 while len(self.jobs) > 0 and badOnes <= 50: @@ -158,7 +158,9 @@ def scan(KibbleBit, source): # Simple URL check buildbot = re.match(r"(https?://.+)", source['sourceURL']) if buildbot: - + if not 'steps' in source: + source['steps'] = {} + source['steps']['ci'] = { 'time': time.time(), 'status': 'Parsing Buildbot job changes...', @@ -166,7 +168,7 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + badOnes = 0 pendingJobs = [] KibbleBit.pprint("Parsing Buildbot activity at %s" % source['sourceURL']) @@ -177,22 +179,22 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - - # Buildbot may neeed credentials + + # Buildbot may need credentials creds = None if source['creds'] and 'username' in source['creds'] and source['creds']['username'] and len(source['creds']['username']) > 0: creds = "%s:%s" % (source['creds']['username'], source['creds']['password']) - + # Get the job list sURL = source['sourceURL'] KibbleBit.pprint("Getting job list...") builders = plugins.utils.jsonapi.get("%s/api/v2/builders" % sURL , auth = creds) - + # Save queue snapshot - NOW = int(datetime.datetime.utcnow().timestamp()) + NOW = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) queuehash = hashlib.sha224( ("%s-%s-queue-%s" % (source['organisation'], source['sourceID'], int(time.time())) ).encode('ascii', errors='replace')).hexdigest() - - + + # Scan queue items blocked = 0 stuck = 0 @@ -202,7 +204,7 @@ def scan(KibbleBit, source): actualQueueSize = 0 building = 0 jobs = [] - + for builder, data in builders.items(): jobs.append(builder) if data['state'] == 'building': @@ -217,8 +219,8 @@ def scan(KibbleBit, source): # Stuck builds (iow no builder available) if data['state'] == 'offline': stuck += 
data.get('pendingBuilds', 0) - - + + # Write up a queue doc queuedoc = { 'id': queuehash, @@ -229,16 +231,16 @@ def scan(KibbleBit, source): 'stuck': stuck, 'building': building, 'ci': 'buildbot', - + # Standard docs values 'sourceID': source['sourceID'], 'organisation': source['organisation'], 'upsert': True, } KibbleBit.append('ci_queue', queuedoc) - + KibbleBit.pprint("Found %u builders in Buildbot" % len(jobs)) - + threads = [] block = threading.Lock() KibbleBit.pprint("Scanning jobs using 4 sub-threads") @@ -246,11 +248,11 @@ def scan(KibbleBit, source): t = buildbotThread(block, KibbleBit, source, creds, jobs) threads.append(t) t.start() - + for t in threads: t.join() - # We're all done, yaay + # We're all done, yaay KibbleBit.pprint("Done scanning %s" % source['sourceURL']) source['steps']['ci'] = { @@ -260,4 +262,3 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - \ No newline at end of file diff --git a/src/plugins/scanners/discourse.py b/src/plugins/scanners/discourse.py index d160baa..9ecae03 100644 --- a/src/plugins/scanners/discourse.py +++ b/src/plugins/scanners/discourse.py @@ -30,7 +30,7 @@ """ title = "Scanner for Discourse Forums" -version = "0.1.0" +version = "0.1.1" def accepts(source): """ Determines whether we want to handle this source """ @@ -41,30 +41,30 @@ def accepts(source): def scanJob(KibbleBit, source, cat, creds): """ Scans a single discourse category for activity """ - NOW = int(datetime.datetime.utcnow().timestamp()) - + NOW = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) + # Get $discourseURL/c/$catID - + catURL = os.path.join(source['sourceURL'], "c/%s" % cat['id']) KibbleBit.pprint("Scanning Discourse category '%s' at %s" % (cat['slug'], catURL)) - + page = 0 allUsers = {} - + # For each paginated result (up to page 100), check for changes while page < 100: pcatURL = "%s?page=%u" % (catURL, page) catjson = plugins.utils.jsonapi.get(pcatURL, auth = creds) page += 1 - - + + if catjson: 
- + # If we hit an empty list (no more topics), just break the loop. if not catjson['topic_list']['topics']: break - - # First (if we have data), we should store the known users + + # First (if we have data), we should store the known users # Since discourse hides the email (obviously!), we'll have to # fake one to generate an account. fakeDomain = "foo.discourse" @@ -75,7 +75,7 @@ def scanJob(KibbleBit, source, cat, creds): # Fake email address, compute deterministic ID email = "%s@%s" % (user['username'], fakeDomain) dhash = hashlib.sha224( ("%s-%s-%s" % (source['organisation'], source['sourceURL'], email) ).encode('ascii', errors='replace')).hexdigest() - + # Construct a very sparse user document userDoc = { 'id': dhash, @@ -83,29 +83,29 @@ def scanJob(KibbleBit, source, cat, creds): 'name': user['username'], 'email': email, } - + # Store user-ID-to-username mapping for later allUsers[user['id']] = userDoc - + # Store it (or, queue storage) unless it exists. # We don't wanna override better data, so we check if # it's there first. if not KibbleBit.exists('person', dhash): KibbleBit.append('person', userDoc) - + # Now, for each topic, we'll store a topic document for topic in catjson['topic_list']['topics']: - + # Calculate topic ID dhash = hashlib.sha224( ("%s-%s-topic-%s" % (source['organisation'], source['sourceURL'], topic['id']) ).encode('ascii', errors='replace')).hexdigest() - + # Figure out when topic was created and updated CreatedDate = datetime.datetime.strptime(topic['created_at'], "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() if topic.get('last_posted_at'): UpdatedDate = datetime.datetime.strptime(topic['last_posted_at'], "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() else: UpdatedDate = 0 - + # Determine whether we should scan this topic or continue to the next one. # We'll do this by seeing if the topic already exists and has no changes or not. 
if KibbleBit.exists('forum_topic', dhash): @@ -113,14 +113,14 @@ def scanJob(KibbleBit, source, cat, creds): # If update in the old doc was >= current update timestamp, skip the topic if fdoc['updated'] >= UpdatedDate: continue - - + + # Assuming we need to scan this, start by making the base topic document topicdoc = { 'id': dhash, 'sourceID': source['sourceID'], 'organisation': source['organisation'], - + 'type': 'discourse', 'category': cat['slug'], 'title': topic['title'], @@ -134,23 +134,23 @@ def scanJob(KibbleBit, source, cat, creds): 'views': topic['views'], 'url': source['sourceURL'] + "/t/%s/%s" % (topic['slug'], topic['id']) } - + KibbleBit.append('forum_topic', topicdoc) KibbleBit.pprint("%s is new or changed, scanning" % topicdoc['url']) - + # Now grab all the individual replies/posts # Remember to not have it count as a visit! pURL = "%s?track_visit=false&forceLoad=true" % topicdoc['url'] pjson = plugins.utils.jsonapi.get(pURL, auth = creds) - + posts = pjson['post_stream']['posts'] - + # For each post/reply, construct a forum_entry document KibbleBit.pprint("%s has %u posts" % (pURL, len(posts))) for post in posts: phash = hashlib.sha224( ("%s-%s-post-%s" % (source['organisation'], source['sourceURL'], post['id']) ).encode('ascii', errors='replace')).hexdigest() uname = post.get('name', post['username']) or post['username'] # Hack to get longest non-zero value - + # Find the hash of the person who posted it # We may know them, or we may have to store them. # If we have better info now (full name), re-store @@ -160,7 +160,7 @@ def scanJob(KibbleBit, source, cat, creds): # Same as before, fake email, store... 
email = "%s@%s" % (post['username'], fakeDomain) uhash = hashlib.sha224( ("%s-%s-%s" % (source['organisation'], source['sourceURL'], email) ).encode('ascii', errors='replace')).hexdigest() - + # Construct a very sparse user document userDoc = { 'id': uhash, @@ -168,22 +168,22 @@ def scanJob(KibbleBit, source, cat, creds): 'name': uname, 'email': email, } - + # Store user-ID-to-username mapping for later allUsers[user['id']] = userDoc - + # Store it (or, queue storage) KibbleBit.append('person', userDoc) - + # Get post date CreatedDate = datetime.datetime.strptime(post['created_at'], "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() - + # Store the post/reply document pdoc = { 'id': phash, 'sourceID': source['sourceID'], 'organisation': source['organisation'], - + 'type': 'discourse', 'creator': uhash, 'created': CreatedDate, @@ -209,7 +209,7 @@ def __init__(self, block, KibbleBit, source, creds, jobs): self.creds = creds self.source = source self.jobs = jobs - + def run(self): badOnes = 0 while len(self.jobs) > 0 and badOnes <= 50: @@ -243,7 +243,9 @@ def scan(KibbleBit, source): # Simple URL check discourse = re.match(r"(https?://.+)", source['sourceURL']) if discourse: - + if not 'steps' in source: + source['steps'] = {} + source['steps']['forum'] = { 'time': time.time(), 'status': 'Parsing Discourse topics...', @@ -251,7 +253,7 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + badOnes = 0 pendingJobs = [] KibbleBit.pprint("Parsing Discourse activity at %s" % source['sourceURL']) @@ -262,22 +264,22 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + # Discourse may neeed credentials (if basic auth) creds = None if source['creds'] and 'username' in source['creds'] and source['creds']['username'] and len(source['creds']['username']) > 0: creds = "%s:%s" % (source['creds']['username'], source['creds']['password']) - + # Get the list of categories sURL = source['sourceURL'] KibbleBit.pprint("Getting categories...") 
catjs = plugins.utils.jsonapi.get("%s/categories_and_latest" % sURL , auth = creds) - + # Directly assign the category list as pending jobs queue, ezpz. pendingJobs = catjs['category_list']['categories'] - + KibbleBit.pprint("Found %u categories" % len(pendingJobs)) - + # Now fire off 4 threads to parse the categories threads = [] block = threading.Lock() @@ -286,11 +288,11 @@ def scan(KibbleBit, source): t = discourseThread(block, KibbleBit, source, creds, pendingJobs) threads.append(t) t.start() - + for t in threads: t.join() - # We're all done, yaay + # We're all done, yaay KibbleBit.pprint("Done scanning %s" % source['sourceURL']) source['steps']['forum'] = { @@ -300,4 +302,3 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - \ No newline at end of file diff --git a/src/plugins/scanners/gerrit.py b/src/plugins/scanners/gerrit.py index 86dd327..70148a1 100644 --- a/src/plugins/scanners/gerrit.py +++ b/src/plugins/scanners/gerrit.py @@ -154,6 +154,9 @@ def status_changed(stored_change, change): return stored_change['status'] != change['status'] def scan(KibbleBit, source): + if not 'steps' in source: + source['steps'] = {} + source['steps']['issues'] = { 'time': time.time(), 'status': 'Analyzing Gerrit tickets...', @@ -226,7 +229,7 @@ def scan(KibbleBit, source): except requests.HTTPError as e: print(e) - + source['steps']['issues'] = { 'time': time.time(), 'status': 'Done analyzing tickets!', diff --git a/src/plugins/scanners/git-census.py b/src/plugins/scanners/git-census.py index f5cc69d..b0327fc 100644 --- a/src/plugins/scanners/git-census.py +++ b/src/plugins/scanners/git-census.py @@ -50,8 +50,11 @@ def scan(KibbleBit, source): url = source['sourceURL'] rootpath = "%s/%s/git" % (KibbleBit.config['scanner']['scratchdir'], source['organisation']) gpath = os.path.join(rootpath, rid) - - if 'steps' in source and source['steps']['sync']['good'] and os.path.exists(gpath): + + if not 'steps' in source: + source['steps'] = {} + + if 
source['steps']['sync']['good'] and os.path.exists(gpath): source['steps']['census'] = { 'time': time.time(), 'status': 'Census count started at ' + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()), @@ -177,7 +180,7 @@ def scan(KibbleBit, source): # Make a list of changed files, max 1024 filelist = list(files_touched) filelist = filelist[:1023] - + # ES commit documents tsd = ts - (ts % 86400) js = { @@ -222,7 +225,7 @@ def scan(KibbleBit, source): 'organisation': source['organisation'], 'id' : hashlib.sha1( ("%s%s" % (source['organisation'], ce)).encode('ascii', errors='replace')).hexdigest() }) - KibbleBit.append ( 'person', + KibbleBit.append ( 'person', { 'upsert': True, 'name': an, @@ -234,7 +237,7 @@ def scan(KibbleBit, source): ) KibbleBit.append('code_commit', js) KibbleBit.append('code_commit_unique', jsx) - + if True: # Do file changes?? Might wanna make this optional KibbleBit.pprint("Scanning file changes for %s" % source['sourceURL']) for filename in modificationDates: @@ -257,7 +260,7 @@ def scan(KibbleBit, source): del jsfe['created'] del jsfe['createdDate'] KibbleBit.append('file_history', jsfe) - + source['steps']['census'] = { 'time': time.time(), 'status': 'Census count completed at ' + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()), @@ -266,5 +269,3 @@ def scan(KibbleBit, source): } source['census'] = time.time() KibbleBit.updateSource(source) - - diff --git a/src/plugins/scanners/git-evolution.py b/src/plugins/scanners/git-evolution.py index 8ed648c..b9b6ca5 100644 --- a/src/plugins/scanners/git-evolution.py +++ b/src/plugins/scanners/git-evolution.py @@ -16,14 +16,13 @@ # limitations under the License. 
""" Git Evolution scanner """ +import importlib import os import subprocess -import re import time import calendar import datetime -import plugins.utils.git -import plugins.utils.sloc + import hashlib from collections import namedtuple @@ -68,7 +67,7 @@ def release(KibbleBit, source, status, exception=None, good=False): if exception: source['steps']['evolution'].update({'exception': exception}) KibbleBit.updateSource(source) - + def check_branch(gpath, date, branch): try: @@ -114,48 +113,48 @@ def find_branch(date, gpath): def scan(KibbleBit, source): - + rid = source['sourceID'] url = source['sourceURL'] rootpath = "%s/%s/git" % (KibbleBit.config['scanner']['scratchdir'], source['organisation']) gpath = os.path.join(rootpath, rid) - + gname = source['sourceID'] KibbleBit.pprint("Doing evolution scan of %s" % gname) - + inp = get_first_ref(gpath) if inp: ts = int(inp.split()[0]) ts = ts - (ts % 86400) date = time.strftime("%Y-%b-%d 0:00", time.gmtime(ts)) - + #print("Starting from %s" % date) now = time.time() - + rid = source['sourceID'] url = source['sourceURL'] rootpath = "%s/%s/git" % (KibbleBit.config['scanner']['scratchdir'], source['organisation']) gpath = os.path.join(rootpath, rid) - + if source['steps']['sync']['good'] and os.path.exists(gpath): acquire(KibbleBit, source) branch = find_branch(date, gpath) - + if not branch: release(source, "Could not do evolutionary scan of code", "No default branch was found in this repository") return - + branch_exists = check_branch(gpath, date, branch) - + if not branch_exists: KibbleBit.pprint("Not trunk either (bad repo?), skipping") release(source, "Could not do evolutionary scan of code", "No default branch was found in this repository") return - + try: - + d = time.gmtime(now) year = d[0] quarter = d[1] - (d[1] % 3) @@ -166,7 +165,7 @@ def scan(KibbleBit, source): pd = datetime.datetime(year, quarter, 1).replace(tzinfo=datetime.timezone.utc).timetuple() date = time.strftime("%Y-%b-%d 0:00", pd) unix = 
calendar.timegm(pd) - + # Skip the dates we've already processed dhash = hashlib.sha224((source['sourceID'] + date).encode('ascii', 'replace')).hexdigest() @@ -174,7 +173,8 @@ def scan(KibbleBit, source): if not found: checkout(gpath, date, branch) KibbleBit.pprint("Running cloc on %s (%s) at %s" % (gname, source['sourceURL'], date)) - languages, codecount, comment, blank, years, cost = plugins.utils.sloc.count(gpath) + sloc = importlib.import_module("plugins.utils.sloc") + languages, codecount, comment, blank, years, cost = sloc.count(gpath) js = { 'time': unix, 'sourceID': source['sourceID'], @@ -192,7 +192,7 @@ def scan(KibbleBit, source): if quarter <= 0: quarter += 12 year -= 1 - + # decrease month by 3 now = time.mktime(datetime.date(year, quarter, 1).timetuple()) except Exception as e: @@ -201,9 +201,7 @@ def scan(KibbleBit, source): time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()), str(e)) return - + release(KibbleBit, source, "Evolution scan completed at " + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()), good=True) - - \ No newline at end of file diff --git a/src/plugins/scanners/git-sloc.py b/src/plugins/scanners/git-sloc.py index e2294f7..0b202ce 100644 --- a/src/plugins/scanners/git-sloc.py +++ b/src/plugins/scanners/git-sloc.py @@ -16,19 +16,17 @@ # limitations under the License. """ Source Lines of Code counter for Git """ - - +import importlib import os import sys import subprocess import time import shutil -import plugins.utils.git -import plugins.utils.sloc + import re title = "SloC Counter for Git" -version = "0.1.0" +version = "0.1.1" def accepts(source): """ Do we accept this source? 
""" @@ -40,12 +38,15 @@ def accepts(source): return False def scan(KibbleBit, source): - + rid = source['sourceID'] url = source['sourceURL'] rootpath = "%s/%s/git" % (KibbleBit.config['scanner']['scratchdir'], source['organisation']) gpath = os.path.join(rootpath, rid) - + + if not 'steps' in source: + source['steps'] = {} + if source['steps']['sync']['good'] and os.path.exists(gpath): source['steps']['count'] = { 'time': time.time(), @@ -54,17 +55,19 @@ def scan(KibbleBit, source): 'good': True, } KibbleBit.updateSource(source) - + + git = importlib.import_module("plugins.utils.git") try: - branch = plugins.utils.git.defaultBranch(source, gpath) + branch = git.defaultBranch(source, gpath) subprocess.call('cd %s && git checkout %s' % (gpath, branch), shell = True) except: KibbleBit.pprint("SLoC counter failed to find main branch for %s!!" % url) return False - + KibbleBit.pprint("Running SLoC count for %s" % url) - languages, codecount, comment, blank, years, cost = plugins.utils.sloc.count(gpath) - + sloc = importlib.import_module("plugins.utils.sloc") + languages, codecount, comment, blank, years, cost = sloc.count(gpath) + sloc = { 'sourceID': source['sourceID'], 'loc': codecount, diff --git a/src/plugins/scanners/git-sync.py b/src/plugins/scanners/git-sync.py index 234e33b..64bec50 100644 --- a/src/plugins/scanners/git-sync.py +++ b/src/plugins/scanners/git-sync.py @@ -15,16 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. - import os import sys import subprocess import time import shutil -import plugins.utils.git + +import plugins.utils.git as git title = "Sync plugin for Git repositories" -version = "0.1.2" +version = "0.1.3" def accepts(source): """ Do we accept this source? """ @@ -34,15 +34,15 @@ def accepts(source): if source['type'] == 'github' and source.get('issuesonly', False) == False: return True return False - + def scan(KibbleBit, source): - + #KibbleBit.pprint("Scan source: %s." 
% source) # Get some vars, construct a data path for the repo path = source['sourceID'] url = source['sourceURL'] rootpath = "%s/%s/git" % (KibbleBit.config['scanner']['scratchdir'], source['organisation']) - + # If the root path does not exist, try to make it recursively. if not os.path.exists(rootpath): try: @@ -57,14 +57,14 @@ def scan(KibbleBit, source): } KibbleBit.updateSource(source) return - + # This is were the repo should be cloned datapath = os.path.join(rootpath, path) - + KibbleBit.pprint("Checking out %s as %s" % (url, path)) try: - if 'steps' not in source: # initial fetch of a github repo may miss steps + if 'steps' not in source: # initial fetch of a github repo may miss steps source['steps'] = {} source['steps']['sync'] = { 'time': time.time(), @@ -73,13 +73,13 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + # If we already checked this out earlier, just sync it. if os.path.exists(datapath): KibbleBit.pprint("Repo %s exists, fetching changes..." % datapath) - + # Do we have a default branch here? - branch = plugins.utils.git.defaultBranch(source, datapath, KibbleBit) + branch = git.defaultBranch(source, datapath, KibbleBit) if len(branch) == 0: source['default_branch'] = branch source['steps']['sync'] = { @@ -113,7 +113,7 @@ def scan(KibbleBit, source): fcommit = fcommit.decode('ascii').strip() subprocess.check_call("cd %s && git reset --hard %s" % (datapath, fcommit), shell = True, stderr=subprocess.STDOUT) try: - subprocess.check_call("cd %s && git clean -xfd" % datpath, shell = True, stderr=subprocess.STDOUT) + subprocess.check_call("cd %s && git clean -xfd" % datapath, shell = True, stderr=subprocess.STDOUT) except: pass # This is a new repo, clone it! @@ -133,7 +133,7 @@ def scan(KibbleBit, source): } KibbleBit.updateSource(source) return - + # All good, yay! 
source['steps']['sync'] = { 'time': time.time(), @@ -142,4 +142,3 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - diff --git a/src/plugins/scanners/github-issues.py b/src/plugins/scanners/github-issues.py index ebab0eb..f6737c4 100644 --- a/src/plugins/scanners/github-issues.py +++ b/src/plugins/scanners/github-issues.py @@ -14,13 +14,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import importlib import re import hashlib from dateutil import parser import time import requests -import plugins.utils.github title = "Scanner for GitHub Issues" version = "0.1.0" @@ -59,7 +58,7 @@ def make_issue(source, issue, people): owner_email = people[issue['user']['login']]['email'] issue_closer = owner_email - if 'closed_by' in issue: + if 'closed_by' in issue and issue['closed_by'] is not None: issue_closer = people[issue['closed_by']['login']] # Is this an issue ro a pull request? itype = "issue" @@ -116,17 +115,19 @@ def update_issue(KibbleBit, issue): def update_person(KibbleBit, person): person['upsert'] = True KibbleBit.append('person', person) - + def scan(KibbleBit, source, firstAttempt = True): auth=None people = {} + github = importlib.import_module("plugins.utils.github") + if 'creds' in source: KibbleBit.pprint("Using auth for repo %s" % source['sourceURL']) creds = source['creds'] if creds and 'username' in creds: auth = (creds['username'], creds['password']) - TL = plugins.utils.github.get_tokens_left(auth=auth) + TL = github.get_tokens_left(auth=auth) KibbleBit.pprint("Scanning for GitHub issues (%u tokens left on GitHub)" % TL) # Have we scanned before? If so, only do a 3 month scan here. 
doneBefore = False @@ -143,11 +144,11 @@ def scan(KibbleBit, source, firstAttempt = True): if doneBefore: since = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.time() - (3*30*86400))) KibbleBit.pprint("Fetching changes since %s" % since) - issues = plugins.utils.github.get_all(source, plugins.utils.github.issues, + issues = github.get_all(source, github.issues, params={'filter': 'all', 'state':'all', 'since': since}, auth=auth) else: - issues = plugins.utils.github.get_all(source, plugins.utils.github.issues, + issues = github.get_all(source, github.issues, params={'filter': 'all', 'state':'all'}, auth=auth) KibbleBit.pprint("Fetched %s issues for %s" %(str(len(issues)), source['sourceURL'])) @@ -155,13 +156,14 @@ def scan(KibbleBit, source, firstAttempt = True): for issue in issues: if not issue['user']['login'] in people: - person = make_person(source, issue, plugins.utils.github.user(issue['user']['url'], + person = make_person(source, issue, github.user(issue['user']['url'], auth=auth)) people[issue['user']['login']] = person update_person(KibbleBit, person) - if 'closed_by' in issue and not issue['closed_by']['login'] in people: - closer = make_person(source, issue, plugins.utils.github.user(issue['closed_by']['url'], + #KibbleBit.pprint("issue: %s" % issue ) + if 'closed_by' in issue and issue['closed_by'] is not None and not issue['closed_by']['login'] in people: + closer = make_person(source, issue, github.user(issue['closed_by']['url'], auth=auth)) people[issue['closed_by']['login']] = closer update_person(KibbleBit, closer) @@ -171,12 +173,15 @@ def scan(KibbleBit, source, firstAttempt = True): stored_change = None if KibbleBit.exists('issue', dhash): es_doc = KibbleBit.get('issue', dhash) - if not status_changed(es_doc, doc): + if 'doc' in es_doc: + es_doc = es_doc['doc'] + #KibbleBit.pprint("status %s seen %s." 
% ('status' in es_doc, 'status' in doc)) + if 'status' in es_doc and 'status' in doc and not status_changed(es_doc, doc): #KibbleBit.pprint("change %s seen already and status unchanged. Skipping." % issue['id']) continue update_issue(KibbleBit, doc) - + source['steps']['issues'] = { 'time': time.time(), 'status': 'Issue scan completed at ' + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()), @@ -189,20 +194,20 @@ def scan(KibbleBit, source, firstAttempt = True): # If we errored out because of rate limiting, retry later, otherwise bail if firstAttempt: sleeps = 0 - if plugins.utils.github.get_tokens_left(auth=auth) < 10: + if github.get_tokens_left(auth=auth) < 10: KibbleBit.pprint("Hit rate limits, trying to sleep it off!") - while plugins.utils.github.get_tokens_left(auth=auth) < 10: + while github.get_tokens_left(auth=auth) < 10: sleeps += 1 if sleeps > 24: KibbleBit.pprint("Slept for too long without finding a reset rate limit, giving up!") break time.sleep(300) # Sleep 5 min, then check again.. # If we have tokens, try one more time... - if plugins.utils.github.get_tokens_left(auth=auth) > 10: + if github.get_tokens_left(auth=auth) > 10: scan(KibbleBit, source, False) # If this one fails, bail completely return - - + + KibbleBit.pprint("HTTP Error, rate limit exceeded?") source['steps']['issues'] = { 'time': time.time(), diff --git a/src/plugins/scanners/github-stats.py b/src/plugins/scanners/github-stats.py index 4ac933c..3c8d9f4 100644 --- a/src/plugins/scanners/github-stats.py +++ b/src/plugins/scanners/github-stats.py @@ -14,14 +14,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- - +import hashlib +import importlib import os +import re import sys import subprocess import time import shutil -import plugins.utils.git + +from src.plugins.brokers.kibbleES import KibbleBit title = "Traffic statistics plugin for GitHub repositories" version = "0.1.0" @@ -31,17 +33,17 @@ def accepts(source): if source['type'] == 'github': return True return False - + def getTime(string): """ Convert GitHub timestamp to epoch """ return time.mktime(time.strptime(re.sub(r"Z", "", str(string)), "%Y-%m-%dT%H:%M:%S")) def scan(KibbletBit, source): - + # Get some vars, construct a data path for the repo path = source['sourceID'] url = source['sourceURL'] - + auth=None people = {} if 'creds' in source: @@ -60,12 +62,14 @@ def scan(KibbletBit, source): 'good': True } KibbletBit.updateSource(source) - + # Get views - views = plugins.utils.github.views(url, auth) + github = importlib.import_module("plugins.utils.github") + views = github.views(url, auth) if 'views' in views: for el in views['views']: ts = getTime(el['timestamp']) + #print("reformatted time:", ts) shash = hashlib.sha224( ("%s-%s-%s-clones" %(source['organisation'], url, el['timestamp'])).encode('ascii', errors = 'replace')).hexdigest() bit = { 'organisation': source['organisation'], @@ -78,9 +82,9 @@ def scan(KibbletBit, source): 'id': shash } KibbleBit.append('ghstats', bit) - + # Get clones - clones = plugins.utils.github.clones(url, auth) + clones = github.clones(url, auth) if 'clones' in clones: for el in clones['clones']: ts = getTime(el['timestamp']) @@ -96,12 +100,12 @@ def scan(KibbletBit, source): 'id': shash } KibbleBit.append('ghstats', bit) - + # Get referrers - refs = plugins.utils.github.referrers(url, auth) + refs = github.referrers(url, auth) if refs: for el in refs: - el['timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%S", time.time()) + el['timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%S", time) ts = getTime(el['timestamp']) shash = hashlib.sha224( ("%s-%s-%s-refs" %(source['organisation'], 
url, el['timestamp'])).encode('ascii', errors = 'replace')).hexdigest() bit = { @@ -118,4 +122,3 @@ def scan(KibbletBit, source): except: pass # All done! - \ No newline at end of file diff --git a/src/plugins/scanners/jenkins.py b/src/plugins/scanners/jenkins.py index c09920c..579ba8b 100644 --- a/src/plugins/scanners/jenkins.py +++ b/src/plugins/scanners/jenkins.py @@ -20,18 +20,21 @@ import re import json import hashlib -import plugins.utils.jsonapi + import threading import requests.exceptions import os import urllib.parse +from plugins.utils import jsonapi + + """ This is the Kibble Jenkins scanner plugin. """ title = "Scanner for Jenkins CI" -version = "0.1.0" +version = "0.2.0" def accepts(source): """ Determines whether we want to handle this source """ @@ -42,39 +45,43 @@ def accepts(source): def scanJob(KibbleBit, source, job, creds): """ Scans a single job for activity """ - NOW = int(datetime.datetime.utcnow().timestamp()) + NOW = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) jname = job['name'] if job.get('folder'): jname = job.get('folder') + '-' + job['name'] - dhash = hashlib.sha224( ("%s-%s-%s" % (source['organisation'], source['sourceURL'], jname) ).encode('ascii', errors='replace')).hexdigest() + dhash = hashlib.sha224( ("%s-%s-%s" % (source['organisation'], source['sourceURL'], jname) ) + .encode('ascii', errors='replace')).hexdigest() found = True doc= None parseIt = False found = KibbleBit.exists('cijob', dhash) - + # Get $jenkins/job/$job-name/json... jobURL = "%s/api/json?depth=2&tree=builds[number,status,timestamp,id,result,duration]" % job['fullURL'] KibbleBit.pprint(jobURL) - jobjson = plugins.utils.jsonapi.get(jobURL, auth = creds) - + + jobjson = jsonapi.get(jobURL, auth = creds) + # If valid JSON, ... 
if jobjson: + print("jobjson builds: %s" %( jobjson)) for build in jobjson.get('builds', []): - buildhash = hashlib.sha224( ("%s-%s-%s-%s" % (source['organisation'], source['sourceURL'], jname, build['id']) ).encode('ascii', errors='replace')).hexdigest() + buildhash = hashlib.sha224( ("%s-%s-%s-%s" % (source['organisation'], source['sourceURL'], jname, build['id']) ) + .encode('ascii', errors='replace')).hexdigest() builddoc = None try: builddoc = KibbleBit.get('ci_build', buildhash) except: pass - + # If this build already completed, no need to parse it again if builddoc and builddoc.get('completed', False): continue - + KibbleBit.pprint("[%s-%s] This is new or pending, analyzing..." % (jname, build['id'])) - + completed = True if build['result'] else False - + # Estimate time spent in queue queuetime = 0 TS = int(build['timestamp']/1000) @@ -82,7 +89,7 @@ def scanJob(KibbleBit, source, job, creds): queuetime = builddoc.get('queuetime', 0) if not completed: queuetime = NOW - TS - + # Get build status (success, failed, canceled etc) status = 'building' if build['result'] in ['SUCCESS', 'STABLE']: @@ -91,13 +98,13 @@ def scanJob(KibbleBit, source, job, creds): status = 'failed' if build['result'] in ['ABORTED']: status = 'aborted' - + # Calc when the build finished (jenkins doesn't show this) if completed: FIN = int(build['timestamp'] + build['duration']) / 1000 else: FIN = 0 - + doc = { # Build specific data 'id': buildhash, @@ -111,7 +118,7 @@ def scanJob(KibbleBit, source, job, creds): 'started': int(build['timestamp']/1000), 'ci': 'jenkins', 'queuetime': queuetime, - + # Standard docs values 'sourceID': source['sourceID'], 'organisation': source['organisation'], @@ -120,7 +127,7 @@ def scanJob(KibbleBit, source, job, creds): KibbleBit.append('ci_build', doc) # Yay, it worked! return True - + # Boo, it failed! 
KibbleBit.pprint("Fetching job data failed!") return False @@ -135,7 +142,7 @@ def __init__(self, block, KibbleBit, source, creds, jobs): self.creds = creds self.source = source self.jobs = jobs - + def run(self): badOnes = 0 while len(self.jobs) > 0 and badOnes <= 50: @@ -160,7 +167,8 @@ def run(self): self.KibbleBit.pprint("Too many errors, bailing!") self.source['steps']['issues'] = { 'time': time.time(), - 'status': 'Too many errors while parsing at ' + time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())), + 'status': 'Too many errors while parsing at ' + + time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())), 'running': False, 'good': False } @@ -169,11 +177,12 @@ def run(self): else: badOnes = 0 -def scan(KibbleBit, source): +def scan(KibbleBit, source, filter=None): # Simple URL check jenkins = re.match(r"(https?://.+)", source['sourceURL']) if jenkins: - + if not 'steps' in source: + source['steps'] = {} source['steps']['jenkins'] = { 'time': time.time(), 'status': 'Parsing Jenkins job changes...', @@ -181,7 +190,7 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + badOnes = 0 pendingJobs = [] KibbleBit.pprint("Parsing Jenkins activity at %s" % source['sourceURL']) @@ -192,32 +201,41 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + # Jenkins may neeed credentials creds = None - if source['creds'] and 'username' in source['creds'] and source['creds']['username'] and len(source['creds']['username']) > 0: + if ('creds' in source and source['creds'] and 'username' in source['creds'] and source['creds']['username'] + and len(source['creds']['username']) > 0): creds = "%s:%s" % (source['creds']['username'], source['creds']['password']) - + + if not creds: + KibbleBit.pprint("JENKINS with no %s authentication." 
% source['sourceURL']) + # Get the job list - sURL = source['sourceURL'] - KibbleBit.pprint("Getting job list...") - jobsjs = plugins.utils.jsonapi.get("%s/api/json?tree=jobs[name,color]&depth=1" % sURL , auth = creds) - + sURL: str = source['sourceURL'] + #print("queue URL:", sURL) + KibbleBit.pprint("Getting jenkins job list..." ) + jobsjs = jsonapi.get("%s/api/json?tree=jobs[name,color]&depth=1" % sURL , auth = creds) + #print ("jobsjs:", jobsjs) + # Get the current queue + # This is always at the root of the build instance KibbleBit.pprint("Getting job queue...") - queuejs = plugins.utils.jsonapi.get("%s/queue/api/json?depth=1" % sURL , auth = creds) - + + queuejs = jsonapi.get("%s/queue/api/json?depth=1" % sURL , auth = creds) + # Save queue snapshot - NOW = int(datetime.datetime.utcnow().timestamp()) - queuehash = hashlib.sha224( ("%s-%s-queue-%s" % (source['organisation'], source['sourceURL'], int(time.time())) ).encode('ascii', errors='replace')).hexdigest() - - + NOW = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) + queuehash = hashlib.sha224( ("%s-%s-queue-%s" % (source['organisation'], source['sourceURL'], int(time.time())) ) + .encode('ascii', errors='replace')).hexdigest() + + # Scan queue items blocked = 0 stuck = 0 totalqueuetime = 0 items = queuejs.get('items', []) - + for item in items: if item['blocked']: blocked += 1 @@ -225,11 +243,11 @@ def scan(KibbleBit, source): stuck += 1 if 'inQueueSince' in item: totalqueuetime += (NOW - int(item['inQueueSince']/1000)) - + avgqueuetime = totalqueuetime / max(1, len(items)) - + # Count how many jobs are building, find any folders... 
- actual_jobs, building = get_all_jobs(KibbleBit, source, jobsjs.get('jobs', []), creds) + actual_jobs, building = get_all_jobs(KibbleBit, source, jobsjs.get('jobs', []), filter, creds) # Write up a queue doc queuedoc = { @@ -242,18 +260,18 @@ def scan(KibbleBit, source): 'stuck': stuck, 'avgwait': avgqueuetime, 'ci': 'jenkins', - + # Standard docs values 'sourceID': source['sourceID'], 'organisation': source['organisation'], 'upsert': True, } KibbleBit.append('ci_queue', queuedoc) - - + + pendingJobs = actual_jobs KibbleBit.pprint("Found %u jobs in Jenkins" % len(pendingJobs)) - + threads = [] block = threading.Lock() KibbleBit.pprint("Scanning jobs using 4 sub-threads") @@ -261,32 +279,48 @@ def scan(KibbleBit, source): t = jenkinsThread(block, KibbleBit, source, creds, pendingJobs) threads.append(t) t.start() - + for t in threads: t.join() - # We're all done, yaay + # We're all done, yaay KibbleBit.pprint("Done scanning %s" % source['sourceURL']) + partial = "(filtered) " if filter else '' source['steps']['issues'] = { 'time': time.time(), - 'status': 'Jenkins successfully scanned at ' + time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())), + 'status': 'Jenkins successfully '+ partial+'scanned at ' + time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())), 'running': False, 'good': True } KibbleBit.updateSource(source) - -def get_all_jobs(KibbleBit, source, joblist, creds): + +def get_all_jobs(KibbleBit, source, joblist, job_filter, creds): real_jobs = [] building = 0 for job in joblist: + + #print("jobFilter: ", job_filter) + if (job_filter and job['name'] not in job_filter): + print("Skipping job", job['name']) + continue + # Is this a job folder? jclass = job.get('_class') - if jclass in ['jenkins.branch.OrganizationFolder', 'org.jenkinsci.plugins.workflow.multibranch.WorkflowMultiBranchProject']: + + #KibbleBit.pprint("%s has class %s..." 
% (job['name'], jclass)) + + if jclass in ['jenkins.branch.OrganizationFolder', + 'org.jenkinsci.plugins.workflow.multibranch.WorkflowMultiBranchProject', + 'org.jenkinsci.plugins.workflow.job.WorkflowJob', + 'com.cloudbees.hudson.plugins.folder.Folder']: KibbleBit.pprint("%s is a jobs folder, expanding..." % job['name']) + csURL = '%s/job/%s' % (source['sourceURL'], urllib.parse.quote(job['name'].replace('/', '%2F'))) + try: - child_jobs = plugins.utils.jsonapi.get("%s/api/json?tree=jobs[name,color]&depth=1" % csURL, + + child_jobs = jsonapi.get("%s/api/json?tree=jobs[name,color]&depth=1" % csURL, auth=creds) csource = dict(source) csource['sourceURL'] = csURL @@ -294,7 +328,9 @@ def get_all_jobs(KibbleBit, source, joblist, creds): csource['folder'] = job['name'] else: csource['folder'] += '-' + job['name'] - cjobs, cbuilding = get_all_jobs(KibbleBit, csource, child_jobs.get('jobs', []), creds) + cjobs, cbuilding = get_all_jobs(KibbleBit, csource, child_jobs.get('jobs', []), job_filter, creds) + + KibbleBit.pprint("%s (job/folder) entries found." % (len(cjobs)) ) building += cbuilding for cjob in cjobs: real_jobs.append(cjob) @@ -308,5 +344,6 @@ def get_all_jobs(KibbleBit, source, joblist, creds): building += 1 job['fullURL'] = '%s/job/%s' % (source['sourceURL'], urllib.parse.quote(job['name'].replace('/', '%2F'))) job['folder'] = source.get('folder') + #KibbleBit.pprint("Found job %s ..." % job) real_jobs.append(job) return real_jobs, building diff --git a/src/plugins/scanners/jira.py b/src/plugins/scanners/jira.py index 58a8daa..000c05b 100644 --- a/src/plugins/scanners/jira.py +++ b/src/plugins/scanners/jira.py @@ -14,13 +14,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import importlib import time import datetime import re import json import hashlib -import plugins.utils.jsonapi import threading import requests.exceptions @@ -36,7 +35,7 @@ def accepts(source): if source['type'] == 'jira': return True if source['type'] == "issuetracker": - jira = re.match(r"(https?://.+)/browse/([A-Z0-9]+)", url) + jira = re.match(r"(https?://.+)/browse/([A-Z0-9]+)", source) if jira: return True return False @@ -106,12 +105,12 @@ def pchange(js): def scanTicket(KibbleBit, key, u, source, creds, openTickets): """ Scans a single ticket for activity and people """ - + dhash = hashlib.sha224( ("%s-%s-%s" % (source['organisation'], source['sourceURL'], key) ).encode('ascii', errors='replace')).hexdigest() found = True doc= None parseIt = False - + # the 'domain' var we try to figure out here is used # for faking email addresses and keep them unique, # in case JIRA has email visibility turned off. @@ -119,7 +118,7 @@ def scanTicket(KibbleBit, key, u, source, creds, openTickets): m = re.search(r"https?://([^/]+)", u) if m: domain = m.group(1) - + found = KibbleBit.exists('issue', dhash) if not found: KibbleBit.pprint("[%s] We've never seen this ticket before, parsing..." % key) @@ -139,13 +138,14 @@ def scanTicket(KibbleBit, key, u, source, creds, openTickets): KibbleBit.pprint("[%s] Ticket contains erroneous data from a previous scan, reparsing" % key) # This is just noise! #KibbleBit.pprint("[%s] Ticket hasn't changed, ignoring..." % key) - + if parseIt: KibbleBit.pprint("[%s] Parsing data from JIRA at %s..." 
% (key, domain)) queryURL = "%s/rest/api/2/issue/%s?fields=creator,reporter,status,issuetype,summary,assignee,resolutiondate,created,priority,changelog,comment,resolution,votes&expand=changelog" % (u, key) jiraURL = "%s/browse/%s" % (u, key) + jsonapi = importlib.import_module("plugins.utils.jsonapi") try: - tjson = plugins.utils.jsonapi.get(queryURL, auth = creds) + tjson = jsonapi.get(queryURL, auth = creds) if not tjson: KibbleBit.pprint("%s does not exist (404'ed)" % key) return False @@ -157,12 +157,12 @@ def scanTicket(KibbleBit, key, u, source, creds, openTickets): KibbleBit.pprint("Closed but no closer??") closerEmail = None status = 'closed' if st else 'open' - + # Make sure we actually have field data to work with if not tjson.get('fields') or not tjson['fields'].get('created'): KibbleBit.pprint("[%s] JIRA response is missing field data, ignoring ticket." % key) return False - + cd = getTime(tjson['fields']['created']) rd = getTime(tjson['fields']['resolutiondate']) if 'resolutiondate' in tjson['fields'] and tjson['fields']['resolutiondate'] else None comments = 0 @@ -190,7 +190,7 @@ def scanTicket(KibbleBit, key, u, source, creds, openTickets): 'upsert': True } KibbleBit.append('person', jsp) - + if creator: creator = creator.replace(" dot ", ".", 10).replace(" at ", "@", 1) if not '@' in creator: @@ -219,7 +219,7 @@ def scanTicket(KibbleBit, key, u, source, creds, openTickets): 'created': cd, 'closed': rd, 'issuetype': 'issue', - 'issueCloser': closerEmail, + 'issueCloser': closerEmail, 'createdDate': time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(cd)), 'closedDate': time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd)) if rd else None, 'changeDate': time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd if rd else cd)), @@ -234,8 +234,8 @@ def scanTicket(KibbleBit, key, u, source, creds, openTickets): #except Exception as err: #KibbleBit.pprint(err) #return False - - + + class jiraThread(threading.Thread): @@ -247,7 +247,7 @@ def __init__(self, block, 
KibbleBit, source, creds, pt, ot): self.source = source self.pendingTickets = pt self.openTickets = ot - + def run(self): badOnes = 0 while len(self.pendingTickets) > 0 and badOnes <= 50: @@ -281,13 +281,17 @@ def run(self): def scan(KibbleBit, source): jira = re.match(r"(https?://.+)/browse/([A-Z0-9]+)", source['sourceURL']) if jira: - + + if not 'steps' in source: + source['steps'] = {} + #print("issue source %s" % source ) # JIRA NEEDS credentials to do a proper scan! creds = None - if source['creds'] and 'username' in source['creds'] and source['creds']['username'] and len(source['creds']['username']) > 0: + if 'creds' in source and source['creds'] and 'username' in source['creds'] and source['creds']['username'] and len(source['creds']['username']) > 0: creds = "%s:%s" % (source['creds']['username'], source['creds']['password']) if not creds: KibbleBit.pprint("JIRA at %s requires authentication, but none was found! Bailing." % source['sourceURL']) + source['steps']['issues'] = { 'time': time.time(), 'status': 'JIRA endpoint requires auth, but none was provided!', @@ -296,7 +300,7 @@ def scan(KibbleBit, source): } KibbleBit.updateSource(source) return - + source['steps']['issues'] = { 'time': time.time(), 'status': 'Parsing JIRA changes...', @@ -304,7 +308,7 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + badOnes = 0 jsa = [] jsp = [] @@ -317,16 +321,17 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + # Get base URL, list and domain to parse u = jira.group(1) instance = jira.group(2) lastTicket = 0 latestURL = "%s/rest/api/2/search?jql=project=%s+order+by+createdDate+DESC&fields=id,key&maxResults=1" % (u, instance) js = None - + + jsonapi = importlib.import_module("plugins.utils.jsonapi") try: - js = plugins.utils.jsonapi.get(latestURL, auth = creds) + js = jsonapi.get(latestURL, auth = creds) except requests.exceptions.ConnectionError as err: KibbleBit.pprint("Connection error, skipping this 
ticket for now!") source['steps']['issues'] = { @@ -342,8 +347,8 @@ def scan(KibbleBit, source): m = re.search(r"-(\d+)$", key) if m: lastTicket = int(m.group(1)) - - + + openTickets = [] startAt = 0 badTries = 0 @@ -351,7 +356,7 @@ def scan(KibbleBit, source): openURL = "%s/rest/api/2/search?jql=project=%s+and+status=open+order+by+createdDate+ASC&fields=id,key&maxResults=100&startAt=%u" % (u, instance, startAt) #print(openURL) try: - ojs = plugins.utils.jsonapi.get(openURL, auth = creds) + ojs = jsonapi.get(openURL, auth = creds) if not 'issues' in ojs or len(ojs['issues']) == 0: break for item in ojs['issues']: @@ -362,12 +367,12 @@ def scan(KibbleBit, source): KibbleBit.pprint("JIRA borked, retrying") badTries += 1 KibbleBit.pprint("Found %u open tickets" % len(openTickets)) - + badOnes = 0 for i in reversed(range(1,lastTicket+1)): key = "%s-%u" % (instance, i) pendingTickets.append([key, u, source]) - + threads = [] block = threading.Lock() KibbleBit.pprint("Scanning tickets using 4 sub-threads") @@ -375,10 +380,10 @@ def scan(KibbleBit, source): t = jiraThread(block, KibbleBit, source, creds, pendingTickets, openTickets) threads.append(t) t.start() - + for t in threads: t.join() - + KibbleBit.pprint("Done scanning %s" % source['sourceURL']) source['steps']['issues'] = { @@ -388,4 +393,3 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - diff --git a/src/plugins/scanners/ponymail.py b/src/plugins/scanners/ponymail.py index abe8fc1..fd9bca2 100644 --- a/src/plugins/scanners/ponymail.py +++ b/src/plugins/scanners/ponymail.py @@ -35,12 +35,12 @@ def accepts(source): # If the source equals the plugin name, assume a yes if source['type'] == 'ponymail': return True - + # If it's of type 'mail', check the URL if source['type'] == 'mail': if re.match(r"(https?://.+)/list\.html\?(.+)@(.+)", source['sourceURL']): return True - + # Default to not recognizing the source return False @@ -86,9 +86,12 @@ def scan(KibbleBit, source): } 
KibbleBit.updateSource(source) return - + # Pony Mail requires a UI cookie in order to work. Maked sure we have one! cookie = None + + if not 'steps' in source: + source['steps'] = {} if 'creds' in source and source['creds']: cookie = source['creds'].get('cookie', None) if not cookie: @@ -101,7 +104,7 @@ def scan(KibbleBit, source): } KibbleBit.updateSource(source) return - + # Notify scanner and DB that this is valid and we've begun parsing KibbleBit.pprint("%s is a valid Pony Mail address, parsing" % source['sourceURL']) source['steps']['mail'] = { @@ -111,13 +114,13 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - - + + # Get base URL, list and domain to parse u = url.group(1) l = url.group(2) d = url.group(3) - + # Get this month dt = time.gmtime(time.time()) firstYear = 1970 @@ -127,15 +130,15 @@ def scan(KibbleBit, source): month += 12 year -= 1 months = 0 - + # Hash for keeping records of who we know knowns = {} - + # While we have older archives, continue to parse while firstYear <= year: statsurl = "%s/api/stats.lua?list=%s&domain=%s&d=%s" % (u, l, d, "%04u-%02u" % (year, month)) dhash = hashlib.sha224((("%s %s") % (source['organisation'], statsurl)).encode('ascii', errors='replace')).hexdigest() - found = False + found = False if KibbleBit.exists('mailstats', dhash): found = True if months <= 1 or not found: # Always parse this month's stats :) @@ -147,7 +150,7 @@ def scan(KibbleBit, source): js = plugins.utils.jsonapi.get(statsurl, cookie = cookie) except Exception as err: KibbleBit.pprint("Server error, skipping this month") - month -= 1 + month -= 1 if month <= 0: month += 12 year -= 1 @@ -196,7 +199,7 @@ def scan(KibbleBit, source): 'id': mlhash } KibbleBit.index('mailtop', mlhash, jst) - + for email in js['emails']: sender = email['from'] name = sender @@ -214,7 +217,7 @@ def scan(KibbleBit, source): if KibbleBit.exists('person',sid): knowns[sender] = True if not sender in knowns or name != sender: - 
KibbleBit.append('person', + KibbleBit.append('person', { 'upsert': True, 'name': name, @@ -246,8 +249,8 @@ def scan(KibbleBit, source): KibbleBit.append('email', jse) for sender in posters: no_posters += 1 - - + + jso = { 'organisation': source['organisation'], 'sourceURL': source['sourceURL'], @@ -259,16 +262,16 @@ def scan(KibbleBit, source): } #print("Indexing as %s" % dhash) KibbleBit.index('mailstats', dhash, jso) - month -= 1 + month -= 1 if month <= 0: month += 12 year -= 1 - - + + source['steps']['mail'] = { 'time': time.time(), 'status': 'Mail archives successfully scanned at ' + time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())), 'running': False, 'good': True } - KibbleBit.updateSource(source) \ No newline at end of file + KibbleBit.updateSource(source) diff --git a/src/plugins/scanners/travis.py b/src/plugins/scanners/travis.py index a42dae9..e6cfadb 100644 --- a/src/plugins/scanners/travis.py +++ b/src/plugins/scanners/travis.py @@ -41,19 +41,19 @@ def accepts(source): def scanJob(KibbleBit, source, bid, token, TLD): """ Scans a single job for activity """ - NOW = int(datetime.datetime.utcnow().timestamp()) + NOW = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) dhash = hashlib.sha224( ("%s-%s-%s" % (source['organisation'], source['sourceURL'], bid) ).encode('ascii', errors='replace')).hexdigest() found = True doc= None parseIt = False found = KibbleBit.exists('cijob', dhash) - + # Get the job data pages = 0 offset = 0 last_page = False oURL = "https://api.travis-ci.%s/repo/%s/builds" % (TLD, bid) - + # For as long as pagination makes sense... 
while last_page == False: bURL = "https://api.travis-ci.%s/repo/%s/builds?limit=100&offset=%u" % (TLD, bid, offset) @@ -65,13 +65,13 @@ def scanJob(KibbleBit, source, bid, token, TLD): if repojs['@pagination']['is_last']: KibbleBit.pprint("Assuming this is the last page we need (travis says so)") last_page = True - + KibbleBit.pprint("%s has %u builds done" % (bURL, repojs['@pagination']['count'])) - + # BREAKER: If we go past count somehow, and travis doesn't say so, bork anyway if repojs['@pagination']['count'] < offset: return True - + offset += 100 for build in repojs.get('builds', []): buildID = build['id'] @@ -81,15 +81,15 @@ def scanJob(KibbleBit, source, bid, token, TLD): duration = build['duration'] completed = True if duration else False duration = duration or 0 - - + + buildhash = hashlib.sha224( ("%s-%s-%s-%s" % (source['organisation'], source['sourceURL'], bid, buildID) ).encode('ascii', errors='replace')).hexdigest() builddoc = None try: builddoc = KibbleBit.get('ci_build', buildhash) except: pass - + # If this build already completed, no need to parse it again if builddoc and builddoc.get('completed', False): # If we're on page > 1 and we've seen a completed build, assume @@ -99,7 +99,7 @@ def scanJob(KibbleBit, source, bid, token, TLD): last_page = True break continue - + # Get build status (success, failed, canceled etc) status = 'building' if build['state'] in ['finished', 'passed']: @@ -108,17 +108,17 @@ def scanJob(KibbleBit, source, bid, token, TLD): status = 'failed' if build['state'] in ['aborted', 'canceled']: status = 'aborted' - + FIN = 0 STA = 0 if finishedAt: FIN = datetime.datetime.strptime(finishedAt, "%Y-%m-%dT%H:%M:%SZ").timestamp() if startedAt: STA = int(datetime.datetime.strptime(startedAt, "%Y-%m-%dT%H:%M:%SZ").timestamp()) - + # We don't know how to calc queues yet, set to 0 queuetime = 0 - + doc = { # Build specific data 'id': buildhash, @@ -132,7 +132,7 @@ def scanJob(KibbleBit, source, bid, token, TLD): 'started': STA, 'ci': 
'travis', 'queuetime': queuetime, - + # Standard docs values 'sourceID': source['sourceID'], 'organisation': source['organisation'], @@ -158,7 +158,7 @@ def __init__(self, block, KibbleBit, source, token, jobs, TLD): self.source = source self.jobs = jobs self.tld = TLD - + def run(self): badOnes = 0 while len(self.jobs) > 0 and badOnes <= 50: @@ -201,7 +201,7 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + badOnes = 0 pendingJobs = [] KibbleBit.pprint("Parsing Travis activity at %s" % source['sourceURL']) @@ -212,7 +212,7 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - + # Travis needs a token token = None if source['creds'] and 'token' in source['creds'] and source['creds']['token'] and len(source['creds']['token']) > 0: @@ -220,14 +220,14 @@ def scan(KibbleBit, source): else: KibbleBit.pprint("Travis CI requires a token to work!") return False - + # Get the job list, paginated sURL = source['sourceURL'] - + # Used for pagination jobs = 100 offset = 0 - + # Counters; builds queued, running and total jobs queued = 0 # We don't know how to count this yet building = 0 @@ -235,16 +235,16 @@ def scan(KibbleBit, source): blocked = 0 # Dunno how to count yet stuck = 0 # Ditto avgqueuetime = 0 # Ditto, fake it - + maybeQueued = [] while jobs == 100: URL = "https://api.travis-ci.%s/repos?repository.active=true&sort_by=current_build:desc&offset=%u&limit=100&include=repository.last_started_build" % (TLD, offset) offset += 100 r = requests.get(URL, headers = {'Travis-API-Version': '3', 'Authorization': "token %s" % token}) - + if r.status_code != 200: KibbleBit.pprint("Travis did not return a 200 Okay, bad token?!") - + source['steps']['travis'] = { 'time': time.time(), 'status': 'Travis CI scan failed at ' + time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time()) + ". 
Bad token??!"), @@ -253,8 +253,8 @@ def scan(KibbleBit, source): } KibbleBit.updateSource(source) return - - + + # For each build job js = r.json() for repo in js['repositories']: @@ -265,15 +265,15 @@ def scan(KibbleBit, source): if cb['state'] in ['started','created', 'queued', 'pending']: for job in cb.get('jobs', []): maybeQueued.append(job['id']) - - + + # Queue up build jobs for the threaded scanner bid = repo['id'] pendingJobs.append(bid) - + jobs = len(js['repositories']) KibbleBit.pprint("Scanned %u jobs..." % total) - + # Find out how many building and pending jobs for jobID in maybeQueued: URL = "https://api.travis-ci.%s/job/%u" % (TLD, jobID) @@ -288,11 +288,11 @@ def scan(KibbleBit, source): blocked += 1 # Queued in Travis generally means a job can't find an executor, and thus is blocked. KibbleBit.pprint("Job %u is pending" % jobID) KibbleBit.pprint("%u building, %u queued..." % (building, queued)) - + # Save queue snapshot - NOW = int(datetime.datetime.utcnow().timestamp()) + NOW = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) queuehash = hashlib.sha224( ("%s-%s-queue-%s" % (source['organisation'], source['sourceURL'], int(time.time())) ).encode('ascii', errors='replace')).hexdigest() - + # Write up a queue doc queuedoc = { 'id': queuehash, @@ -304,17 +304,17 @@ def scan(KibbleBit, source): 'stuck': stuck, 'avgwait': avgqueuetime, 'ci': 'travis', - + # Standard docs values 'sourceID': source['sourceID'], 'organisation': source['organisation'], 'upsert': True, } KibbleBit.append('ci_queue', queuedoc) - - + + KibbleBit.pprint("Found %u jobs in Travis" % len(pendingJobs)) - + threads = [] block = threading.Lock() KibbleBit.pprint("Scanning jobs using 4 sub-threads") @@ -322,11 +322,11 @@ def scan(KibbleBit, source): t = travisThread(block, KibbleBit, source, token, pendingJobs, TLD) threads.append(t) t.start() - + for t in threads: t.join() - # We're all done, yaay + # We're all done, yaay KibbleBit.pprint("Done scanning %s" % 
source['sourceURL']) source['steps']['travis'] = { @@ -336,4 +336,3 @@ def scan(KibbleBit, source): 'good': True } KibbleBit.updateSource(source) - \ No newline at end of file diff --git a/src/plugins/utils/git.py b/src/plugins/utils/git.py index 8576ce5..3bea67a 100644 --- a/src/plugins/utils/git.py +++ b/src/plugins/utils/git.py @@ -17,8 +17,6 @@ """ This is the Kibble git utility plugin """ -import os -import sys import subprocess import re @@ -30,7 +28,7 @@ def defaultBranch(source, datapath, KibbleBit = None): if KibbleBit and KibbleBit.config.get('git'): wanted_branches = KibbleBit.config['git'].get('wanted_branches', wanted_branches) foundBranch = False - + # For each wanted branch, in order, look for it in our clone, # and return the name if found. for B in wanted_branches: diff --git a/src/plugins/utils/jsonapi.py b/src/plugins/utils/jsonapi.py index 04d173f..9405e2f 100644 --- a/src/plugins/utils/jsonapi.py +++ b/src/plugins/utils/jsonapi.py @@ -40,6 +40,7 @@ def get(url, cookie = None, auth = None, token = None, retries = 5, timeout = 30 headers["Authorization"] = "token %s" % token if cookie: headers["Cookie"] = cookie + # print("fetching url %s" % url) rv = requests.get(url, headers = headers, timeout = (CONNECT_TIMEOUT, timeout)) # Some services may be rate limited. We'll try sleeping it off in 60 second # intervals for a max of five minutes, then give up. @@ -85,4 +86,3 @@ def post(url, data, cookie = None, auth = None): rv = requests.post(url, headers = headers, json = data) js = rv.json() return js - diff --git a/src/plugins/utils/urlmisc.py b/src/plugins/utils/urlmisc.py index 9e75a4b..3340bda 100644 --- a/src/plugins/utils/urlmisc.py +++ b/src/plugins/utils/urlmisc.py @@ -18,6 +18,7 @@ """ This is a Kibble miscellaneous URL functions plugin. 
""" +import base64 import urllib.request import gzip import tempfile @@ -53,9 +54,8 @@ def unzip(url, creds = None, cookie = None): if err.code != 404 and err.code != 401: tmpfile = tempfile.NamedTemporaryFile(mode='w+b', buffering=1, delete=False) subprocess.check_call(("/usr/bin/wget", "-O", tmpfile.name, url)) - + try: - te compressedFile = open("/tmp/kibbletmp.gz", 'rb') if (compressedFile.read(2) == '\x1f\x8b'): compressedFile.seek(0) @@ -72,4 +72,4 @@ def unzip(url, creds = None, cookie = None): tmpfile.flush() tmpfile.close() return tmpfile.name - return None \ No newline at end of file + return None