From aaa74e9b7afd2aa891b67369bac3d7844064c9f9 Mon Sep 17 00:00:00 2001 From: Max Goltzsche Date: Sun, 15 Jun 2025 20:48:12 +0200 Subject: [PATCH 1/4] chore: add linter --- .golangci.yml | 20 ++++++++++++++++++++ Dockerfile | 13 ++++++++++++- Makefile | 5 ++++- 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 .golangci.yml diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..e65163b --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,20 @@ +linters: + enable: + - gofmt + - govet + - errcheck + - staticcheck + - unused + - gosimple + - structcheck + - varcheck + - ineffassign + - deadcode + - typecheck + - rowserrcheck + - gosec + - unconvert + +skip-dirs: + - vendor + diff --git a/Dockerfile b/Dockerfile index 0a9d032..4fc6390 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,11 @@ -FROM golang:1.24.4-alpine3.22 AS builddeps +FROM golang:1.24.4-alpine3.22 AS go + +FROM go AS linter +RUN apk add --update --no-cache git +ARG GOLANGCILINT_VERSION=v2.1.6 +RUN go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@$GOLANGCILINT_VERSION + +FROM go AS builddeps COPY go.mod go.sum /build/ WORKDIR /build RUN go mod download @@ -8,6 +15,10 @@ COPY internal /build/internal FROM builddeps AS test RUN go test ./... +FROM builddeps AS lint +COPY --from=linter /go/bin/golangci-lint /go/bin/ +RUN golangci-lint run ./... + FROM builddeps AS build RUN go build -o knowledgebot -ldflags "-s -w -extldflags \"-static\"" ./cmd/knowledgebot diff --git a/Makefile b/Makefile index 2f68b3e..eaf3de2 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,10 @@ container: ## Build the container image. ##@ Development test: ## Run unit tests. - docker build --rm -t $(IMAGE) --target=test . + docker build --force-rm -t $(IMAGE) --target=test . + +lint: ## Run linter. + docker build --force-rm -t $(IMAGE) --target=lint . compose-up: container ## Run the compose project. 
docker compose up From 2d8ca2207c48ec1961745499cffde93ac2c66726 Mon Sep 17 00:00:00 2001 From: Max Goltzsche Date: Sun, 15 Jun 2025 20:55:58 +0200 Subject: [PATCH 2/4] fix: address issues reported by the linter * Handle errors within the crawler. * Explicitly ignore HTTP stream write errors within the server. * Use the new `llms.Model` interface instead of the deprecated `llms.LLM`. --- cmd/knowledgebot/serve.go | 7 +++++-- internal/importer/crawler/crawler.go | 17 +++++++++++------ internal/qna/qnaworkflow.go | 2 +- internal/server/qnahandler.go | 4 ++-- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/cmd/knowledgebot/serve.go b/cmd/knowledgebot/serve.go index 878d59b..3c885b2 100644 --- a/cmd/knowledgebot/serve.go +++ b/cmd/knowledgebot/serve.go @@ -66,7 +66,7 @@ func init() { } func preRunServer(cmd *cobra.Command, args []string) error { - embeddingsModel := storeFactory.LLMFactory.EmbeddingModel + embeddingsModel := storeFactory.EmbeddingModel storeFactory.LLMFactory = llmFactory storeFactory.EmbeddingModel = embeddingsModel @@ -99,7 +99,10 @@ func runServer(cmd *cobra.Command, args []string) error { go func() { <-ctx.Done() - srv.Shutdown(ctx) + err := srv.Shutdown(ctx) + if err != nil { + log.Println("ERROR: failed to shutdown server:", err) + } }() log.Println("listening on", srv.Addr) diff --git a/internal/importer/crawler/crawler.go b/internal/importer/crawler/crawler.go index efe9bd6..19bb82b 100644 --- a/internal/importer/crawler/crawler.go +++ b/internal/importer/crawler/crawler.go @@ -51,7 +51,7 @@ func (s *Crawler) Crawl(ctx context.Context, seedURL string) error { return s.indexDocumentChunks(ctx, cancel, ch, startTime) } -func (s *Crawler) crawl(ctx context.Context, seedURL *url.URL, ch chan<- []schema.Document) error { +func (s *Crawler) crawl(ctx context.Context, seedURL *url.URL, ch chan<- []schema.Document) { defer close(ch) pageCounter := atomic.Uint64{} @@ -95,13 +95,18 @@ func (s *Crawler) crawl(ctx context.Context, seedURL 
*url.URL, ch chan<- []schem }) c.OnHTML("a[href]", func(e *colly.HTMLElement) { - e.Request.Visit(e.Attr("href")) + err := e.Request.Visit(e.Attr("href")) + if err != nil { + log.Println("ERROR: failed to visit page:", err) + } }) - c.Visit(seedURL.String()) - c.Wait() + err := c.Visit(seedURL.String()) + if err != nil { + log.Println("ERROR: failed to crawl page:", err) + } - return ctx.Err() + c.Wait() } func (s *Crawler) processHTML(ctx context.Context, url *url.URL, html string, ch chan<- []schema.Document) error { @@ -214,7 +219,7 @@ func (s *Crawler) indexDocumentChunks(ctx context.Context, cancel context.Cancel return fmt.Errorf("index scraped chunks: %w", err) } - elapsed := time.Now().Sub(startTime) + elapsed := time.Since(startTime) log.Printf("indexed %d chunks of %d document(s) in %s", chunkCount, docCount, elapsed) diff --git a/internal/qna/qnaworkflow.go b/internal/qna/qnaworkflow.go index 7a405bb..1f5266f 100644 --- a/internal/qna/qnaworkflow.go +++ b/internal/qna/qnaworkflow.go @@ -36,7 +36,7 @@ Here is the related data for the user’s question: ) type QuestionAnswerWorkflow struct { - LLM llms.LLM + LLM llms.Model Temperature float64 Store vectorstores.VectorStore MaxDocs int diff --git a/internal/server/qnahandler.go b/internal/server/qnahandler.go index 87a6eb0..ec1c403 100644 --- a/internal/server/qnahandler.go +++ b/internal/server/qnahandler.go @@ -45,10 +45,10 @@ func newQuestionAnswerHandler(ai *qna.QuestionAnswerWorkflow) http.Handler { } if chunk.Err != nil { - fmt.Fprintln(w, "event: error") + _, _ = fmt.Fprintln(w, "event: error") } - fmt.Fprintf(w, "data: %s\n\n", string(data)) + _, _ = fmt.Fprintf(w, "data: %s\n\n", string(data)) if flusher, ok := w.(http.Flusher); ok { flusher.Flush() From f560f28bb7be08b038eb6fc87c23fa653368a26d Mon Sep 17 00:00:00 2001 From: Max Goltzsche Date: Sun, 15 Jun 2025 21:34:09 +0200 Subject: [PATCH 3/4] chore: add gha (release) workflow --- .github/workflows/workflow.yaml | 71 
+++++++++++++++++++++++++++++++++ Makefile | 18 ++++++++- VERSION | 1 + compose.yaml | 2 +- 4 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/workflow.yaml create mode 100644 VERSION diff --git a/.github/workflows/workflow.yaml b/.github/workflows/workflow.yaml new file mode 100644 index 0000000..cbb8c7d --- /dev/null +++ b/.github/workflows/workflow.yaml @@ -0,0 +1,71 @@ +name: Build + +on: + push: + branches: + - main + pull_request: + branches: + - main + +concurrency: # Run release builds sequentially, cancel outdated PR builds + group: ci-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +permissions: # Grant write access to github.token within non-pull_request builds + contents: write + packages: write + +jobs: + build: + name: Build + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + persist-credentials: false + + - id: release + name: Prepare release + uses: mgoltzsche/conventional-release@v0 + with: + commit-files: compose.yaml,VERSION + ignore-commits-before: a0a3c294b3be797b831100c0b405a923e15239de + + - name: Set up qemu + if: steps.release.outputs.publish + uses: docker/setup-qemu-action@v3 + with: + platforms: all + + - name: Run unit tests + run: | + make test + + - name: Run linter + run: | + make lint + + - name: Build + if: '!steps.release.outputs.publish' + run: | + make container + + - name: Log into GitHub Container Registry + if: steps.release.outputs.publish + run: echo "${{ github.token }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin + + - name: Publish container image + if: steps.release.outputs.publish + run: | + set -u + make push-container VERSION=$RELEASE_VERSION + + - name: Update version within VERSION file and compose.yaml + if: steps.release.outputs.publish + run: | + set -u + make update-version VERSION=$RELEASE_VERSION diff --git a/Makefile b/Makefile index eaf3de2..b81ec3b 100644 
--- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ -IMAGE?=knowledgebot +VERSION?=$(shell cat VERSION) +IMAGE=ghcr.io/mgoltzsche/knowledgebot:$(VERSION) all: help @@ -47,6 +48,21 @@ crawl-wikipedia-futurama: ## Crawl Futurama-related Wikipedia pages. render-diagrams: ## Render PNGs from PlantUML diagrams. docker run --rm -v "`pwd`/docs/diagrams:/data" plantuml/plantuml:1.2025 *.puml +##@ Release + +push-container: PLATFORM?=linux/amd64,linux/arm64/v8 +push-container: BUILDX_OUTPUT?=type=registry +push-container: BUILDX_BUILDER?=knowledgebot-builder +push-container: ## Push the container image. + @[ ! '$(VERSION)' = dev ] || (echo 'No VERSION specified!' >&2; false) + docker buildx inspect $(BUILDX_BUILDER) >/dev/null 2<&1 || docker buildx create --name=$(BUILDX_BUILDER) >/dev/null + docker buildx build --rm -t $(IMAGE) --builder=$(BUILDX_BUILDER) --output=$(BUILDX_OUTPUT) --platform=$(PLATFORM) . + +update-version: ## Update version in VERSION file and compose.yaml. + sed -Ei.bak 's!image: .*knowledgebot:.+!image: ghcr.io/mgoltzsche/knowledgebot:$(VERSION)!g' compose.yaml + rm -f compose.yaml.bak + echo "$(VERSION)" > VERSION + ##@ General help: ## Display this help. 
diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..38f8e88 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +dev diff --git a/compose.yaml b/compose.yaml index f476dd4..1b15cc5 100644 --- a/compose.yaml +++ b/compose.yaml @@ -1,6 +1,6 @@ services: knowledgebot: - image: knowledgebot + image: ghcr.io/mgoltzsche/knowledgebot:dev # environment: # KLB_OPENAI_URL: http://ollama:11434 # KLB_TOPIC: Simpsons TV Show From 2180148389ff689c57d2607b9896624b1d0ff05d Mon Sep 17 00:00:00 2001 From: Max Goltzsche Date: Sun, 15 Jun 2025 22:02:32 +0200 Subject: [PATCH 4/4] docs: add contributing guidelines, code of conduct --- CODE_OF_CONDUCT.md | 76 ++++++++++++++++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 48 +++++++++++++++++++++++++++++ README.md | 1 + 3 files changed, 125 insertions(+) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..7ceb17e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at max.goltzsche@gmail.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..3d29397 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,48 @@ +# How to contribute + +Contributions, questions and feedback are welcome. +There are just a few small guidelines you need to follow. + +## Community guidelines + +This project follows the [Contributor Covenant Code of Conduct](./CODE_OF_CONDUCT.md). + +## Creating issues + +Feedback, questions, bug reports and feature requests are welcome in the form of issues. +To avoid duplicates, please check the [existing issues](https://github.com/mgoltzsche/knowledgebot/issues) before creating a new one. +If you cannot find a corresponding issue, please create a [new one](https://github.com/mgoltzsche/knowledgebot/issues/new/choose). + +### Bug reports + +When reporting a bug, please answer the following questions: +* Which environment and version do you use? 
Does it happen with the latest version as well? +* What did you do and what happened? (ideally in a reproducible way) +* What did you expect instead? + +### Feature requests + +When creating a feature request, please answer the following questions: +* What is the problem? +* What do you want to change? +* How do you want it to behave? +* Why do you need it? +* Which alternatives did you consider? + +## Creating pull requests (PRs) + +While there is no need to create an issue before creating a PR, please consider discussing large changes within an issue first. + +When creating a PR, please follow these rules: +* Create one PR per feature. +* Ideally, provide one commit per PR. +* Provide a description with each PR and commit that links an issue (if any), describes the problem, how it was solved and how the solution changes the user-facing behaviour. +* All submissions require a review. +* All commits should be signed using PGP and carry a [DCO](https://en.wikipedia.org/wiki/Developer_Certificate_of_Origin) sign-off. This can be done using `git commit -s -S`. +* When submitting a bug fix or feature PR, please also provide tests. +* When submitting a new feature, please document it. +* When writing markdown, asciidoc or code comments, please use one sentence per line, see [Using Markdown effectively](https://mattwidmann.net/notes/using-markdown-effectively/). + +## Building and testing the source + +See the [README](./README.md). diff --git a/README.md b/README.md index a84d308..fbf93d1 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,7 @@ This project is licensed under the [Apache 2.0 License](LICENSE). ## Contributing Contributions are welcome! Please feel free to open issues or pull requests. +For more details, see the [contributing guidelines](./CONTRIBUTING.md). ## Contact & Support