From 9530615b3da13db99d57cb71622a03f866f7ce80 Mon Sep 17 00:00:00 2001 From: mountain Date: Thu, 25 Jun 2026 23:31:08 +0800 Subject: [PATCH 01/11] docs(examples): add per-case usage guides with real generated artifacts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add examples/ as a use-case-indexed set of guides. Each case is its own directory: a README walkthrough plus the actual artifact OpenKB produced. - configuration — init, config.yaml, keys, LiteLLM tuning - commands — the everyday loop (+ a compiled sample-wiki/) - pageindex-cloud — long docs: local vs cloud + cloud import - chat — the interactive REPL: sessions + slash commands - skills — a generated SKILL.md + references/ + marketplace.json - slides — a generated single-file HTML deck - visualize — a generated interactive knowledge graph Every artifact was produced by running openkb over the sample attention-is-all-you-need.pdf with gpt-5.4-mini. The heavy/third-party test PDFs stay gitignored under examples/docs/. 
Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- examples/README.md | 44 + examples/chat/README.md | 92 ++ examples/commands/README.md | 174 ++++ .../concepts/attention-mechanisms.md | 81 ++ .../concepts/positional-encoding.md | 48 + .../concepts/transformer-models.md | 80 ++ .../sample-wiki/entities/google-brain.md | 45 + .../sample-wiki/entities/google-research.md | 29 + .../commands/sample-wiki/entities/google.md | 30 + .../sample-wiki/entities/nips-2017.md | 31 + .../sample-wiki/entities/tensor2tensor.md | 27 + .../entities/university-of-toronto.md | 25 + .../entities/wmt-2014-english-french.md | 31 + .../entities/wmt-2014-english-german.md | 32 + .../commands/sample-wiki/entities/wmt-2014.md | 31 + ...mer-replace-recurrence-with-self-attent.md | 9 + .../summaries/attention-is-all-you-need.md | 136 +++ examples/configuration/README.md | 163 ++++ examples/pageindex-cloud/README.md | 129 +++ examples/skills/README.md | 134 +++ examples/skills/marketplace.json | 26 + .../skills/transformer-attention/SKILL.md | 52 + .../references/original-transformer-paper.md | 11 + examples/slides/README.md | 86 ++ examples/slides/attention-intro.html | 151 +++ examples/visualize/README.md | 77 ++ examples/visualize/graph.html | 907 ++++++++++++++++++ 27 files changed, 2681 insertions(+) create mode 100644 examples/README.md create mode 100644 examples/chat/README.md create mode 100644 examples/commands/README.md create mode 100644 examples/commands/sample-wiki/concepts/attention-mechanisms.md create mode 100644 examples/commands/sample-wiki/concepts/positional-encoding.md create mode 100644 examples/commands/sample-wiki/concepts/transformer-models.md create mode 100644 examples/commands/sample-wiki/entities/google-brain.md create mode 100644 examples/commands/sample-wiki/entities/google-research.md create mode 100644 examples/commands/sample-wiki/entities/google.md create mode 100644 examples/commands/sample-wiki/entities/nips-2017.md create mode 
100644 examples/commands/sample-wiki/entities/tensor2tensor.md create mode 100644 examples/commands/sample-wiki/entities/university-of-toronto.md create mode 100644 examples/commands/sample-wiki/entities/wmt-2014-english-french.md create mode 100644 examples/commands/sample-wiki/entities/wmt-2014-english-german.md create mode 100644 examples/commands/sample-wiki/entities/wmt-2014.md create mode 100644 examples/commands/sample-wiki/explorations/how-does-the-transformer-replace-recurrence-with-self-attent.md create mode 100644 examples/commands/sample-wiki/summaries/attention-is-all-you-need.md create mode 100644 examples/configuration/README.md create mode 100644 examples/pageindex-cloud/README.md create mode 100644 examples/skills/README.md create mode 100644 examples/skills/marketplace.json create mode 100644 examples/skills/transformer-attention/SKILL.md create mode 100644 examples/skills/transformer-attention/references/original-transformer-paper.md create mode 100644 examples/slides/README.md create mode 100644 examples/slides/attention-intro.html create mode 100644 examples/visualize/README.md create mode 100644 examples/visualize/graph.html diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000..012b0ec4 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,44 @@ +# OpenKB examples + +Hands-on, organized by use case. Each folder is one case — a short walkthrough +plus the **real artifact** OpenKB produced for it (generated, not hand-written). + +> **Test inputs** live in [`docs/`](docs/) — the sample PDFs the cases ingest. +> They're kept out of git on purpose (large / third-party files); drop your own +> documents there to follow along. 
+ +## Cases + +| Folder | What it shows | Real artifact inside | +| --- | --- | --- | +| [`configuration/`](configuration/) | `init`, `config.yaml`, API keys, LiteLLM tuning (Ollama, Copilot) | — | +| [`commands/`](commands/) | the everyday loop: `add` · `query` · `remove` · `recompile` · `lint` · `list` · `status` | a compiled [`sample-wiki/`](commands/sample-wiki/) | +| [`pageindex-cloud/`](pageindex-cloud/) | long documents: local vs. cloud indexing, and importing cloud-indexed docs | — | +| [`chat/`](chat/) | the interactive REPL: persistent sessions + slash commands | — | +| [`skills/`](skills/) | distill a redistributable agent skill from your wiki | [`transformer-attention/SKILL.md`](skills/transformer-attention/SKILL.md) | +| [`slides/`](slides/) | generate a single-file HTML slide deck | [`attention-intro.html`](slides/attention-intro.html) | +| [`visualize/`](visualize/) | render the wiki as an interactive knowledge graph | [`graph.html`](visualize/graph.html) | + +## It's all from one paper + +The compiled `sample-wiki/`, the skill, the deck, and the graph were **all** +generated by running OpenKB over a single document — +[`docs/attention-is-all-you-need.pdf`](docs/attention-is-all-you-need.pdf) — with +`gpt-5.4-mini`: + +```bash +mkdir my-kb && cd my-kb +openkb init --model gpt-5.4-mini --language en + +openkb add /path/to/examples/docs/attention-is-all-you-need.pdf # → commands/sample-wiki/ +openkb query "How does attention replace recurrence?" --save # → commands/sample-wiki/explorations/ +openkb skill new transformer-attention "Reason about the Transformer…" # → skills/ +openkb deck new attention-intro "A short intro deck on the Transformer…" # → slides/ +openkb visualize # → visualize/ +``` + +Your generated text will differ run to run (LLM output isn't deterministic) — the +*structure* is what's stable. + +New to OpenKB? Read [`configuration/`](configuration/) first, then +[`commands/`](commands/). 
diff --git a/examples/chat/README.md b/examples/chat/README.md new file mode 100644 index 00000000..032b36f8 --- /dev/null +++ b/examples/chat/README.md @@ -0,0 +1,92 @@ +# Chat TUI + +`openkb chat` is an interactive REPL over your wiki. Unlike `query` (one-shot), a +chat session keeps context across turns, can edit the KB through slash commands, +and is saved so you can resume it later. + +```bash +openkb chat +``` + +```text +OpenKB Chat +~/research-kb · anthropic/claude-sonnet-4-6 · session 20260625-143022-a1x +Type /help for commands, Ctrl-D to exit, Ctrl-C to abort the current response. + +>>> How do the two papers differ on their use of attention? +Both rely on scaled dot-product attention, but… + · read_wiki_file(path="concepts/self-attention.md") + · read_wiki_file(path="summaries/deepseek-r1.md") + +>>> /save attention-comparison +Saved to wiki/explorations/attention-comparison-20260625.md +``` + +Answers are grounded in your wiki: the agent reads `concepts/`, `summaries/`, +`entities/`, and source files, and shows the tool calls it makes. Responses render +as rich Markdown (headings, tables, code) in a terminal. + +--- + +## Persistent sessions + +Every conversation is stored as JSON in `<kb-root>/.openkb/chats/`. Manage them with: + +```bash +openkb chat --list # table of sessions: id · turns · updated · title +openkb chat --resume # resume the most recent session +openkb chat --resume 20260625 # resume by id or unique prefix +openkb chat --delete 20260625 # delete a session +``` + +Resuming replays the last few turns so you have context: + +```text +$ openkb chat --resume +Resumed session · 4 turn(s) +[3] >>> How do the two papers differ on their use of attention? +[3] Both rely on scaled dot-product attention, but… +[4] >>> /save attention-comparison +``` + +--- + +## Slash commands + +Inside the REPL, lines starting with `/` are commands rather than questions. 
Run +`/help` to see the current set; the built-ins are: + +| Command | What it does | +| --- | --- | +| `/help` | List available commands | +| `/exit`, `/quit` | Leave the REPL (Ctrl-D also works) | +| `/clear` | Start a fresh session (the previous one is saved) | +| `/save [name]` | Export the transcript to `wiki/explorations/<name>-<date>.md` | +| `/status` | Show KB status without leaving chat | +| `/list` | List documents in the KB | +| `/lint` | Run the integrity + knowledge lint | +| `/add <path>` | Ingest a file or directory (Tab-completes paths) | +| `/skill new <name> "<intent>"` | Compile a skill from the wiki — see [`skills/`](../skills/) | +| `/deck new <name> [--critique] [--skill <skill>] "<intent>"` | Generate an HTML deck — see [`slides/`](../slides/) | +| `/critique <path>` | Run the HTML critic over an existing deck/page | + +Slash commands run inline — errors are reported and the conversation continues; +Ctrl-C aborts the running command without ending the session. + +### Why slash commands matter + +They turn chat into a workbench: ask a question, realize you're missing a source, +`/add` it, and keep going — all in one session. Because the chat agent can write +to `wiki/explorations/**` and `output/**`, asking it to "write that up as a note" +or "turn this into a skill" produces real files you keep. + +--- + +## Plain output for piping or logs + +```bash +openkb chat --no-color # disable colored output entirely +openkb chat --raw # show raw Markdown source, keep prompt/tool colors +``` + +`--no-color` also respects the `NO_COLOR` environment variable. diff --git a/examples/commands/README.md b/examples/commands/README.md new file mode 100644 index 00000000..9c134762 --- /dev/null +++ b/examples/commands/README.md @@ -0,0 +1,174 @@ +# Command reference + +The everyday loop. Every command resolves the active KB the same way (see +[`configuration/`](../configuration/#4-where-is-the-kb)), so you can run them from +anywhere once a KB is initialized. 
+ +> **Real artifact in this folder:** [`sample-wiki/`](sample-wiki/) is a complete +> wiki compiled by `openkb add` from one paper — +> [1 summary](sample-wiki/summaries/attention-is-all-you-need.md), +> [3 concepts](sample-wiki/concepts/), [9 entities](sample-wiki/entities/), and a +> saved [`query --save`](sample-wiki/explorations/) answer. Notice how every page +> is stitched together with `[[wikilinks]]` — that cross-linking *is* the +> knowledge graph you see in [`visualize/`](../visualize/). + +| Command | Purpose | Key flags | +| --- | --- | --- | +| `add <path-or-url>` | Ingest documents | `--from-pageindex-cloud` | +| `query <question>` | One-off question | `--save`, `--raw` | +| `remove <doc>` | Delete a document | `--keep-raw`, `--keep-empty`, `--dry-run`, `--yes` | +| `recompile [doc]` | Re-run the compile pipeline | `--all`, `--dry-run`, `--yes`, `--refresh-schema` | +| `lint` | Check wiki integrity | `--fix` | +| `list` | Show indexed docs & pages | – | +| `status` | KB stats + root path | – | +| `watch` | Auto-ingest files dropped in `raw/` | – | +| `feedback [msg]` | File a prefilled GitHub issue | `--type` | + +--- + +## `add` — ingest documents + +```bash +openkb add ../docs/attention-is-all-you-need.pdf # a single file +openkb add ~/papers/ # a directory (recursive) +openkb add https://arxiv.org/pdf/2509.11420 # a URL +openkb add --from-pageindex-cloud # an already-indexed cloud doc +``` + +- **Supported formats:** `.pdf .md .markdown .docx .pptx .xlsx .xls .html .htm + .txt .csv` (plus URLs). +- **URLs** are sniffed by content type: PDFs are downloaded and indexed; HTML is + run through a main-content extractor (trafilatura) and ingested as Markdown. +- **Long vs. short PDFs** are split by `pageindex_threshold` — see + [`pageindex-cloud/`](../pageindex-cloud/). +- **Idempotent:** a document is registered by content hash only after it compiles + successfully, so re-adding the same file is skipped and a failed add can be + retried. 
+ +Compiling [`../docs/attention-is-all-you-need.pdf`](../docs/attention-is-all-you-need.pdf) +is what produced [`sample-wiki/`](sample-wiki/) in this folder. + +--- + +## `query` — ask a one-off question + +```bash +openkb query "What are the main contributions of this paper?" +openkb query "Compare the two training objectives" --save +openkb query "How does this KB work?" --raw | less +``` + +- `--save` writes the answer to `wiki/explorations/<slug>.md` with a `query:` + frontmatter field, so good answers become part of the wiki. +- `--raw` prints raw Markdown (no rich rendering) — useful for piping. +- Output streams in a terminal and switches to a plain final answer when piped or + redirected, so it's safe in scripts. + +> **See it:** a real `--save` result is in +> [`sample-wiki/explorations/`](sample-wiki/explorations/). + +--- + +## `remove` — delete a document + +Identify a doc by filename, its slug, or a unique substring: + +```bash +openkb remove attention-is-all-you-need.pdf # exact filename +openkb remove attention # unique substring +openkb remove attention --dry-run # preview, change nothing +openkb remove attention --keep-empty # keep concept pages it solely sourced +openkb remove attention --keep-raw --yes # leave raw/ file, no prompt +``` + +`remove` deletes the summary, sources, and extracted images; drops the doc from +every concept/entity page's `sources:` (deleting pages whose **only** source was +this doc, unless `--keep-empty`); prunes `index.md` and the hash registry; and +runs a scoped `lint --fix` to clean any dangling `[[wikilinks]]`. For long local +PDFs it also clears the PageIndex state. Use `--dry-run` first when unsure. 
+ +--- + +## `recompile` — regenerate wiki pages + +Re-runs the compile step against already-ingested content (no re-conversion, no +re-indexing): + +```bash +openkb recompile attention # one document +openkb recompile --all --dry-run # preview the full set +openkb recompile --all --yes # rebuild everything +openkb recompile --all --refresh-schema # also refresh wiki/AGENTS.md +``` + +> ⚠️ Recompiling **overwrites** generated summaries and concept pages — any manual +> edits to those are lost. `--refresh-schema` backs up the old `AGENTS.md` to +> `AGENTS.md.bak` before replacing it. + +--- + +## `lint` — check (and fix) wiki integrity + +```bash +openkb lint # report only +openkb lint --fix # repair broken wikilinks first, then report +``` + +Checks broken `[[wikilinks]]`, orphaned pages, raw files with no wiki entry, +`index.md` drift, and invalid frontmatter, plus an LLM-driven knowledge pass. +Reports are written to `wiki/reports/lint_<timestamp>.md`. `--fix` fuzzy-matches +broken links or strips them to plain text when there's no match. + +--- + +## `list` & `status` + +```bash +openkb status +``` + +```text +Knowledge base: /Users/you/my-kb + +Knowledge Base Status: + Directory Files + -------------------- ---------- + sources 1 + summaries 1 + concepts 3 + entities 9 + reports 0 + raw 1 + + Total indexed: 1 document(s) + Last compile: 2026-06-25 14:30:22 +``` + +`openkb list` prints the document table (name · type · pages — long PDFs and cloud +imports both show as `pageindex`) followed by the compiled summaries, concepts, +entities, and reports. (The counts above match [`sample-wiki/`](sample-wiki/).) + +--- + +## `watch` — drop-in ingestion + +```bash +openkb watch +# in another terminal: cp new-paper.pdf raw/ → auto-compiles +``` + +Watches `raw/` and runs `add` on each new supported file until you press Ctrl-C. 
+ +--- + +## `feedback` — report an issue + +```bash +openkb feedback "add support for EPUB" --type feature +openkb feedback # interactive +``` + +Opens a **prefilled** GitHub issue (title, body, non-sensitive diagnostics like +OpenKB/Python version and platform) in your browser — you file it with your own +account. `--type` is one of `bug`, `feature`, `question`, `other`. Safe in +non-interactive shells (it won't hang on the type prompt). diff --git a/examples/commands/sample-wiki/concepts/attention-mechanisms.md b/examples/commands/sample-wiki/concepts/attention-mechanisms.md new file mode 100644 index 00000000..f86f256c --- /dev/null +++ b/examples/commands/sample-wiki/concepts/attention-mechanisms.md @@ -0,0 +1,81 @@ +--- +type: "Concept" +sources: ["summaries/attention-is-all-you-need.md"] +description: "Methods that weight sequence elements to focus computation on relevant inputs." +--- + +# Attention Mechanisms + +## Overview + +Attention mechanisms are techniques for computing a weighted combination of values based on the relevance of a query to a set of keys. In sequence models, they let a model focus on the most relevant parts of an input or output sequence when producing a representation or a prediction. + +In [[summaries/attention-is-all-you-need]], attention is the central building block of the [[concepts/transformer-models|Transformer]], replacing recurrence and convolution entirely. + +## Core idea + +An attention mechanism takes: + +- a **query** +- a set of **keys** +- a set of **values** + +It produces an output by scoring how well the query matches each key, turning those scores into weights, and returning a weighted sum of the values. 
+ +This makes attention useful for: + +- aligning input and output tokens +- modeling long-range dependencies +- selecting relevant context dynamically + +## Attention in the Transformer + +The paper describes two important forms of attention: + +### Scaled dot-product attention + +The Transformer computes attention using dot products between queries and keys, scaled by the square root of the key dimension before applying softmax. This scaling helps prevent large dot products from producing overly sharp softmax distributions. + +### Multi-head attention + +Instead of using a single attention operation, the model projects queries, keys, and values into multiple subspaces and applies attention in parallel. The outputs are concatenated and projected again. + +This lets the model attend to different kinds of relationships at the same time, such as syntax, position, or semantic association. + +## Self-attention + +A major special case is [[concepts/attention-mechanisms|self-attention]], where queries, keys, and values all come from the same sequence. In the Transformer: + +- the encoder uses self-attention to let each position attend to all others in the input +- the decoder uses masked self-attention so each position can only attend to earlier outputs +- encoder-decoder attention lets the decoder attend to the encoded input sequence + +Self-attention is the key mechanism that allows the Transformer to avoid recurrence while still modeling dependencies across the full sequence. + +## Why it matters + +The paper argues that attention mechanisms are advantageous because they: + +- reduce the number of sequential operations +- improve parallelization during training +- shorten paths between distant tokens +- work well for translation and parsing tasks + +Compared with recurrent layers, attention can connect any pair of positions in constant depth. Compared with convolutions, it avoids needing many stacked layers to relate distant positions. 
+ +## Positional information + +Because attention alone does not encode token order, the Transformer adds [[concepts/positional-encoding|positional encoding]] to token embeddings. This gives the model information about sequence position while preserving the parallel structure of attention-based computation. + +## Key takeaways from the paper + +- Attention is a flexible mechanism for dynamic relevance weighting. +- Self-attention can replace recurrence in sequence models. +- Multi-head attention improves expressiveness by attending from multiple subspaces. +- Attention-only architectures can achieve strong results on machine translation and parsing. + +## Related pages + +- [[concepts/transformer-models]] +- [[concepts/positional-encoding]] +- [[summaries/attention-is-all-you-need]] \ No newline at end of file diff --git a/examples/commands/sample-wiki/concepts/positional-encoding.md b/examples/commands/sample-wiki/concepts/positional-encoding.md new file mode 100644 index 00000000..dbd7c0ea --- /dev/null +++ b/examples/commands/sample-wiki/concepts/positional-encoding.md @@ -0,0 +1,48 @@ +--- +type: "Concept" +sources: ["summaries/attention-is-all-you-need.md"] +description: "Position information added to attention-only models so they can use order." +--- + +# Positional Encoding + +## Overview + +Positional encoding is the mechanism used in the Transformer to inject order information into token embeddings when the model has no recurrence or convolution. In [[summaries/attention-is-all-you-need]], it is the solution that lets the attention-only architecture know where each token appears in a sequence. + +## Why it is needed + +The Transformer model removes the sequential structure of recurrent networks. That makes it highly parallelizable, but it also means the model has no built-in sense of token order. Without an explicit position signal, the same set of tokens would look identical regardless of arrangement. 
+ +Positional encoding fixes this by adding a position-dependent vector to each input embedding at the bottom of the encoder and decoder stacks. + +## How it works + +The paper uses fixed sinusoidal encodings: + +- even dimensions use sine functions +- odd dimensions use cosine functions +- each dimension has a different frequency + +This creates a position vector with the same dimensionality as the embeddings, so the two can be summed directly. + +The encoding is defined so that relative offsets can be represented as linear combinations of the encodings, which helps the model learn to attend by relative position. + +## Key properties + +### Same dimensionality as embeddings +The positional vectors have dimension equal to the model dimension, so they can be added to token embeddings without changing shape. + +### Fixed, not learned +The paper experimented with learned positional embeddings and found similar results, but chose sinusoidal encodings because they may generalize better to sequence lengths not seen during training. + +### Supports extrapolation +Because the encoding is deterministic and not tied to a fixed lookup table, it can in principle be applied to longer sequences than those encountered during training. + +## Role in the Transformer + +Positional encoding is part of the input representation for both the encoder and decoder. It works alongside [[concepts/attention-mechanisms]] and [[concepts/transformer-models]] to make attention-based sequence modeling possible without recurrence. + +## Summary + +Positional encoding gives the Transformer access to token order while preserving the model’s fully parallel structure. In the original paper, sinusoidal positional encodings were an effective and elegant choice that performed on par with learned embeddings and offered potential extrapolation benefits. 
diff --git a/examples/commands/sample-wiki/concepts/transformer-models.md b/examples/commands/sample-wiki/concepts/transformer-models.md new file mode 100644 index 00000000..1a9ed75b --- /dev/null +++ b/examples/commands/sample-wiki/concepts/transformer-models.md @@ -0,0 +1,80 @@ +--- +type: "Concept" +sources: ["summaries/attention-is-all-you-need.md"] +description: "Attention-only sequence models built around stacked self-attention." +--- + +# Transformer Models + +Transformer models are neural sequence transduction models that replace recurrence and convolution with attention-only computation. They use stacked self-attention layers to model relationships between tokens, making them highly parallelizable and effective for tasks such as machine translation and parsing. + +## Core idea + +The Transformer architecture was introduced in [[summaries/attention-is-all-you-need]] as an alternative to recurrent sequence-to-sequence models. Instead of processing tokens sequentially, it computes interactions between positions through [[concepts/attention-mechanisms|attention mechanisms]], allowing all positions to be considered in parallel during training. + +## Main architectural features + +A standard Transformer uses an encoder-decoder structure: + +- **Encoder**: a stack of identical layers, each containing + - multi-head self-attention + - a position-wise feed-forward network + - residual connections and layer normalization +- **Decoder**: a similar stack with + - masked self-attention to preserve autoregressive generation + - encoder-decoder attention + - a position-wise feed-forward network + - residual connections and layer normalization + +The model also uses [[concepts/positional-encoding|positional encoding]] to inject token order information, since the architecture itself has no recurrence or convolution. 
+ +## Why the architecture matters + +Transformer models were important because they addressed several limitations of recurrent neural networks: + +- **Parallelism**: training can process many positions at once +- **Shorter dependency paths**: long-range relationships are easier to learn +- **Efficiency**: they can train faster while achieving strong quality + +The paper argues that self-attention reduces the maximum path length between positions to a constant per layer, which helps with long-distance dependencies. + +## Attention in Transformers + +Transformers use attention in three main places: + +- **Encoder self-attention**: each input position attends to all other input positions +- **Decoder self-attention**: each output position attends only to earlier output positions +- **Encoder-decoder attention**: decoder states attend to encoder outputs + +The paper uses **scaled dot-product attention** and **multi-head attention**. Multiple heads let the model attend to different subspaces and different kinds of relationships at the same time. + +## Practical configuration from the paper + +The original Transformer used: + +- 6 encoder layers and 6 decoder layers +- model dimension 512 in the base model +- 8 attention heads +- feed-forward inner dimension 2048 +- dropout and label smoothing during training + +The paper also reported a larger model that achieved even better translation results. + +## Results and impact + +In the source paper, Transformer models achieved state-of-the-art performance on: + +- [[entities/wmt-2014-english-german|WMT 2014 English-German]] translation +- [[entities/wmt-2014-english-french|WMT 2014 English-French]] translation + +They also generalized well to English constituency parsing. The architecture became foundational for later advances in natural language processing and sequence modeling. 
+ +## Related concepts + +- [[concepts/attention-mechanisms]] +- [[concepts/positional-encoding]] +- [[summaries/attention-is-all-you-need]] + +## Notable significance + +Transformer models are widely seen as a turning point in deep learning for language. Their attention-only design helped establish a new default architecture for machine translation, language modeling, and many other sequence tasks. \ No newline at end of file diff --git a/examples/commands/sample-wiki/entities/google-brain.md b/examples/commands/sample-wiki/entities/google-brain.md new file mode 100644 index 00000000..93e9e296 --- /dev/null +++ b/examples/commands/sample-wiki/entities/google-brain.md @@ -0,0 +1,45 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Organization" +description: "Google Brain is a Google research team involved in the Transformer paper." +--- + +## Overview + +Google Brain is a research organization within Google that appears in the author affiliations of [[summaries/attention-is-all-you-need]]. In that paper, multiple coauthors list Google Brain as their home institution, and the team is credited with major contributions to the development, implementation, and evaluation of the [[concepts/transformer-models|Transformer]] architecture. 
+ +## Role in *Attention Is All You Need* + +The paper credits Google Brain personnel for several core aspects of the work, including: + +- proposing and evaluating self-attention as a replacement for recurrent layers +- designing and implementing the first Transformer models +- proposing scaled dot-product attention and multi-head attention +- building and improving the codebase used for experiments +- supporting efficient inference and visualization work + +## Named contributors affiliated with Google Brain in the paper + +Several authors are listed with Google Brain affiliations, including: + +- Ashish Vaswani +- Noam Shazeer +- Łukasz Kaiser + +The paper also notes collaborative work across [[entities/google-research|Google Research]] and other institutions. + +## Related ideas + +Google Brain's work in this paper is closely tied to: + +- [[concepts/attention-mechanisms|attention mechanisms]] +- [[concepts/transformer-models|Transformer models]] +- [[concepts/positional-encoding|positional encoding]] + +## Related documents + +- [[summaries/attention-is-all-you-need]] + +## Notes + +In this document, Google Brain is presented as the institutional setting for a major breakthrough in neural machine translation and sequence modeling, and as a central contributor to the Transformer architecture that became foundational in later NLP systems. \ No newline at end of file diff --git a/examples/commands/sample-wiki/entities/google-research.md b/examples/commands/sample-wiki/entities/google-research.md new file mode 100644 index 00000000..d839a07e --- /dev/null +++ b/examples/commands/sample-wiki/entities/google-research.md @@ -0,0 +1,29 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Organization" +description: "Google Research is the research division of Google involved in the Transformer paper." 
+--- + +## Overview + +[[entities/google-research|Google Research]] is a research organization at Google that co-authored [[summaries/attention-is-all-you-need]] and contributed to the development of the [[concepts/transformer-models|Transformer]] architecture. + +## In this document + +In this paper, Google Research authors include Niki Parmar and Jakob Uszkoreit. The document presents the Transformer, an attention-only sequence transduction model that removes recurrence and convolutions in favor of [[concepts/attention-mechanisms|attention mechanisms]]. + +## Key facts from the paper + +- Google Research authors helped design and evaluate the Transformer. +- The paper reports state-of-the-art translation results on [[entities/wmt-2014|WMT 2014]] tasks, including [[entities/wmt-2014-english-german|English-German]] and [[entities/wmt-2014-english-french|English-French]]. +- The work was presented at [[entities/nips-2017|NIPS 2017]]. +- The paper also references the Tensor2Tensor codebase, associated with [[entities/tensor2tensor|tensor2tensor]]. + +## Related pages + +- [[summaries/attention-is-all-you-need]] +- [[concepts/transformer-models]] +- [[concepts/attention-mechanisms]] +- [[entities/google]] +- [[entities/google-brain]] +- [[entities/tensor2tensor]] diff --git a/examples/commands/sample-wiki/entities/google.md b/examples/commands/sample-wiki/entities/google.md new file mode 100644 index 00000000..b6be31d4 --- /dev/null +++ b/examples/commands/sample-wiki/entities/google.md @@ -0,0 +1,30 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Organization" +description: "Google is the company behind the Transformer paper's research team." +--- + +# Google + +Google is the organization credited in this paper as the employer of several authors and the institution behind the research that produced the Transformer. 
+ +## Relation to the paper + +In [[summaries/attention-is-all-you-need]], Google appears as the institutional home of the authors from [[entities/google-brain]] and [[entities/google-research]]. The paper states that the work was carried out across those research groups and later released through the NIPS 2017 publication venue, [[entities/nips-2017]]. + +## Key facts from this document + +- Google granted permission to reproduce the tables and figures in the paper. +- The paper's authors include researchers affiliated with Google Brain and Google Research. +- The work introduced the [[concepts/transformer-models|Transformer]] architecture. +- The research was applied to machine translation tasks involving [[entities/wmt-2014-english-german]] and [[entities/wmt-2014-english-french]]. +- The paper mentions code released at the Tensor2Tensor project, associated with [[entities/tensor2tensor]]. + +## Related pages + +- [[entities/google-brain]] +- [[entities/google-research]] +- [[entities/tensor2tensor]] +- [[concepts/transformer-models]] +- [[concepts/positional-encoding]] +- [[summaries/attention-is-all-you-need]] \ No newline at end of file diff --git a/examples/commands/sample-wiki/entities/nips-2017.md b/examples/commands/sample-wiki/entities/nips-2017.md new file mode 100644 index 00000000..e27f6630 --- /dev/null +++ b/examples/commands/sample-wiki/entities/nips-2017.md @@ -0,0 +1,31 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Event" +description: "The 31st Conference on Neural Information Processing Systems in 2017." +--- + +# NeurIPS 2017 + +## Overview + +NeurIPS 2017 was the 31st Conference on Neural Information Processing Systems, held in Long Beach, California, USA. The paper [[summaries/attention-is-all-you-need]] was presented at this conference. + +## Key facts from the document + +- The paper credits its venue as the **31st Conference on Neural Information Processing Systems (NIPS 2017)**. 
+- The conference location is given as **Long Beach, CA, USA**. +- The paper is dated **2 Aug 2023** in its arXiv version, but the conference venue referenced is the 2017 NeurIPS meeting. +- The document introduces the [[concepts/transformer-models|Transformer]] architecture and positions it as a major sequence modeling advance. + +## Related pages + +- [[summaries/attention-is-all-you-need]] +- [[concepts/transformer-models]] +- [[concepts/attention-mechanisms]] +- [[entities/google]] +- [[entities/google-brain]] +- [[entities/google-research]] +- [[entities/tensor2tensor]] +- [[entities/wmt-2014]] +- [[entities/wmt-2014-english-german]] +- [[entities/wmt-2014-english-french]] \ No newline at end of file diff --git a/examples/commands/sample-wiki/entities/tensor2tensor.md b/examples/commands/sample-wiki/entities/tensor2tensor.md new file mode 100644 index 00000000..5093fbc1 --- /dev/null +++ b/examples/commands/sample-wiki/entities/tensor2tensor.md @@ -0,0 +1,27 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Product" +description: "Open-source TensorFlow toolkit used to implement and evaluate the Transformer" +--- + +## Overview +Tensor2Tensor is a TensorFlow-based research toolkit for building and evaluating sequence modeling models. In [[summaries/attention-is-all-you-need]], it is described as the codebase that the authors used to develop, train, and evaluate the Transformer. + +## Role in the paper +The paper credits Tensor2Tensor as an important part of the implementation effort behind the Transformer work. It was used to replace an earlier codebase and to accelerate experimentation, model tuning, and evaluation. + +## Key facts from the document +- Used for designing, implementing, tuning, and evaluating Transformer variants. +- Helped replace an earlier internal codebase. +- Supported the authors' translation experiments and broader model development. 
+- Mentioned in the paper's closing note as the public codebase associated with the work. + +## Relationship to the Transformer +Tensor2Tensor is closely associated with the development of [[concepts/transformer-models|Transformer models]] described in [[summaries/attention-is-all-you-need]]. The toolkit provided the experimental infrastructure for the architecture that relies on [[concepts/attention-mechanisms|attention mechanisms]] and [[concepts/positional-encoding|positional encoding]]. + +## Related entities +- [[entities/google]] +- [[entities/google-brain]] +- [[entities/google-research]] +- [[entities/wmt-2014-english-german]] +- [[entities/wmt-2014-english-french]] \ No newline at end of file diff --git a/examples/commands/sample-wiki/entities/university-of-toronto.md b/examples/commands/sample-wiki/entities/university-of-toronto.md new file mode 100644 index 00000000..4243bff2 --- /dev/null +++ b/examples/commands/sample-wiki/entities/university-of-toronto.md @@ -0,0 +1,25 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Organization" +description: "Canadian university affiliation of a Transformer paper author" +--- + +# University of Toronto + +## Overview + +The University of Toronto is a university in Canada. In [[summaries/attention-is-all-you-need]], it appears as the affiliation of author Aidan N. Gomez, who is listed with the University of Toronto on the paper "Attention Is All You Need." + +## Relation to the paper + +- Aidan N. Gomez is one of the authors of [[summaries/attention-is-all-you-need]]. +- The paper credits his affiliation as the [[entities/university-of-toronto|University of Toronto]]. +- The work was presented at [[entities/nips-2017|NIPS 2017]] and later became foundational to [[concepts/transformer-models]]. 
+ +## Connected topics + +- [[concepts/transformer-models]] +- [[concepts/attention-mechanisms]] +- [[entities/google-brain]] +- [[entities/google-research]] +- [[entities/tensor2tensor]] diff --git a/examples/commands/sample-wiki/entities/wmt-2014-english-french.md b/examples/commands/sample-wiki/entities/wmt-2014-english-french.md new file mode 100644 index 00000000..13b1779d --- /dev/null +++ b/examples/commands/sample-wiki/entities/wmt-2014-english-french.md @@ -0,0 +1,31 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Event" +description: "The WMT 2014 English-French machine translation benchmark." +--- + +## Overview +WMT 2014 English-French is a machine translation evaluation task and dataset used in the paper [[summaries/attention-is-all-you-need]]. It serves as one of the main benchmarks for comparing translation quality and training cost across neural machine translation systems. + +## Role in the paper +The paper reports results on this benchmark to show that the [[concepts/transformer-models|Transformer]] achieves strong translation quality with much lower training cost than prior models. + +## Key facts from the document +- The dataset used for training contains about **36 million sentence pairs**. +- Tokens were split into a **32,000 word-piece vocabulary**. +- The paper reports a **BLEU score of 41.8** for the Transformer big model on this task. +- The model trained for **3.5 days on 8 GPUs**. +- Compared with previous single-model systems, the Transformer achieved a new state of the art at a fraction of the training cost. 
+ +## Related concepts and entities +- [[concepts/transformer-models]] +- [[concepts/attention-mechanisms]] +- [[entities/wmt-2014]] +- [[entities/google]] +- [[entities/google-brain]] +- [[entities/google-research]] +- [[entities/tensor2tensor]] +- [[summaries/attention-is-all-you-need]] + +## Notes +In the paper, this benchmark is paired with the WMT 2014 English-German task as the main translation evaluation setting. \ No newline at end of file diff --git a/examples/commands/sample-wiki/entities/wmt-2014-english-german.md b/examples/commands/sample-wiki/entities/wmt-2014-english-german.md new file mode 100644 index 00000000..cdef2794 --- /dev/null +++ b/examples/commands/sample-wiki/entities/wmt-2014-english-german.md @@ -0,0 +1,32 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Event" +description: "The WMT 2014 English-German machine translation benchmark." +--- + +# WMT 2014 English-German + +## What it is + +WMT 2014 English-German is a machine translation benchmark used to evaluate sequence-to-sequence models. In [[summaries/attention-is-all-you-need]], it is one of the two main tasks used to test the [[concepts/transformer-models|Transformer]]. + +## Key facts from the paper + +- The dataset contains about **4.5 million sentence pairs**. +- Sentences were encoded using **byte-pair encoding** with a shared source-target vocabulary of about **37,000 tokens**. +- Training batches were formed by approximate sequence length and contained about **25,000 source tokens** and **25,000 target tokens**. +- The paper reports results on the **newstest2014** test set. +- The Transformer achieved **28.4 BLEU** with the big model, exceeding prior systems by more than 2 BLEU. +- The base model also outperformed previously published models at much lower training cost. 
+ +## Role in the paper + +This benchmark is the primary English-to-German evaluation used to demonstrate that attention-only architectures can outperform recurrent and convolutional sequence models. It serves as the main evidence for the effectiveness of [[concepts/attention-mechanisms]] in machine translation. + +## Related pages + +- [[summaries/attention-is-all-you-need]] +- [[concepts/transformer-models]] +- [[concepts/attention-mechanisms]] +- [[entities/wmt-2014]] +- [[entities/tensor2tensor]] \ No newline at end of file diff --git a/examples/commands/sample-wiki/entities/wmt-2014.md b/examples/commands/sample-wiki/entities/wmt-2014.md new file mode 100644 index 00000000..0cb72443 --- /dev/null +++ b/examples/commands/sample-wiki/entities/wmt-2014.md @@ -0,0 +1,31 @@ +--- +sources: ["summaries/attention-is-all-you-need.md"] +type: "Event" +description: "2014 machine translation benchmark and data release." +--- + +# WMT 2014 + +## Overview +WMT 2014 is the machine translation benchmark year used in [[summaries/attention-is-all-you-need]] to evaluate the Transformer on English-German and English-French translation. In the paper, it serves as the standard comparison point for prior state-of-the-art systems and for reporting BLEU scores. + +## Key facts from the paper +- The paper evaluates on the **WMT 2014 English-German** task and the **WMT 2014 English-French** task. +- These benchmarks are used to compare the Transformer against prior sequence-to-sequence systems. +- The Transformer achieves **28.4 BLEU** on English-German and **41.8 BLEU** on English-French. +- The paper reports that these results come at a fraction of the training cost of earlier models. 
+ +## Related pages +- [[entities/wmt-2014-english-german]] +- [[entities/wmt-2014-english-french]] +- [[concepts/transformer-models]] +- [[concepts/attention-mechanisms]] +- [[entities/tensor2tensor]] +- [[entities/google]] +- [[entities/google-brain]] +- [[entities/google-research]] +- [[entities/nips-2017]] +- [[summaries/attention-is-all-you-need]] + +## Notes +In this document, WMT 2014 refers to the benchmark settings and datasets used for machine translation evaluation rather than a single isolated conference event. \ No newline at end of file diff --git a/examples/commands/sample-wiki/explorations/how-does-the-transformer-replace-recurrence-with-self-attent.md b/examples/commands/sample-wiki/explorations/how-does-the-transformer-replace-recurrence-with-self-attent.md new file mode 100644 index 00000000..1eb21441 --- /dev/null +++ b/examples/commands/sample-wiki/explorations/how-does-the-transformer-replace-recurrence-with-self-attent.md @@ -0,0 +1,9 @@ +--- +query: "How does the Transformer replace recurrence with self-attention, and what does that buy in training speed?" +--- + +The Transformer removes recurrence entirely and uses stacked self-attention in the encoder and decoder instead. In place of step-by-step RNN computation, each layer attends directly over all tokens, with positional encoding added so the model still knows order. + +What that buys in training speed is mainly **more parallelism**: because self-attention has **constant sequential depth** per layer, the model can process tokens much more in parallel than recurrent models, which must proceed step by step. The wiki summary also notes that this leads to **shorter paths for long-range dependencies** and **faster training**, with the paper emphasizing substantially better training parallelization. + +In short: **recurrence is replaced by attention over the whole sequence, which makes training much more parallel and therefore faster**. 
diff --git a/examples/commands/sample-wiki/summaries/attention-is-all-you-need.md b/examples/commands/sample-wiki/summaries/attention-is-all-you-need.md new file mode 100644 index 00000000..3af4f670 --- /dev/null +++ b/examples/commands/sample-wiki/summaries/attention-is-all-you-need.md @@ -0,0 +1,136 @@ +--- +type: "Summary" +description: "Introduces the Transformer, a fully attention-based sequence model for translation." +doc_type: short +full_text: "sources/attention-is-all-you-need.md" +--- + +# Attention Is All You Need + +## Summary + +This paper introduces the [[concepts/transformer-models|Transformer]], a new sequence transduction architecture that replaces recurrent and convolutional layers with attention-only computation. The main claim is that self-attention is sufficient to model dependencies in encoder-decoder tasks while enabling much greater parallelization and faster training. + +## Core idea + +Traditional sequence-to-sequence systems relied on recurrent neural networks or convolutions, often combined with attention mechanisms. The Transformer removes recurrence entirely and uses stacked [[concepts/attention-mechanisms|attention mechanisms]] in both the encoder and decoder, plus position-wise feed-forward networks. 
+ +This design has three major benefits: + +- higher training parallelism +- shorter paths for long-range dependencies +- strong translation quality with less compute + +## Architecture + +The Transformer uses an encoder-decoder structure: + +- **Encoder**: 6 identical layers, each with + - multi-head self-attention + - position-wise feed-forward network + - residual connections and layer normalization +- **Decoder**: 6 identical layers, each with + - masked self-attention + - encoder-decoder attention + - position-wise feed-forward network + - residual connections and layer normalization + +The model uses: + +- scaled dot-product attention +- [[concepts/attention-mechanisms|multi-head attention]] +- learned token embeddings +- sinusoidal [[concepts/positional-encoding|positional encoding]] + +## Attention mechanism + +Attention maps a query and a set of key-value pairs to an output vector computed as a weighted sum of values. The paper defines: + +- **Scaled dot-product attention**: attention scores are scaled by the square root of key dimension to avoid large dot products. +- **Multi-head attention**: multiple learned projections allow the model to attend to different representation subspaces and positions simultaneously. + +The authors argue that multi-head attention improves expressiveness compared with a single attention head. 
+ +## Why self-attention + +The paper compares self-attention with recurrent and convolutional layers on: + +- per-layer computational complexity +- amount of parallelizable computation +- maximum path length between positions + +Main conclusion: + +- self-attention gives constant sequential depth +- it shortens paths between distant tokens +- it can be more efficient than recurrence for typical sentence-length inputs + +A table in the paper shows that self-attention has constant sequential operations and constant maximum path length per layer, while recurrent and convolutional alternatives require longer sequential computation or longer dependency paths. + +## Positional information + +Because the architecture has no recurrence or convolution, it must add position information explicitly. The paper uses fixed sinusoidal encodings, chosen because they may generalize to longer sequences and make relative position reasoning easier. Learned positional embeddings were also tested and gave similar results. + +## Training setup + +The models were trained on [[entities/wmt-2014|WMT 2014]] [[entities/wmt-2014-english-german|English-German]] and [[entities/wmt-2014-english-french|English-French]] translation data using: + +- byte-pair encoding or word-piece vocabularies +- Adam optimizer +- learning rate warmup and inverse square-root decay +- dropout and label smoothing + +The big model trained on 8 P100 GPUs for about 3.5 days; the base model trained for about 12 hours. + +## Results + +The Transformer achieved state-of-the-art results at the time: + +- **[[entities/wmt-2014-english-german|WMT 2014 English-German]]**: 28.4 BLEU with the big model +- **[[entities/wmt-2014-english-french|WMT 2014 English-French]]**: 41.8 BLEU with the big model + +These results surpassed previous systems, including ensembles, while using substantially less training cost. 
+ +## Ablations and variations + +The paper evaluates architectural variations and finds that: + +- more attention heads help up to a point +- too small key dimensions hurt performance +- larger models perform better +- dropout is important for generalization +- learned positional embeddings perform similarly to sinusoidal ones + +## Generalization beyond translation + +To test whether the model transfers beyond machine translation, the authors apply it to English constituency parsing. The Transformer performs competitively on both supervised and semi-supervised settings, showing that the architecture generalizes well to structured prediction tasks. + +## Interpretation and visualization + +Attention visualizations suggest that individual heads learn different behaviors, such as: + +- long-distance dependency tracking +- anaphora resolution +- syntactic structure recognition + +The authors present these as evidence that attention heads can capture interpretable linguistic patterns. + +## Conclusion + +The paper establishes the Transformer as a simple and effective alternative to recurrent and convolutional sequence models. Its attention-only design improves parallelism, reduces training cost, and achieves state-of-the-art translation quality. The work became foundational for modern [[concepts/transformer-models|Transformer models]] in natural language processing and beyond. 
+ +## Related Concepts +- [[concepts/attention-mechanisms]] +- [[concepts/transformer-models]] +- [[concepts/positional-encoding]] + +## Entities +- [[entities/google]] +- [[entities/google-brain]] +- [[entities/google-research]] +- [[entities/university-of-toronto]] +- [[entities/nips-2017]] +- [[entities/wmt-2014]] +- [[entities/wmt-2014-english-german]] +- [[entities/wmt-2014-english-french]] +- [[entities/tensor2tensor]] diff --git a/examples/configuration/README.md b/examples/configuration/README.md new file mode 100644 index 00000000..a3025339 --- /dev/null +++ b/examples/configuration/README.md @@ -0,0 +1,163 @@ +# Configuration & setup + +Everything that controls how OpenKB talks to your LLM lives in two places: +`.openkb/config.yaml` (model, language, tuning) and a `.env` file (your API key). + +--- + +## 1. Initialize a knowledge base + +```bash +mkdir my-kb && cd my-kb +openkb init +``` + +`init` is interactive in a terminal and prompts for three things: + +- **Model** — in LiteLLM `provider/model` format. OpenAI models can drop the + prefix (`gpt-5.4`); others need it (`anthropic/claude-sonnet-4-6`, + `gemini/gemini-3-flash-preview`). +- **LLM API key** — hidden input; if you provide one it's written to `.env` with + `0600` permissions. Press Enter to skip and set it later. +- **Language** — the output language for your wiki (`en`, `ko`, `Korean`, …). + +Skip the prompts entirely with flags — handy in scripts: + +```bash +openkb init --model anthropic/claude-sonnet-4-6 --language en +openkb init -m gpt-5.4 -l ko +``` + +> **Non-interactive (pipes/CI):** prompts are gated on a TTY. When stdin isn't a +> terminal, `init` uses the defaults instead of hanging, so +> `printf 'gpt-5.4\n\nen\n' | openkb init` works in a script. + +`init` creates: `raw/`, `wiki/{summaries,concepts,entities,sources/images}`, +`wiki/AGENTS.md`, `wiki/index.md`, `wiki/log.md`, and `.openkb/config.yaml`. + +--- + +## 2. 
`.openkb/config.yaml` reference + +The file `init` writes is small; everything else is optional. This is the shipped +[`config.yaml.example`](../../config.yaml.example), verbatim: + +```yaml +model: gpt-5.4 # LLM model (any LiteLLM-supported provider) +language: en # Wiki output language +pageindex_threshold: 20 # PDF pages threshold for PageIndex + +# Optional: override the entity-type vocabulary used for entity pages. +# Omit this key to use the default 7 types +# (person, organization, place, product, work, event, other). +# entity_types: +# - person +# - organization +# - dataset +# - model + +# Optional: LLM / LiteLLM tuning. Keys are forwarded to LiteLLM; `timeout` and +# `extra_headers` apply per request, the rest are set as litellm.<key>. +# litellm: +# timeout: 1200 # per-request timeout (s); raise for slow local backends (Ollama) +# drop_params: true # let LiteLLM drop params a provider rejects (e.g. Ollama) +# num_retries: 3 +# extra_headers: # extra HTTP headers some providers need (e.g. GitHub Copilot) +# Editor-Version: vscode/1.95.0 +# Copilot-Integration-Id: vscode-chat +``` + +| Key | Default | What it does | +| --- | --- | --- | +| `model` | `gpt-5.4` | LLM used for all compile/query/chat work. | +| `language` | `en` | Language the wiki is written in. | +| `pageindex_threshold` | `20` | PDFs with **more** pages than this take the long-doc (PageIndex) path; PDFs at or below it go through the short-doc path. See [`pageindex-cloud/`](../pageindex-cloud/). | +| `entity_types` | 7 defaults | Custom vocabulary for entity pages. `other` is always kept. | +| `litellm:` | – | A pass-through block for LiteLLM. See below. | + +### The `litellm:` block + +OpenKB forwards this block to LiteLLM so you can tune anything LiteLLM supports — +you set it, LiteLLM uses it. Two keys are special: + +- `timeout` and `extra_headers` are applied **per request** (they're needed on + every call).
+- Every other key (`drop_params`, `num_retries`, `ssl_verify`, …) is set on the + `litellm` module as a process-wide global. + +#### A local Ollama setup + +Ollama rejects some OpenAI-style params; `drop_params` lets LiteLLM strip them +instead of erroring, and a generous `timeout` covers slow local inference: + +```yaml +model: ollama/llama3.1 +language: en +litellm: + drop_params: true + timeout: 1200 +``` + +#### GitHub Copilot / ChatGPT-subscription providers + +These need extra headers and use OAuth (no API key): + +```yaml +model: github_copilot/gpt-4o +language: en +litellm: + extra_headers: + Editor-Version: vscode/1.95.0 + Copilot-Integration-Id: vscode-chat +``` + +--- + +## 3. API keys & providers + +Set one universal key and OpenKB routes it to the right provider based on your +`model`. The shipped [`.env.example`](../../.env.example): + +```bash +# OpenAI: LLM_API_KEY=sk-... +# Anthropic: LLM_API_KEY=sk-ant-... +# Gemini: LLM_API_KEY=AIza... +LLM_API_KEY=your-key-here +``` + +- **Provider auto-detection:** `model: anthropic/claude-sonnet-4-6` → your + `LLM_API_KEY` is exported as `ANTHROPIC_API_KEY` automatically. +- **OAuth providers** (`chatgpt/*`, `github_copilot/*`) need **no** key — OpenKB + won't warn about a missing one. +- **PageIndex Cloud** uses a separate `PAGEINDEX_API_KEY` (see + [`pageindex-cloud/`](../pageindex-cloud/)). + +**Where keys are read from** (first match wins, existing env always respected): + +1. your shell environment +2. `<kb-dir>/.env` +3. `~/.config/openkb/.env` (a global key shared across all your KBs) + +--- + +## 4. Where is "the KB"? + +Most commands need to know which KB they act on. Resolution order: + +1. `--kb-dir /path/to/kb` (or `OPENKB_DIR=/path/to/kb`) — explicit override. +2. Walk up from the current directory looking for a `.openkb/` folder. +3. The global default registered by `openkb use <kb-dir>` (stored in + `~/.config/openkb/global.yaml`).
+ +```bash +# Run a query against a specific KB from anywhere +openkb --kb-dir ~/research-kb query "what changed in v2?" + +# Make one KB the default, then forget about paths +openkb use ~/research-kb +openkb status # now resolves ~/research-kb from any directory +``` + +--- + +Next: [`commands/`](../commands/) — the everyday ingest-and-query loop. diff --git a/examples/pageindex-cloud/README.md b/examples/pageindex-cloud/README.md new file mode 100644 index 00000000..344d6352 --- /dev/null +++ b/examples/pageindex-cloud/README.md @@ -0,0 +1,129 @@ +# PageIndex Cloud workflows + +OpenKB scales to long documents with [PageIndex](https://github.com/VectifyAI/PageIndex)'s +tree-based, vectorless retrieval. This guide covers the three ways long documents +flow through OpenKB: **local** indexing, **cloud** OCR/indexing, and **importing** +a document already indexed in PageIndex Cloud. + +> **Try it with** [`../docs/deepseek-r1.pdf`](../docs/deepseek-r1.pdf) (~22 pages, +> just over the threshold → long-doc path) or +> [`../docs/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf`](../docs/) +> (700+ pages → exercises cloud OCR + page windowing). + +--- + +## Short vs. 
long: the threshold + +When you `openkb add` a PDF, its page count decides the path: + +| Page count | Path | Engine | +| --- | --- | --- | +| `≤ pageindex_threshold` (default 20) | short-doc | markitdown → LLM reads full text | +| `> pageindex_threshold` | long-doc | PageIndex tree index | + +For the long-doc path, whether it runs **locally** or in the **cloud** depends only +on one environment variable: + +| `PAGEINDEX_API_KEY` | Long-doc engine | +| --- | --- | +| unset | local (pymupdf text + image extraction) | +| set | PageIndex Cloud OCR (markdown + figures), with local fallback if the cloud call fails | + +```bash +# Local long-doc indexing — no key, no network +openkb add ../docs/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf + +# Cloud OCR for the same long PDF — just set the key first +export PAGEINDEX_API_KEY="pi-..." +openkb add ../docs/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf +``` + +Either way the result is the same set of wiki artifacts (`wiki/sources/<doc>.json` + +`wiki/summaries/<doc>.md` + concept/entity pages) and the document shows up as +type `pageindex` in `openkb list`. + +--- + +## Importing an already-indexed cloud document + +If a document is **already indexed in PageIndex Cloud**, you don't need the local +PDF at all — import it by `doc_id`: + +```bash +export PAGEINDEX_API_KEY="pi-..." +openkb add --from-pageindex-cloud <doc_id> +``` + +This fetches the document tree (structure + description) and OCR'd page content +from the cloud, then compiles concepts and entities locally — exactly like a local +long PDF, but with no file on disk. It is registered with type `pageindex_cloud`. + +What it does **not** do: + +- It never modifies the cloud corpus — import is read-only. +- It's **idempotent** — re-importing the same `doc_id` is skipped. +- `openkb remove` on an imported doc cleans only your **local** wiki artifacts; + the document in PageIndex Cloud is left untouched.
+ +> **Page windowing:** PageIndex caps a single page-content request at 1000 pages. +> OpenKB fetches in 1000-page windows and stops when a window comes back short, so +> documents of any length (including 700+ page books) import completely. + +### Finding a `doc_id` + +List what's in your cloud collection with the PageIndex client: + +```python +import os +from pageindex import PageIndexClient + +client = PageIndexClient(api_key=os.environ["PAGEINDEX_API_KEY"]) +col = client.collection() + +for doc in col.list_documents(): + print(doc["doc_id"], "—", doc.get("doc_name")) +``` + +```text +pi-cmn3k8... — attention-is-all-you-need.pdf +pi-x7f0aa... — deepseek-r1.pdf +``` + +Then: + +```bash +openkb add --from-pageindex-cloud pi-cmn3k8... +``` + +--- + +## An import, end to end + +```bash +$ export PAGEINDEX_API_KEY="pi-..." +$ openkb add --from-pageindex-cloud pi-cmn3k8... +Importing from PageIndex Cloud: pi-cmn3k8... + Fetching structure + OCR pages... + Compiling concepts and entities... + [OK] attention-is-all-you-need imported from PageIndex Cloud. + +$ openkb add --from-pageindex-cloud pi-cmn3k8... + [SKIP] Already imported from PageIndex Cloud: pi-cmn3k8... +``` + +The registry entry it writes (`.openkb/hashes.json`) — note there's no `raw_path`, +because there's no local file: + +```json +{ + "name": "attention-is-all-you-need.pdf", + "doc_name": "attention-is-all-you-need-abc12345", + "type": "pageindex_cloud", + "path": "pageindex-cloud:pi-cmn3k8...", + "source_path": "wiki/sources/attention-is-all-you-need-abc12345.json", + "doc_id": "pi-cmn3k8..." +} +``` + +After import, the imported doc behaves like any other document — `query`, `chat`, +`recompile`, `visualize`, and `skill new` all see it. 
diff --git a/examples/skills/README.md b/examples/skills/README.md new file mode 100644 index 00000000..e14b7cde --- /dev/null +++ b/examples/skills/README.md @@ -0,0 +1,134 @@ +# Skill Factory + +The Skill Factory distills your wiki into a **redistributable agent skill** — a +self-contained directory that Claude Code, Codex, and Gemini CLI can load +natively. Where `query`/`chat` answer questions, a skill packages your knowledge +so *another* agent reasons with it. + +> **Real artifact in this folder:** [`transformer-attention/`](transformer-attention/) +> was generated by `openkb skill new` from one paper — the actual +> [`SKILL.md`](transformer-attention/SKILL.md), its +> [`references/`](transformer-attention/references/), and the +> [`marketplace.json`](marketplace.json) it wrote. Open the `SKILL.md`: it's a set +> of *decision rules* and a precise trigger description, not a summary. + +A skill is: + +``` +output/skills/<name>/ +├── SKILL.md # frontmatter (name + description) + the worldview/decision rules +└── references/ # optional supporting pages (resolved wikilinks) +``` + +--- + +## Create a skill + +```bash +openkb skill new <name> "<intent>" +``` + +- `<name>` — a kebab-case slug (also the skill's name and output folder). +- `<intent>` — a natural-language description of what the skill should do; this + shapes both what content the generator pulls from the wiki and the skill's + `description:` (which is what makes it *trigger* in an agent). + +```bash +openkb skill new transformer-attention \ + "Reason about the Transformer architecture — self-attention, multi-head \ + attention, positional encoding — and why attention replaced recurrence" +``` + +```text +Compiling skill 'transformer-attention'...
+Saved: output/skills/transformer-attention/ +Manifest: .claude-plugin/marketplace.json updated + +Install locally: cp -r output/skills/transformer-attention ~/.claude/skills/ +Share: push your KB to GitHub, then npx skills@latest add <owner>/<repo> +``` + +That command produced [`transformer-attention/`](transformer-attention/) and +[`marketplace.json`](marketplace.json) in this folder. Recompiling over an existing +skill (`--yes` to skip the overwrite prompt) backs up the old version into +`output/skills/<name>-workspace/iteration-N/` with a `diff.md`. + +--- + +## Iterate, validate, evaluate + +```bash +openkb skill history transformer-attention # list saved iterations +openkb skill rollback transformer-attention --to 1 # restore iteration-1 +openkb skill validate transformer-attention --strict # structural checks +openkb skill eval transformer-attention --save # measure quality +``` + +- **`validate`** runs structural checks only (no LLM): frontmatter is valid, name + is kebab-case, description length, file-size caps, and that wikilinks resolve. + `--strict` turns warnings into a non-zero exit — good for CI. Omit the name to + validate every skill in the KB. +- **`eval`** generates trigger/no-trigger prompts and grades two things: + +```text +$ openkb skill eval transformer-attention --save --count 15 +Generating eval set for 'transformer-attention' (count=15 per side)... + +Eval set: 30 prompts +Trigger accuracy: 28/30 (93%) — does the description fire on the right questions? +Body coverage: 27/28 (96%) — does SKILL.md actually support what it promises? + +Trigger misses (2): + - [should-trigger → no] "How does attention scale with sequence length?" + - [should-not → yes] "What's the capital of France?" + +Eval set persisted to .openkb/eval-sets/transformer-attention.json +``` + +`--save` lets you re-run the *same* eval set later (`--eval-set <path>`) so you can +compare iterations apples-to-apples.
+ +--- + +## Anatomy of the generated skill + +The frontmatter is what an agent matches against — the `description` spells out +*exactly* when to fire and when **not** to. From this folder's +[`transformer-attention/SKILL.md`](transformer-attention/SKILL.md): + +```yaml +--- +name: transformer-attention +description: Use when reasoning about Transformer self-attention, multi-head + attention, positional encoding, masked decoder attention, or why attention + replaced recurrence/convolutions in sequence models; not for generic NLP or + unrelated attention topics. +--- +``` + +…followed by decision rules like *"When recurrence is the bottleneck, prefer +attention-only computation"* and an explicit **Known gaps** section. Each compile +also (re)writes the plugin manifest at `.claude-plugin/marketplace.json` +([copy here](marketplace.json)), which is what `npx skills add` installs. + +> OpenKB ships its own distilled skill too — +> [`../../skills/openkb/SKILL.md`](../../skills/openkb/SKILL.md) — the one that +> teaches an agent to navigate any OpenKB wiki. + +--- + +## Distribute it + +```bash +# Locally, for your own Claude Code: +cp -r output/skills/transformer-attention ~/.claude/skills/ + +# To others — push the KB to GitHub, then they run: +npx skills@latest add <owner>/<repo> +``` + +Once installed, the skill loads automatically whenever an agent's task matches its +`description` — which is exactly what `skill eval` measures. + +> **Note:** `marketplace.json`'s `owner`/`author` are derived from your local git +> config. The copy here has been scrubbed to `Your Name` / `you@example.com`. 
diff --git a/examples/skills/marketplace.json b/examples/skills/marketplace.json new file mode 100644 index 00000000..e2e1f0c1 --- /dev/null +++ b/examples/skills/marketplace.json @@ -0,0 +1,26 @@ +{ + "name": "vectify", + "owner": { + "name": "Your Name", + "email": "you@example.com" + }, + "metadata": { + "description": "Skills compiled from the demo-kb knowledge base via OpenKB.", + "version": "0.1.0" + }, + "plugins": [ + { + "name": "openkb", + "description": "Knowledge skills compiled from this OpenKB-managed knowledge base.", + "source": "./", + "version": "0.1.0", + "author": { + "name": "Your Name", + "email": "you@example.com" + }, + "skills": [ + "./output/skills/transformer-attention" + ] + } + ] +} diff --git a/examples/skills/transformer-attention/SKILL.md b/examples/skills/transformer-attention/SKILL.md new file mode 100644 index 00000000..a5b9b923 --- /dev/null +++ b/examples/skills/transformer-attention/SKILL.md @@ -0,0 +1,52 @@ +--- +name: transformer-attention +description: Use when reasoning about Transformer self-attention, multi-head attention, positional encoding, masked decoder attention, or why attention replaced recurrence/convolutions in sequence models; not for generic NLP or unrelated attention topics. +--- + +# Transformer Attention Reasoning + +This skill encodes the practical worldview behind the original Transformer: sequence modeling works better when you stop stepping through tokens one at a time and instead let positions interact directly through attention. 
Use it to answer questions like “why does self-attention help,” “why do we need positional encoding,” “what does masking protect,” or “when is multi-head attention useful?” + +## When to use this skill + +- User is comparing Transformers against RNNs, LSTMs, GRUs, or convolutional seq2seq models +- User asks how self-attention, multi-head attention, or scaled dot-product attention works in the encoder or decoder +- User wants to know why positional encoding is required in an attention-only architecture +- User is debugging or explaining masked self-attention, autoregressive decoding, or encoder-decoder attention +- User asks why attention can shorten long-range dependency paths or improve parallelism +- Not for: generic “attention” in psychology, vision, or recommendation systems +- Not for: broad modern LLM training, prompting, or scaling-law questions unless the focus is the Transformer mechanism itself +- Not for: implementation-level optimization details unrelated to the architecture’s reasoning + +## Core decision rules + +- **When recurrence is the bottleneck, prefer attention-only computation** — recurrence forces sequential hidden-state updates and blocks parallelism within a training example. +- **When long-range dependencies matter, prefer self-attention over stacked recurrence or convolution** — any token can connect to any other token in one layer, so the path length stays short. +- **If the model has no recurrence or convolution, add explicit position information** — attention alone is permutation-blind, so positional encoding supplies order. +- **When decoding autoregressively, mask future positions** — otherwise the model leaks rightward information and can condition on tokens it should not know yet. +- **When one attention pattern seems too coarse, use multi-head attention** — separate heads let the model attend to different subspaces, positions, or relation types in parallel. 
+- **When dot products get too sharp at larger key dimensions, scale by \(\sqrt{d_k}\)** — this keeps softmax gradients usable and avoids overconfident attention scores. +- **If you need encoder-to-decoder alignment, use encoder-decoder attention, not plain self-attention** — the decoder should query the encoded source sequence directly. +- **When comparing layer types, evaluate sequential depth and maximum path length, not just parameter count** — the Transformer wins because it reduces sequential operations and dependency distance. +- **If the task is sentence-length sequence modeling, self-attention is often computationally attractive** — its per-layer complexity is favorable when sequence length is below representation width, which is common in translation. +- **When a single head seems to blur distinct relationships, interpret the averaging as a limitation, not a virtue** — multiple heads counteract that loss of resolution. +- **When output quality must remain stable, pair the architecture with residual connections, layer normalization, dropout, and label smoothing** — the paper treats these as part of making the attention stack train well. +- **If a learned positional embedding works, don’t assume it beats sinusoidal encoding** — the original result found similar performance; sinusoidal encodings were chosen for extrapolation potential. + +## Approach + +1. Identify which attention role is in play: encoder self-attention, masked decoder self-attention, or encoder-decoder attention. +2. Check whether the question is about ordering, dependency distance, or parallelization; those are the main reasons the architecture changes. +3. If the question concerns a design choice, test it against the paper’s core trade-off: sequential recurrence versus parallel attention with explicit position signals. +4. Use multi-head attention and scaling rules to explain expressiveness and training stability. +5. 
If the user is asking “why not RNNs?”, answer in terms of sequential computation, path length, and ease of long-range dependency learning. + +## References + +- [[references/original-transformer-paper]] + +## Known gaps + +- This skill is grounded in the original Transformer paper and does not cover later variants such as sparse attention, rotary position encodings, FlashAttention, or modern decoder-only LLM design. +- It does not provide implementation code, tensor shapes for every sublayer, or training-hyperparameter tuning advice beyond the architectural choices discussed in the source. +- It focuses on the reasoning for replacing recurrence; it does not deeply cover convolutional alternatives beyond their role as baselines in the comparison. diff --git a/examples/skills/transformer-attention/references/original-transformer-paper.md b/examples/skills/transformer-attention/references/original-transformer-paper.md new file mode 100644 index 00000000..05410b2f --- /dev/null +++ b/examples/skills/transformer-attention/references/original-transformer-paper.md @@ -0,0 +1,11 @@ +# Original Transformer Paper + +The source material for this skill is the paper **Attention Is All You Need**. The distilled worldview is: + +- sequence transduction should be built from attention rather than recurrence when you want parallel training and short paths between distant tokens +- self-attention is the mechanism that lets each position read the whole sequence in one layer +- multi-head attention restores expressiveness by letting the model attend in several learned subspaces at once +- positional encoding is mandatory because attention alone does not know order +- masked decoder self-attention preserves autoregressive generation by preventing future-token leakage + +The paper’s practical comparison is not “attention vs. everything”; it is a specific engineering trade-off against RNNs and convolutional sequence models. 
The architecture wins by reducing sequential computation, while still handling alignment, dependency tracking, and output generation through specialized attention blocks. diff --git a/examples/slides/README.md b/examples/slides/README.md new file mode 100644 index 00000000..ce78b600 --- /dev/null +++ b/examples/slides/README.md @@ -0,0 +1,86 @@ +# Slides + +`openkb deck new` turns wiki content into a polished, **single-file HTML slide deck** — +all CSS/JS inlined, no external assets, keyboard-navigable, ready to open +full-screen or share. + +> **Real artifact in this folder:** [`attention-intro.html`](attention-intro.html) +> — a 9-slide deck generated by `openkb deck new` from one paper, in the +> `openkb-deck-neon` theme. Open it in a browser (← → to navigate, `F` for +> full-screen). It's one self-contained file. + +```bash +openkb deck new <name> "<intent>" +``` + +- `<name>` — kebab-case slug; output lands at `output/decks/<name>/index.html`. +- `<intent>` — what the deck is about; the generator selects and orders wiki + content to match. + +```bash +openkb deck new attention-intro \ + "A short intro deck on the Transformer and self-attention for an ML reading group" +``` + +```text +Generating deck 'attention-intro' via skill openkb-deck-neon (default)... +Deck written to output/decks/attention-intro/index.html +``` + +That command produced [`attention-intro.html`](attention-intro.html). You can also +drive this from inside [`openkb chat`](../chat/) with `/deck new …`. + +--- + +## Options + +| Flag | Effect | +| --- | --- | +| `--skill <skill>` | Choose the deck theme/skill (default `openkb-deck-neon`). | +| `--critique` | Run a second-pass critic (`openkb-html-critic`) that reviews and fixes CSS/UX issues before saving. | +| `-y, --yes` | Overwrite an existing deck without prompting (the old one is backed up to `output/decks/<name>-workspace/iteration-N/`). 
| + +```bash +# Warm editorial look, with a quality pass: +openkb deck new okf-pitch "Pitch OpenKB to a data team" \ + --skill openkb-deck-editorial --critique +``` + +--- + +## Themes + +Decks are rendered by a **deck skill** — a SKILL.md that defines the visual +direction and a slide grammar. Two ship in the repo: + +| Skill | Look | +| --- | --- | +| [`openkb-deck-neon`](../../skills/openkb-deck-neon/SKILL.md) *(default)* | Dark "Aurora Glass" — near-black background, teal/sky/magenta/amber neon, glassmorphism panels. | +| [`openkb-deck-editorial`](../../skills/openkb-deck-editorial/SKILL.md) | Warm "Editorial Monocle" — cream, serif, brick-red accent; a printed-page feel. | + +Drop a third-party deck skill into `~/.openkb/skills/` or `<kb>/skills/` and select +it with `--skill`. (A KB only has a deck theme available if one is installed in one +of those locations — copy it from the repo's [`skills/`](../../skills/) if needed.) + +--- + +## A grammar keeps decks from degrading + +Each deck skill declares a slide grammar that the output must satisfy. The +`openkb-deck-neon` theme requires a `cover` and `closing`, at least 4 distinct +slide types, and no more than 2 of the same type in a row: + +```yaml +od: + mode: deck + output_path_template: "output/decks/{slug}/index.html" + deck_grammar: + required: [cover, closing] + allowed: [cover, chapter, thesis, quote, compare, data, closing] + min_distinct: 4 + max_consecutive_same: 2 +``` + +[`attention-intro.html`](attention-intro.html) satisfies it with 9 slides spanning +`cover · thesis · quote · compare · data · closing` — which is what stops it from +collapsing into a wall of identical bullet slides. diff --git a/examples/slides/attention-intro.html b/examples/slides/attention-intro.html new file mode 100644 index 00000000..3adb9ae7 --- /dev/null +++ b/examples/slides/attention-intro.html @@ -0,0 +1,151 @@ + + + + + +Attention Intro + + + +
+
OPENKB
ATTENTION INTRO
+
+
+
+
ML reading group
+

Transformer &
self-attention

+

An attention-only sequence model replaces recurrence, adds positional encoding, and trains in parallel.

+
+
Attention weights relevance
+
No recurrence
+
Encoder + decoder
+
+
+
+ +
1 / 9
attention-is-all-you-need
+
+
+
+
CHAPTER 01
attention-is-all-you-need
+
+
+

The Transformer’s claim: self-attention is enough for sequence modeling.

+

The paper replaces recurrent and convolutional layers with stacked attention plus position-wise feed-forward networks, aiming for better parallelism and shorter paths between distant tokens.

+
+
+
Why it mattered
+
+
Higher training parallelism
+
Constant-depth paths within a layer
+
Strong translation quality with less compute
+
+
+
+
2 / 9
summary
+
+
+
CHAPTER 02
concepts/transformer-models
+
+
+
Before
+

Recurrent / convolutional sequence models

+
    +
  • Process tokens with more sequential steps
  • +
  • Long-range dependencies take longer paths
  • +
  • Parallelization is limited by the architecture
  • +
+
+
+
After
+

Transformer stacks of attention

+
    +
  • All positions can interact in parallel
  • +
  • Self-attention shortens dependency paths
  • +
  • Encoder and decoder both reuse attention blocks
  • +
+
+
+
3 / 9
transformer-models
+
+
+
CHAPTER 03
concepts/attention-mechanisms
+
+
Attention in one line
+
query × keys → weighted values
+

A query is scored against keys, the scores become weights, and the values are combined into a context vector. Self-attention is the special case where query, key, and value all come from the same sequence.

+
+
4 / 9
attention-mechanisms
+
+
+
CHAPTER 04
scaled dot-product attention
+
+
+

Scaled dot-product attention keeps scores from getting too sharp.

+

The paper scales dot products by the square root of key dimension before softmax. That prevents large raw scores from collapsing the distribution and makes training more stable.

+
+
+
Mechanism
+
+
Q
Query
+
+
K,V
Keys + values
+
+
Score, scale, softmax, then sum the values.
+
+
+
5 / 9
summary
+
+
+
CHAPTER 05
concepts/positional-encoding
+
+

Without recurrence or convolution, the model needs explicit position information; fixed sinusoidal encodings give each token a location while preserving parallel computation.

+
— positional encoding in the Transformer
+
+
6 / 9
positional-encoding
+
+
+
CHAPTER 06
encoder / decoder
+
+
+
Encoder
+
    +
  • 6 identical layers
  • +
  • Multi-head self-attention
  • +
  • Position-wise feed-forward network
  • +
+
+
+
Decoder
+
    +
  • 6 identical layers
  • +
  • Masked self-attention + encoder attention
  • +
  • Residual connections and layer norm
  • +
+
+
+
7 / 9
architecture
+
+
+
CHAPTER 07
wmt-2014
+
+
Headline result
+
28.4 BLEU (EN→DE) · 41.8 BLEU (EN→FR)
+

The big model reached state-of-the-art translation results on WMT 2014 English-German and English-French, while also training faster thanks to more parallel computation.

+
+
8 / 9
results
+
+
+
OPENKB
NEXT STEP
+
+

Read the paper
with the attention map in mind.

+

When you see a Transformer block, ask three questions: what is being attended to, where does order come from, and how does the stack keep information flowing across the sequence?

+
+
9 / 9
thanks
+
+ + + \ No newline at end of file diff --git a/examples/visualize/README.md b/examples/visualize/README.md new file mode 100644 index 00000000..987b8e4e --- /dev/null +++ b/examples/visualize/README.md @@ -0,0 +1,77 @@ +# Visualize + +`openkb visualize` renders your wiki's `[[wikilink]]` graph as a self-contained, +interactive HTML page — a fast way to *see* how knowledge connects across the +documents you've ingested. + +> **Real artifact in this folder:** [`graph.html`](graph.html) — the graph +> generated from one paper (13 nodes, 96 edges). Open it in a browser and switch +> between the **3D / mind-map / radial** modes; click a node to inspect it. + +```bash +openkb visualize +``` + +```text +Graph written to /Users/you/my-kb/output/visualize/graph.html (13 nodes, 96 edges) +# opens in your default browser +``` + +By default it opens the page in your browser after generating. For headless +environments (CI, a remote box), skip the launch: + +```bash +openkb visualize --no-open +``` + +> **Prerequisite:** you need a compiled wiki. With nothing ingested yet, it tells +> you to run `openkb add` first. The [`graph.html`](graph.html) here was built from +> the wiki in [`../commands/sample-wiki/`](../commands/sample-wiki/). + +--- + +## What's in the graph + +- **Nodes** — every page under `wiki/summaries/`, `wiki/concepts/`, and + `wiki/entities/`. Each carries its label, type, description, sources, and + in/out degree (used to size it). +- **Edges** — directed `[[wikilink]]` references between pages. Self-loops are + dropped and duplicates collapsed. +- **Types** — taken from a page's frontmatter or its directory, and used to color + nodes and drive the legend filter. + +The single HTML file embeds the graph data and all rendering code, so you can +share it or commit it without any external dependencies. 
+ +--- + +## Three view modes + +Switch between these from buttons in the page: + +| Mode | Layout | +| --- | --- | +| **3D** *(default)* | Force-directed "nebula" in 3D — orbit, zoom, and drag-to-pin nodes. | +| **mind-map** | Horizontal tree: OpenKB → documents (summaries) → concepts. | +| **radial** | OpenKB at the hub, documents on spokes, concepts radiating out; zoom/pan. | + +The page also has search (filter by label), a type legend you can toggle, a +spacing slider, and a node inspector — click a node to see its description, +sources, and links. + +--- + +## Where it fits + +`visualize` is read-only and re-runs cheaply, so it pairs well with the rest of +the loop: + +```bash +openkb add ~/papers/ # ingest a batch +openkb lint --fix # repair any dangling links so the graph is clean +openkb visualize # see the shape of what you've built +``` + +A sparse graph with many orphans usually means documents aren't sharing concepts +yet — add more in the same domain and watch the concept hubs grow as knowledge +compounds. diff --git a/examples/visualize/graph.html b/examples/visualize/graph.html new file mode 100644 index 00000000..fa609407 --- /dev/null +++ b/examples/visualize/graph.html @@ -0,0 +1,907 @@ + + + + + +openkb · knowledge graph + + + +
+
+
+ +
+ +
openkbknowledge graph
+ +
+ 0nodes + · + 0edges +
+
+ +
+ +
+ +
+
+ +
+ + + +
+ +
+ + + + From 1c3105e8ea915cd0a12f14f26e04b7c7bf645ce6 Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 12:22:01 +0800 Subject: [PATCH 02/11] docs(examples): document pre-release install + slow local-runtime timeout Cover recurring install/usage questions from the issue tracker: - #130 / #24: openkb pins a pre-release dependency (pageindex==0.3.0.dev1), which uv/pip skip by default. Show `uv tool install --prerelease=allow` / `pip install --pre`, plus a PATH note for the "command not found" case. - #140: local runtimes (LM Studio on Mac, Ollama, llama.cpp) abort on the default request timeout; document raising litellm.timeout in config. Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- examples/configuration/README.md | 33 +++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/examples/configuration/README.md b/examples/configuration/README.md index a3025339..9e569d26 100644 --- a/examples/configuration/README.md +++ b/examples/configuration/README.md @@ -5,6 +5,27 @@ Everything that controls how OpenKB talks to your LLM lives in two places: --- +## Install + +```bash +pip install openkb +``` + +OpenKB pins a **pre-release** of its PageIndex dependency +(`pageindex==0.3.0.dev1`), which some installers skip by default. If an install +can't resolve `pageindex`, allow pre-releases: + +```bash +uv tool install openkb --prerelease=allow # uv +pip install --pre openkb # pip +``` + +If `openkb` isn't found *after* a successful install, the console-script directory +isn't on your `PATH` (e.g. `pip install --user` installs to `~/.local/bin`) — add it to +`PATH`. + +--- + ## 1. Initialize a knowledge base ```bash @@ -85,17 +106,19 @@ you set it, LiteLLM uses it. Two keys are special: - Every other key (`drop_params`, `num_retries`, `ssl_verify`, …) is set on the `litellm` module as a process-wide global. 
-#### A local Ollama setup +#### Slow local runtimes (Ollama, LM Studio, llama.cpp) -Ollama rejects some OpenAI-style params; `drop_params` lets LiteLLM strip them -instead of erroring, and a generous `timeout` covers slow local inference: +Local inference can be slow — on a Mac running **LM Studio**, a single compile +call can take minutes, and the **default request timeout will abort it** (this is +the usual cause of failures with local runtimes). Raise `timeout` (in seconds). +Add `drop_params` for backends that reject OpenAI-only params (e.g. Ollama): ```yaml -model: ollama/llama3.1 +model: ollama/llama3.1 # or your LM Studio / llama.cpp model id language: en litellm: drop_params: true - timeout: 1200 + timeout: 1200 # raise further (e.g. 3600) for large local models ``` #### GitHub Copilot / ChatGPT-subscription providers From 48269ecd6be7cdb79789f782e7f4a5ba33d5fe10 Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 12:38:02 +0800 Subject: [PATCH 03/11] docs(examples): use UN official languages in language examples Replace the ad-hoc ko/Korean language examples with the six official UN languages (en/zh/es/fr/ar/ru) so the config docs lead with widely-used options. Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- examples/configuration/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/configuration/README.md b/examples/configuration/README.md index 9e569d26..14a8c199 100644 --- a/examples/configuration/README.md +++ b/examples/configuration/README.md @@ -40,13 +40,15 @@ openkb init `gemini/gemini-3-flash-preview`). - **LLM API key** — hidden input; if you provide one it's written to `.env` with `0600` permissions. Press Enter to skip and set it later. -- **Language** — the output language for your wiki (`en`, `ko`, `Korean`, …). +- **Language** — the output language for your wiki. Any language works; e.g. 
the + six official UN languages: `en` (English), `zh` (Chinese), `es` (Spanish), + `fr` (French), `ar` (Arabic), `ru` (Russian). Skip the prompts entirely with flags — handy in scripts: ```bash openkb init --model anthropic/claude-sonnet-4-6 --language en -openkb init -m gpt-5.4 -l ko +openkb init -m gpt-5.4 -l zh ``` > **Non-interactive (pipes/CI):** prompts are gated on a TTY. When stdin isn't a From 3b3f3e08c8f1b568890feaaec523fc5dbdc6070e Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 12:42:13 +0800 Subject: [PATCH 04/11] docs(readme): link to per-case usage examples Add an Examples section pointing to examples/ (and examples/README.md), with a table of the per-feature cases. Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index 106b2b8b..155e01b5 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,26 @@ Create a `.env` file with your LLM API key: LLM_API_KEY=your_llm_api_key ``` +# 📚 Examples + +Worked examples for each feature live in [`examples/`](examples/) — every case is a +folder with a short walkthrough plus the **real artifact** OpenKB generated for it +(a compiled wiki, a distilled skill, an HTML slide deck, an interactive graph). + +| Example | Shows | +| --- | --- | +| [Configuration](examples/configuration/) | `init`, `config.yaml`, API keys, LiteLLM tuning (Ollama, LM Studio, Copilot) | +| [Commands](examples/commands/) | the everyday loop: `add` · `query` · `remove` · `recompile` · `lint` · `list` · `status` | +| [PageIndex Cloud](examples/pageindex-cloud/) | long documents: local vs. 
cloud indexing, and importing cloud-indexed docs | +| [Chat TUI](examples/chat/) | the interactive REPL: persistent sessions + slash commands | +| [Skill Factory](examples/skills/) | distill a redistributable agent skill from your wiki | +| [Slides](examples/slides/) | generate a single-file HTML slide deck | +| [Visualize](examples/visualize/) | render the wiki as an interactive knowledge graph | + +The compiled wiki, skill, deck, and graph in those folders were **all** generated +from a single paper — see [`examples/README.md`](examples/README.md) for the exact +commands. + # 🧩 How OpenKB Works ### Architecture From f79a0b5c870b323424fee4d33cf07832a00f5c4f Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 12:47:25 +0800 Subject: [PATCH 05/11] docs(readme): trim deep how-to, defer depth to examples/ Keep the README as a feature overview (what each command does); move the deep usage into examples/ and point to it: - collapse the Skill Factory walkthrough (output layout / install / share / iterate-from-chat / validate-eval-rollback) to a one-line pointer - replace the chat slash-command list with a pointer - tighten the skill-command table to feature-level descriptions - add a Configuration pointer for LiteLLM tuning (Ollama/LM Studio/Copilot) Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- README.md | 107 +++++++----------------------------------------------- 1 file changed, 13 insertions(+), 94 deletions(-) diff --git a/README.md b/README.md index 155e01b5..ff2e4db0 100644 --- a/README.md +++ b/README.md @@ -236,9 +236,11 @@ A "generator" reads from the compiled wiki and produces something usable: an ans | Command | Output | |---|---| -| openkb skill validate [name] | Validate compiled skills (YAML frontmatter, file sizes, wikilinks, scripts). Auto-runs at end of `skill new` (`--strict` to treat warnings as failures) | -| openkb skill eval <name> | Trigger-accuracy evaluation: does the `description:` field actually fire? 
LLM generates eval prompts; grader LLM scores activation (`--save` persists the eval set) | -| openkb skill history <name> / openkb skill rollback <name> | Version history for skills. Each overwrite saves the previous version to `iteration-N/` with a diff; rollback restores any iteration | +| openkb skill validate [name] | Validate compiled skills (auto-runs after `skill new`) | +| openkb skill eval <name> | Check the skill triggers on the right prompts | +| openkb skill history <name> / openkb skill rollback <name> | Version history + rollback for skills | + +See **[`examples/skills/`](examples/skills/)** for how validation, evaluation, and rollback actually work. @@ -256,23 +258,9 @@ openkb chat --list # list all sessions openkb chat --delete # delete a session ``` -Inside a chat, type `/` to access slash commands (Tab to complete). - -
-More slash commands: -
- -- `/help` — list available commands -- `/status` — show knowledge base status -- `/list` — list all documents -- `/add ` — add a document or directory without leaving the chat -- `/skill new ""` — compile a skill from this chat (see below) -- `/save [name]` — export the transcript to `wiki/explorations/` -- `/clear` — start a fresh session (the current one stays on disk) -- `/lint` — run knowledge base lint -- `/exit` — exit (Ctrl-D also works) - -
+Inside a chat, type `/` to access slash commands (Tab to complete) — `/add`, +`/skill new`, `/save`, `/lint`, and more. See [`examples/chat/`](examples/chat/) +for the full list. @@ -285,80 +273,9 @@ openkb skill new karpathy-thinking \ "Reason about transformers and attention in Karpathy's style" ``` -
-Output: -
- -``` -/output/skills/karpathy-thinking/ -├── SKILL.md # YAML frontmatter + when-to-use + approach -├── references/ # depth material the agent loads on demand -│ ├── methodology.md -│ └── key-quotes.md -└── (scripts/) # optional, only if intent implies computation -``` - -Plus an auto-updated `/.claude-plugin/marketplace.json` so the whole KB is one-line installable. - -
- -
-Install locally: -
- -```bash -cp -r output/skills/karpathy-thinking ~/.claude/skills/ -``` - -
- -
-Share with others: -
- -Push your KB to GitHub, then anyone runs: - -```bash -npx skills@latest add / -``` - -
- -
-Iterate from chat: -
- -Compilation is one-shot, but follow-up edits don't have to be. Inside `openkb chat`, you can refine without re-running the whole pipeline: - -``` -/skill new karpathy-thinking "Reason about transformers like Karpathy" -[generation streams] -> description is too generic, make it about transformer implementations specifically -[agent edits SKILL.md frontmatter in place] -``` - -
- -
-Quality gates: -
- -Structural validation, trigger-accuracy + body-coverage evaluation, and full history/rollback: - -```bash -# Lint structure (auto-runs at end of `skill new`) -openkb skill validate karpathy-thinking -openkb skill validate --strict # treat warnings as failures - -# Does the description actually fire when it should? -openkb skill eval karpathy-thinking --save - -# History + rollback if a new iteration regresses -openkb skill history karpathy-thinking -openkb skill rollback karpathy-thinking --to 2 -``` - -
+→ The output layout, local install, sharing via `npx skills add`, refining from +chat, and the `validate` / `eval` / `history` / `rollback` quality gates are walked +through in **[`examples/skills/`](examples/skills/)** — with a real generated skill. ### (iii) 🗺 Visualize — *see the shape of your knowledge* @@ -398,6 +315,8 @@ Subscription-based providers that authenticate via OAuth device flow (e.g. `chat +For deeper LLM/LiteLLM tuning — timeouts for slow local runtimes (Ollama, LM Studio), `drop_params`, GitHub Copilot headers, install notes — see **[`examples/configuration/`](examples/configuration/)**. + ### PageIndex Setup Long-document retrieval is a [known challenge](https://x.com/karpathy/status/2039823314982744522) for LLMs. [PageIndex](https://github.com/VectifyAI/PageIndex) solves this with vectorless, reasoning-based retrieval, by building a hierarchical tree index that lets LLMs reason over the index for context-aware retrieval. From cb280cf938617611c3c2c7297616b3f43c0feb48 Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 12:57:46 +0800 Subject: [PATCH 06/11] docs(readme): dedupe LLM setup, slim Configuration (keep PageIndex Setup) Provider/model + key setup is already covered in Getting Started > 'Set up your LLM'; Configuration > Settings repeated it. Trim Settings to the core config.yaml keys + a pointer to examples/configuration (entity_types, OAuth, LiteLLM tuning). PageIndex Setup (kept for referral) and AGENTS.md are unchanged. 
Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- README.md | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index ff2e4db0..7422e761 100644 --- a/README.md +++ b/README.md @@ -289,7 +289,7 @@ openkb visualize # build + open output/visualize/graph.html ### Settings -OpenKB settings are initialized by `openkb init` and stored in `.openkb/config.yaml`: +`openkb init` writes `.openkb/config.yaml`: ```yaml model: gpt-5.4 # LLM model (any LiteLLM-supported provider) @@ -297,25 +297,7 @@ language: en # Wiki output language pageindex_threshold: 20 # PDF pages threshold for PageIndex ``` -Model names use `provider/model` LiteLLM [format](https://docs.litellm.ai/docs/providers) (OpenAI models can omit the prefix): - -| Provider | Model example | -|---|---| -| OpenAI | `gpt-5.4` | -| Anthropic | `anthropic/claude-sonnet-4-6` | -| Gemini | `gemini/gemini-3.1-pro-preview` | - -
-Advanced options (entity_types, OAuth): -
- -`entity_types` (optional): a YAML list overriding the entity-type vocabulary used for entity pages; omit it to use the default `person`, `organization`, `place`, `product`, `work`, `event`, `other`. - -Subscription-based providers that authenticate via OAuth device flow (e.g. `chatgpt/*`, `github_copilot/*`) need no API key; OpenKB skips the missing-key warning for them. - -
- -For deeper LLM/LiteLLM tuning — timeouts for slow local runtimes (Ollama, LM Studio), `drop_params`, GitHub Copilot headers, install notes — see **[`examples/configuration/`](examples/configuration/)**. +The full settings reference — `entity_types`, OAuth providers (`chatgpt/*`, `github_copilot/*`), and LiteLLM tuning (timeouts for slow local runtimes like Ollama / LM Studio, `drop_params`, GitHub Copilot headers, install notes) — is in **[`examples/configuration/`](examples/configuration/)**. ### PageIndex Setup From 5a58d6d7df5d74753ba896a9b9d745a4dc9dbc0f Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 13:07:34 +0800 Subject: [PATCH 07/11] docs(readme): tighten Usage, add visualize/slides to Quick Start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical pass on Usage — it described the generators twice (Layer 2 table + narrative subsections): - drop the (i) Query & Chat and (iii) Visualize subsections (already in the Layer 2 table; depth is in examples/) - shorten Skill Factory to a flagship blurb + pointer (no walkthrough) - add the missing deck/slides generator to the Layer 2 table - fix the Skill Factory anchor link; remove a dead commented-out lint row Quick Start: add optional visualize + deck (deck noted as needing a theme). Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- README.md | 54 +++++++++--------------------------------------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 7422e761..2c0006de 100644 --- a/README.md +++ b/README.md @@ -97,8 +97,10 @@ openkb query "What are the main findings?" # 5. 
Or chat interactively openkb chat -# (Optional) Distill a redistributable agent skill from your wiki -openkb skill new my-expert "Reason like an expert on <topic>" +# (Optional) Turn the wiki into other outputs +openkb skill new my-expert "Reason like an expert on <topic>" # a portable agent skill +openkb visualize # an interactive knowledge graph +openkb deck new my-deck "An intro deck on <topic>" # slides (install a theme first — see examples/slides/) ``` ### Set up your LLM @@ -191,7 +193,7 @@ A single source might touch 10--15 wiki pages. Knowledge accumulates: each docum # ⚙️ Usage -OpenKB commands fall into two layers: the **wiki foundation** (compile + manage your knowledge) and **generators** (turn that wiki into useful output). +OpenKB commands fall into two layers: the **wiki foundation** (compile + manage your knowledge) and **generators** (turn that wiki into useful output). These tables are the command inventory; step-by-step walkthroughs live in [`examples/`](examples/). ## Layer 1: 🧱 Wiki Foundation — compile and maintain @@ -214,8 +216,6 @@ OpenKB commands fall into two layers: the **wiki foundation** (compile + manage | openkb recompile [<doc>] [--all] | Re-run the compile pipeline on already-indexed docs without re-indexing.
Regenerates summaries and rewrites concept pages; manual edits are overwritten (`--dry-run` to preview, `--refresh-schema` to also update `wiki/AGENTS.md`) | | openkb feedback ["msg"] | File feedback by opening a prefilled GitHub issue (`--type bug/feature/question` to tag it) | - - ## Layer 2: 💡 Generators — turn the wiki into output @@ -228,7 +228,8 @@ A "generator" reads from the compiled wiki and produces something usable: an ans | openkb chat | Interactive multi-turn session over the wiki (`--resume`, `--list`, `--delete` to manage sessions) | | openkb visualize | A self-contained interactive knowledge graph at `output/visualize/graph.html` — 3D, mind-map, and radial views | | | | -| openkb skill new <skill-name> "<intent>" | Distill a redistributable agent skill from your wiki (see [Skill Factory](#-skill-factory--drop-in-a-book-out-comes-a-digital-expert) below) | +| openkb skill new <skill-name> "<intent>" | Distill a redistributable agent skill from your wiki (see [Skill Factory](#skill-factory) below) | +| openkb deck new <name> "<intent>" | Generate a single-file HTML slide deck (`--skill` picks a theme, `--critique` runs a quality pass — see [`examples/slides/`](examples/slides/)) |
More skill commands: @@ -240,50 +241,13 @@ A "generator" reads from the compiled wiki and produces something usable: an ans | openkb skill eval <name> | Check the skill triggers on the right prompts | | openkb skill history <name> / openkb skill rollback <name> | Version history + rollback for skills | -See **[`examples/skills/`](examples/skills/)** for how validation, evaluation, and rollback actually work. -
-### (i) 💬 Query & Chat — *ask the wiki* - -`openkb query "..."` answers a single question. `openkb chat` is interactive — each turn carries history, so you can dig into a topic without re-typing context. Both use the same underlying wiki and retrieval primitives. - -```bash -openkb query "What does the literature say about attention scaling?" - -openkb chat # start a new session -openkb chat --resume # resume the most recent session -openkb chat --resume 20260411 # resume by id (unique prefix works) -openkb chat --list # list all sessions -openkb chat --delete # delete a session -``` - -Inside a chat, type `/` to access slash commands (Tab to complete) — `/add`, -`/skill new`, `/save`, `/lint`, and more. See [`examples/chat/`](examples/chat/) -for the full list. - -### (ii) 🛠 Skill Factory — *drop in a book; out comes a digital expert.* +### 🛠 Skill Factory — *drop in a book; out comes a digital expert.* -The newest generator. `openkb skill new` distills an [agent skill](https://docs.claude.com/en/docs/build-with-claude/skills) from any subset of your wiki, a portable folder that major agents (Claude Code, Codex, etc.) can install and load natively. Drop in a book's worth of papers; out comes a specialist that other agents can call on. - -```bash -openkb skill new karpathy-thinking \ - "Reason about transformers and attention in Karpathy's style" -``` - -→ The output layout, local install, sharing via `npx skills add`, refining from -chat, and the `validate` / `eval` / `history` / `rollback` quality gates are walked -through in **[`examples/skills/`](examples/skills/)** — with a real generated skill. - -### (iii) 🗺 Visualize — *see the shape of your knowledge* - -`openkb visualize` renders the wiki as a single self-contained, offline HTML page with three views of the same knowledge base — a **3D** force graph, an OpenKB-rooted **mind-map**, and a **radial** tree — coloured by type and linked by `[[wikilinks]]`. 
- -```bash -openkb visualize # build + open output/visualize/graph.html -``` +The flagship generator: `openkb skill new` distills a portable [agent skill](https://docs.claude.com/en/docs/build-with-claude/skills) from your wiki that Claude Code, Codex, and Gemini can install and load natively. Drop in a book's worth of papers; out comes a specialist other agents can call on. → A real generated skill, plus install / share / `eval` / rollback, is walked through in **[`examples/skills/`](examples/skills/)**. # 🔧 Configuration From d4244b00af5c56d02b39635350f44b6427bfa127 Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 13:16:41 +0800 Subject: [PATCH 08/11] fix(deck): bundle built-in deck themes so `deck new` works after pip install MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deck themes (openkb-deck-neon / openkb-deck-editorial) and the html critic lived only in the repo's top-level skills/, which the wheel didn't ship — so a fresh `pip install openkb` failed `deck new` / `--critique` (and chat `/deck`, `/critique`) with "Deck skill ... is not installed". 
- force-include the three skills into the wheel at openkb/_skills/ - scan_local_skills also scans bundled roots (wheel openkb/_skills + the source-checkout skills/), at lowest priority so KB/user skills still override - tests: isolate bundled roots in the scan unit tests; add coverage for bundled discovery + KB-overrides-bundled - docs: drop the now-stale "install a theme first" notes Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- README.md | 2 +- examples/slides/README.md | 8 ++++---- openkb/agent/skills.py | 21 ++++++++++++++++++++- pyproject.toml | 8 ++++++++ tests/test_skills.py | 27 +++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2c0006de..1612c601 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ openkb chat # (Optional) Turn the wiki into other outputs openkb skill new my-expert "Reason like an expert on <topic>" # a portable agent skill openkb visualize # an interactive knowledge graph -openkb deck new my-deck "An intro deck on <topic>" # slides (install a theme first — see examples/slides/) +openkb deck new my-deck "An intro deck on <topic>" # slides — a single-file HTML deck ``` ### Set up your LLM diff --git a/examples/slides/README.md b/examples/slides/README.md index ce78b600..b668acb5 100644 --- a/examples/slides/README.md +++ b/examples/slides/README.md @@ -51,16 +51,16 @@ openkb deck new okf-pitch "Pitch OpenKB to a data team" \ ## Themes Decks are rendered by a **deck skill** — a SKILL.md that defines the visual -direction and a slide grammar. Two ship in the repo: +direction and a slide grammar. Two ship with OpenKB (no install needed): | Skill | Look | | --- | --- | | [`openkb-deck-neon`](../../skills/openkb-deck-neon/SKILL.md) *(default)* | Dark "Aurora Glass" — near-black background, teal/sky/magenta/amber neon, glassmorphism panels.
| | [`openkb-deck-editorial`](../../skills/openkb-deck-editorial/SKILL.md) | Warm "Editorial Monocle" — cream, serif, brick-red accent; a printed-page feel. | -Drop a third-party deck skill into `~/.openkb/skills/` or `<kb>/skills/` and select -it with `--skill`. (A KB only has a deck theme available if one is installed in one -of those locations — copy it from the repo's [`skills/`](../../skills/) if needed.) +Want a custom look? Drop a third-party deck skill into `~/.openkb/skills/` or +`<kb>/skills/` and select it with `--skill` (a same-named skill there overrides +the built-in). --- diff --git a/openkb/agent/skills.py b/openkb/agent/skills.py index 59023ceb..edbd1af8 100644 --- a/openkb/agent/skills.py +++ b/openkb/agent/skills.py @@ -9,6 +9,9 @@ 1. ``<kb>/skills/`` — project-local skills shipped with the KB 2. ``~/.openkb/skills/`` — user-global skills 3. ``~/.claude/skills/`` — Claude Code's skill dir (interop bonus) + 4. bundled skills — built-in deck themes / critic shipped with + the package (lowest priority, so the roots + above can override them) Skill file layout:: @@ -39,6 +42,17 @@ "~/.claude/skills", ) +# Skills shipped with the package so the built-in deck themes + html critic +# work out of the box (no manual install). Two candidates cover both install +# modes; whichever exists is scanned, at lowest priority: +# - wheel install: force-included at ``openkb/_skills/`` +# - editable/source checkout: the repo's top-level ``skills/`` +_PKG_DIR = Path(__file__).resolve().parent.parent +BUNDLED_SKILL_ROOTS: Tuple[str, ...] = ( + str(_PKG_DIR / "_skills"), + str(_PKG_DIR.parent / "skills"), +) + def _parse_frontmatter(text: str) -> Tuple[dict, str]: """Return ``(metadata_dict, body)`` from a markdown file with YAML @@ -77,7 +91,12 @@ def scan_local_skills( List of skill metadata dicts. Empty if no skills found.
""" seen: dict[str, dict[str, str]] = {} - roots = list(DEFAULT_SKILL_ROOTS) + [str(r) for r in extra_roots] + # Bundled roots go last so KB/user/Claude skills override the built-ins. + roots = ( + list(DEFAULT_SKILL_ROOTS) + + [str(r) for r in extra_roots] + + list(BUNDLED_SKILL_ROOTS) + ) for root_spec in roots: root = Path(root_spec).expanduser() if not root.is_absolute(): diff --git a/pyproject.toml b/pyproject.toml index 35eb031a..694ab5e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,3 +66,11 @@ source = "vcs" [tool.hatch.build.targets.wheel] packages = ["openkb"] + +# Bundle the built-in deck themes + html critic into the wheel so +# `openkb deck new` / `--critique` / chat `/deck` work right after +# `pip install` (scan_local_skills also looks under openkb/_skills/). +[tool.hatch.build.targets.wheel.force-include] +"skills/openkb-deck-neon" = "openkb/_skills/openkb-deck-neon" +"skills/openkb-deck-editorial" = "openkb/_skills/openkb-deck-editorial" +"skills/openkb-html-critic" = "openkb/_skills/openkb-html-critic" diff --git a/tests/test_skills.py b/tests/test_skills.py index 70dbabf3..d63a75e6 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -29,6 +29,9 @@ def _isolate_home(monkeypatch, tmp_path): fake_home = tmp_path / "isolated-home" fake_home.mkdir() monkeypatch.setenv("HOME", str(fake_home)) + # Neutralize the package-bundled roots so these unit tests exercise the + # scanning primitive in isolation; bundled discovery is covered explicitly. 
+ monkeypatch.setattr("openkb.agent.skills.BUNDLED_SKILL_ROOTS", ()) def _write_skill( @@ -161,6 +164,30 @@ def test_scan_default_roots_listed(tmp_path: Path): ) +def test_scan_includes_bundled_skills(tmp_path: Path, monkeypatch): + """Skills shipped with the package (deck themes / critic) are + discovered even for a KB with no local ``skills/`` — this is what makes + ``deck new`` work right after ``pip install``.""" + bundled = tmp_path / "bundled" + _write_skill(bundled, "openkb-deck-neon", description="built-in deck theme") + monkeypatch.setattr("openkb.agent.skills.BUNDLED_SKILL_ROOTS", (str(bundled),)) + names = {s["name"] for s in scan_local_skills(tmp_path)} + assert "openkb-deck-neon" in names + + +def test_kb_skill_overrides_bundled(tmp_path: Path, monkeypatch): + """Bundled roots are scanned last (lowest priority): a same-named skill + in the KB wins, so users can customize a built-in theme.""" + bundled = tmp_path / "bundled" + _write_skill(bundled, "openkb-deck-neon", description="BUILT-IN") + monkeypatch.setattr("openkb.agent.skills.BUNDLED_SKILL_ROOTS", (str(bundled),)) + _write_skill(tmp_path / "skills", "openkb-deck-neon", description="KB OVERRIDE") + match = next( + s for s in scan_local_skills(tmp_path) if s["name"] == "openkb-deck-neon" + ) + assert match["description"] == "KB OVERRIDE" + + # ─── _parse_frontmatter ────────────────────────────────────────────────── From 9a0daff4c508b0d1c6954092f8ec1da9f89239de Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 13:26:44 +0800 Subject: [PATCH 09/11] docs(readme): fold Examples into Usage as per-command links The standalone Examples section duplicated the Usage tables. 
Remove it and link each command to its specific walkthrough instead: - Layer 2 generators table gets an Example column (query/chat/visualize/skill/deck) - a wiki-foundation 'everyday loop' link under Layer 1 -> examples/commands/ - PageIndex example linked from PageIndex Setup; config from Configuration Every example folder stays linked from its natural context; no duplicate table. Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- README.md | 42 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 1612c601..e553dedf 100644 --- a/README.md +++ b/README.md @@ -115,26 +115,6 @@ Create a `.env` file with your LLM API key: LLM_API_KEY=your_llm_api_key ``` -# 📚 Examples - -Worked examples for each feature live in [`examples/`](examples/) — every case is a -folder with a short walkthrough plus the **real artifact** OpenKB generated for it -(a compiled wiki, a distilled skill, an HTML slide deck, an interactive graph). - -| Example | Shows | -| --- | --- | -| [Configuration](examples/configuration/) | `init`, `config.yaml`, API keys, LiteLLM tuning (Ollama, LM Studio, Copilot) | -| [Commands](examples/commands/) | the everyday loop: `add` · `query` · `remove` · `recompile` · `lint` · `list` · `status` | -| [PageIndex Cloud](examples/pageindex-cloud/) | long documents: local vs. cloud indexing, and importing cloud-indexed docs | -| [Chat TUI](examples/chat/) | the interactive REPL: persistent sessions + slash commands | -| [Skill Factory](examples/skills/) | distill a redistributable agent skill from your wiki | -| [Slides](examples/slides/) | generate a single-file HTML slide deck | -| [Visualize](examples/visualize/) | render the wiki as an interactive knowledge graph | - -The compiled wiki, skill, deck, and graph in those folders were **all** generated -from a single paper — see [`examples/README.md`](examples/README.md) for the exact -commands. 
- # 🧩 How OpenKB Works ### Architecture @@ -193,7 +173,7 @@ A single source might touch 10--15 wiki pages. Knowledge accumulates: each docum # ⚙️ Usage -OpenKB commands fall into two layers: the **wiki foundation** (compile + manage your knowledge) and **generators** (turn that wiki into useful output). These tables are the command inventory; step-by-step walkthroughs live in [`examples/`](examples/). +OpenKB commands fall into two layers: the **wiki foundation** (compile + manage your knowledge) and **generators** (turn that wiki into useful output). Each links to a concrete walkthrough — a real artifact OpenKB generated from one sample paper (browse them all in [`examples/`](examples/)). ## Layer 1: 🧱 Wiki Foundation — compile and maintain @@ -218,18 +198,20 @@ OpenKB commands fall into two layers: the **wiki foundation** (compile + manage +→ **Example:** the everyday loop walked through end to end — [`examples/commands/`](examples/commands/). + ## Layer 2: 💡 Generators — turn the wiki into output A "generator" reads from the compiled wiki and produces something usable: an answer, a conversation, a skill folder. The wiki is the substrate; generators are the surfaces. 
-| Command | Output | -|---|---| -| openkb query "question" | A grounded answer with citations (`--save` to persist to `wiki/explorations/`) | -| openkb chat | Interactive multi-turn session over the wiki (`--resume`, `--list`, `--delete` to manage sessions) | -| openkb visualize | A self-contained interactive knowledge graph at `output/visualize/graph.html` — 3D, mind-map, and radial views | -| | | -| openkb skill new <skill-name> "<intent>" | Distill a redistributable agent skill from your wiki (see [Skill Factory](#skill-factory) below) | -| openkb deck new <name> "<intent>" | Generate a single-file HTML slide deck (`--skill` picks a theme, `--critique` runs a quality pass — see [`examples/slides/`](examples/slides/)) | +| Command | Output | Example | +|---|---|---| +| openkb query "question" | A grounded answer with citations (`--save` to persist to `wiki/explorations/`) | [query & save](examples/commands/) | +| openkb chat | Interactive multi-turn session over the wiki (`--resume`, `--list`, `--delete` to manage sessions) | [chat](examples/chat/) | +| openkb visualize | A self-contained interactive knowledge graph at `output/visualize/graph.html` — 3D, mind-map, and radial views | [visualize](examples/visualize/) | +| | | | +| openkb skill new <skill-name> "<intent>" | Distill a redistributable agent skill from your wiki (see [Skill Factory](#skill-factory) below) | [skills](examples/skills/) | +| openkb deck new <name> "<intent>" | Generate a single-file HTML slide deck (`--skill` picks a theme, `--critique` runs a quality pass) | [slides](examples/slides/) |
More skill commands: @@ -283,6 +265,8 @@ Set `PAGEINDEX_API_KEY` in your `.env` to enable cloud features: PAGEINDEX_API_KEY=your_pageindex_api_key ``` +→ **Example:** local vs. cloud indexing, and importing a cloud-indexed doc — [`examples/pageindex-cloud/`](examples/pageindex-cloud/). + ### AGENTS.md The `wiki/AGENTS.md` file defines wiki structure and conventions. It's the LLM's instruction manual for maintaining the wiki. Customize it to change how your wiki is organized. From ba0f56765ca4b15485f454c69959b839ee4935c6 Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 13:38:01 +0800 Subject: [PATCH 10/11] docs(examples): fix review nits (threshold boundary, broken link, wording) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From a critical pass over the examples docs: - threshold is >= (a 20-page PDF is long, converter.py:183): fix the boundary in pageindex-cloud (<= / > -> < / >=) and configuration ("more than" -> "or more") - fix the Bishop sample link — href pointed at ../docs/ (a dir), not the PDF - remove --keep-empty keeps concept AND entity pages, not just concepts - deck_grammar quote: add the kind_attr line so it matches the real SKILL.md Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- examples/commands/README.md | 2 +- examples/configuration/README.md | 2 +- examples/pageindex-cloud/README.md | 6 +++--- examples/slides/README.md | 1 + 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/commands/README.md b/examples/commands/README.md index 9c134762..f0310ab5 100644 --- a/examples/commands/README.md +++ b/examples/commands/README.md @@ -77,7 +77,7 @@ Identify a doc by filename, its slug, or a unique substring: openkb remove attention-is-all-you-need.pdf # exact filename openkb remove attention # unique substring openkb remove attention --dry-run # preview, change nothing -openkb remove attention --keep-empty # keep concept pages it solely sourced 
+openkb remove attention --keep-empty # keep concept & entity pages it solely sourced openkb remove attention --keep-raw --yes # leave raw/ file, no prompt ``` diff --git a/examples/configuration/README.md b/examples/configuration/README.md index 14a8c199..9ead83ca 100644 --- a/examples/configuration/README.md +++ b/examples/configuration/README.md @@ -94,7 +94,7 @@ pageindex_threshold: 20 # PDF pages threshold for PageIndex | --- | --- | --- | | `model` | `gpt-5.4` | LLM used for all compile/query/chat work. | | `language` | `en` | Language the wiki is written in. | -| `pageindex_threshold` | `20` | PDFs with **more** pages than this take the long-doc (PageIndex) path; fewer go through the short-doc path. See [`pageindex-cloud/`](../pageindex-cloud/). | +| `pageindex_threshold` | `20` | PDFs with this many pages **or more** take the long-doc (PageIndex) path; shorter ones go through the short-doc path. See [`pageindex-cloud/`](../pageindex-cloud/). | | `entity_types` | 7 defaults | Custom vocabulary for entity pages. `other` is always kept. | | `litellm:` | – | A pass-through block for LiteLLM. See below. | diff --git a/examples/pageindex-cloud/README.md b/examples/pageindex-cloud/README.md index 344d6352..dbcdee23 100644 --- a/examples/pageindex-cloud/README.md +++ b/examples/pageindex-cloud/README.md @@ -7,7 +7,7 @@ a document already indexed in PageIndex Cloud. > **Try it with** [`../docs/deepseek-r1.pdf`](../docs/deepseek-r1.pdf) (~22 pages, > just over the threshold → long-doc path) or -> [`../docs/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf`](../docs/) +> [`../docs/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf`](../docs/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf) > (700+ pages → exercises cloud OCR + page windowing). 
--- @@ -18,8 +18,8 @@ When you `openkb add` a PDF, its page count decides the path: | Page count | Path | Engine | | --- | --- | --- | -| `≤ pageindex_threshold` (default 20) | short-doc | markitdown → LLM reads full text | -| `> pageindex_threshold` | long-doc | PageIndex tree index | +| `< pageindex_threshold` (default 20) | short-doc | markitdown → LLM reads full text | +| `≥ pageindex_threshold` | long-doc | PageIndex tree index | For the long-doc path, whether it runs **locally** or in the **cloud** depends only on one environment variable: diff --git a/examples/slides/README.md b/examples/slides/README.md index b668acb5..bfa94c32 100644 --- a/examples/slides/README.md +++ b/examples/slides/README.md @@ -75,6 +75,7 @@ od: mode: deck output_path_template: "output/decks/{slug}/index.html" deck_grammar: + kind_attr: data-type required: [cover, closing] allowed: [cover, chapter, thesis, quote, compare, data, closing] min_distinct: 4 From 6a8660c18c3e5817e9bbc380a09b8e502863eaa4 Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 26 Jun 2026 13:46:32 +0800 Subject: [PATCH 11/11] chore(model): default to gpt-5.4 (unify with config example + docs) The code default (DEFAULT_CONFIG) was gpt-5.4-mini while config.yaml.example and all docs use gpt-5.4. Standardize on gpt-5.4: - DEFAULT_CONFIG model gpt-5.4-mini -> gpt-5.4 (+ update test_config assertion) - cli.py: fix the cross-family gpt-4o-mini fallback and the --model help example to gpt-5.4; lead the init model list with gpt-5.4 - examples/README: showcase commands use gpt-5.4 Also drop the empty separator row in the README Layer 2 generators table. 
Claude-Session: https://claude.ai/code/session_018WiFnTo1YW9mtw47Fzir9K --- README.md | 1 - examples/README.md | 4 ++-- openkb/cli.py | 6 +++--- openkb/config.py | 2 +- tests/test_config.py | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e553dedf..8e2920c1 100644 --- a/README.md +++ b/README.md @@ -209,7 +209,6 @@ A "generator" reads from the compiled wiki and produces something usable: an ans | openkb query "question" | A grounded answer with citations (`--save` to persist to `wiki/explorations/`) | [query & save](examples/commands/) | | openkb chat | Interactive multi-turn session over the wiki (`--resume`, `--list`, `--delete` to manage sessions) | [chat](examples/chat/) | | openkb visualize | A self-contained interactive knowledge graph at `output/visualize/graph.html` — 3D, mind-map, and radial views | [visualize](examples/visualize/) | -| | | | | openkb skill new <skill-name> "<intent>" | Distill a redistributable agent skill from your wiki (see [Skill Factory](#skill-factory) below) | [skills](examples/skills/) | | openkb deck new <name> "<intent>" | Generate a single-file HTML slide deck (`--skill` picks a theme, `--critique` runs a quality pass) | [slides](examples/slides/) | diff --git a/examples/README.md b/examples/README.md index 012b0ec4..01d184e1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -24,11 +24,11 @@ plus the **real artifact** OpenKB produced for it (generated, not hand-written). The compiled `sample-wiki/`, the skill, the deck, and the graph were **all** generated by running OpenKB over a single document — [`docs/attention-is-all-you-need.pdf`](docs/attention-is-all-you-need.pdf) — with -`gpt-5.4-mini`: +`gpt-5.4`: ```bash mkdir my-kb && cd my-kb -openkb init --model gpt-5.4-mini --language en +openkb init --model gpt-5.4 --language en openkb add /path/to/examples/docs/attention-is-all-you-need.pdf # → commands/sample-wiki/ openkb query "How does attention replace recurrence?" 
--save # → commands/sample-wiki/explorations/ diff --git a/openkb/cli.py b/openkb/cli.py index 1c415d01..28694987 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -711,7 +711,7 @@ def _stdin_is_tty() -> bool: callback=_model_option_callback, help=( "LLM in LiteLLM provider/model format " - "(e.g. 'gpt-5.4-mini', 'anthropic/claude-sonnet-4-6'). " + "(e.g. 'gpt-5.4', 'anthropic/claude-sonnet-4-6'). " "Skips the interactive prompt when set." ), ) @@ -730,7 +730,7 @@ def init(model, language): # Interactive prompts click.echo("Pick an LLM in `provider/model` LiteLLM format:") - click.echo(" OpenAI: gpt-5.4-mini, gpt-5.4") + click.echo(" OpenAI: gpt-5.4, gpt-5.4-mini") click.echo(" Anthropic: anthropic/claude-sonnet-4-6, anthropic/claude-opus-4-6") click.echo(" Gemini: gemini/gemini-3.1-pro-preview, gemini/gemini-3-flash-preview") click.echo(" DeepSeek: deepseek/deepseek-v4-flash, deepseek/deepseek-v4-pro") @@ -969,7 +969,7 @@ def _cleanup_pageindex( _setup_llm_key(kb_dir) config = load_config(openkb_dir / "config.yaml") - model = config.get("model", DEFAULT_CONFIG.get("model", "gpt-4o-mini")) + model = config.get("model", DEFAULT_CONFIG.get("model", "gpt-5.4")) client = PageIndexClient(model=model, storage_path=str(openkb_dir)) col = client.collection() diff --git a/openkb/config.py b/openkb/config.py index b4cf8117..52082c62 100644 --- a/openkb/config.py +++ b/openkb/config.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) DEFAULT_CONFIG: dict[str, Any] = { - "model": "gpt-5.4-mini", + "model": "gpt-5.4", "language": "en", "pageindex_threshold": 20, } diff --git a/tests/test_config.py b/tests/test_config.py index 5dd870f1..5fd78b44 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -21,7 +21,7 @@ def test_default_config_keys(): def test_default_config_values(): - assert DEFAULT_CONFIG["model"] == "gpt-5.4-mini" + assert DEFAULT_CONFIG["model"] == "gpt-5.4" assert DEFAULT_CONFIG["language"] == "en" assert DEFAULT_CONFIG["pageindex_threshold"] == 20