Schema 文档自动生成器
一个小工具:扫描 MariaDB 的 INFORMATION_SCHEMA,导出 LLM / Agent 友好的 Markdown / JSON / OpenAPI
让 AI 准确写 SQL 的第一步是让 AI 知道 schema 的真面目。直接喂 SHOW CREATE TABLE 给 LLM 效率很差——括号嵌套、引号转义、注释丢失、关系不直观。
这一篇给你一个 ~150 行的工具,从 INFORMATION_SCHEMA 出发,生成三种格式:
- Markdown(给人看 + 喂 LLM)
- JSON(给程序消费)
- OpenAPI Tool Definition(直接挂 MCP / Function Calling)
输出样例
Markdown
# 数据库 `app`
## `orders`
**电商订单主表** · 行数 ~ 1.2M · 引擎 InnoDB · 字符集 utf8mb4
| 列 | 类型 | NULL | 默认 | 说明 |
|---|---|---|---|---|
| `id` | bigint(20) | NO | AUTO_INCREMENT | 订单号,全局唯一 (PK) |
| `user_id` | bigint(20) | NO | — | FK → `users.id`,下单用户 |
| `amount_cents` | int(11) | NO | — | 金额,以分为单位 |
| `status` | enum('pending','paid','shipped','cancelled') | NO | 'pending' | 订单状态机 |
| `created_at` | datetime | NO | CURRENT_TIMESTAMP | 下单时间 |
**索引**
- `PRIMARY` (id)
- `idx_user_created` (user_id, created_at)
**外键**
- `fk_orders_user` (`user_id`) → `users` (`id`)JSON
{
"database": "app",
"tables": [
{
"name": "orders",
"comment": "电商订单主表",
"engine": "InnoDB",
"row_estimate": 1200000,
"columns": [
{ "name": "id", "type": "bigint(20)", "nullable": false, "comment": "订单号,全局唯一", "key": "PRI" },
{ "name": "user_id", "type": "bigint(20)", "nullable": false, "comment": "FK → users.id" }
],
"indexes": [{ "name": "PRIMARY", "columns": ["id"], "unique": true }],
"foreign_keys": [
{ "name": "fk_orders_user", "columns": ["user_id"], "references": { "table": "users", "columns": ["id"] } }
]
}
]
}OpenAPI Tool Definition
{
"type": "function",
"function": {
"name": "get_order_by_id",
"description": "Fetch one order from the `orders` table by id",
"parameters": {
"type": "object",
"properties": { "id": { "type": "integer", "description": "订单号,全局唯一" } },
"required": ["id"]
}
}
}实现(TypeScript)
// src/schema-export.ts
import { createConnection, type Connection } from 'mysql2/promise';
interface Column {
name: string;
type: string;
nullable: boolean;
default: string | null;
comment: string;
key: 'PRI' | 'UNI' | 'MUL' | '';
extra: string;
}
interface ForeignKey {
name: string;
columns: string[];
references: { table: string; columns: string[] };
}
interface Table {
name: string;
comment: string;
engine: string;
row_estimate: number;
columns: Column[];
indexes: { name: string; columns: string[]; unique: boolean }[];
foreign_keys: ForeignKey[];
}
export async function exportSchema(db: Connection, schema: string): Promise<Table[]> {
const [tables] = await db.query<any[]>(
`SELECT table_name AS name, table_comment AS comment, engine, table_rows AS rows
FROM information_schema.tables
WHERE table_schema = ? AND table_type = 'BASE TABLE'
ORDER BY table_name`,
[schema],
);
return Promise.all(
tables.map(async (t): Promise<Table> => {
const [cols] = await db.query<any[]>(
`SELECT column_name AS name, column_type AS type,
is_nullable, column_default, column_comment AS comment,
column_key, extra
FROM information_schema.columns
WHERE table_schema = ? AND table_name = ?
ORDER BY ordinal_position`,
[schema, t.name],
);
const [indexes] = await db.query<any[]>(
`SELECT index_name, column_name, seq_in_index, non_unique
FROM information_schema.statistics
WHERE table_schema = ? AND table_name = ?
ORDER BY index_name, seq_in_index`,
[schema, t.name],
);
const [fks] = await db.query<any[]>(
`SELECT constraint_name, column_name, referenced_table_name, referenced_column_name
FROM information_schema.key_column_usage
WHERE table_schema = ? AND table_name = ? AND referenced_table_name IS NOT NULL
ORDER BY constraint_name, ordinal_position`,
[schema, t.name],
);
return {
name: t.name,
comment: t.comment ?? '',
engine: t.engine,
row_estimate: Number(t.rows ?? 0),
columns: cols.map((c) => ({
name: c.name,
type: c.type,
nullable: c.is_nullable === 'YES',
default: c.column_default,
comment: c.comment ?? '',
key: c.column_key ?? '',
extra: c.extra ?? '',
})),
indexes: groupBy(indexes, 'index_name').map((g) => ({
name: g[0].index_name,
columns: g.map((r) => r.column_name),
unique: g[0].non_unique === 0,
})),
foreign_keys: groupBy(fks, 'constraint_name').map((g) => ({
name: g[0].constraint_name,
columns: g.map((r) => r.column_name),
references: {
table: g[0].referenced_table_name,
columns: g.map((r) => r.referenced_column_name),
},
})),
};
}),
);
}
function groupBy<T>(arr: T[], key: keyof T): T[][] {
const out = new Map<string, T[]>();
for (const x of arr) {
const k = String(x[key]);
if (!out.has(k)) out.set(k, []);
out.get(k)!.push(x);
}
return [...out.values()];
}
export function toMarkdown(tables: Table[], schema: string): string {
let md = `# 数据库 \`${schema}\`\n\n`;
for (const t of tables) {
md += `## \`${t.name}\`\n`;
md += `**${t.comment || '(无描述)'}** · 行数 ~ ${t.row_estimate.toLocaleString()} · 引擎 ${t.engine}\n\n`;
md += `| 列 | 类型 | NULL | 默认 | 说明 |\n|---|---|---|---|---|\n`;
for (const c of t.columns) {
md += `| \`${c.name}\` | ${c.type} | ${c.nullable ? 'YES' : 'NO'} | ${c.default ?? '—'} | ${c.comment} |\n`;
}
if (t.indexes.length) {
md += `\n**索引**\n`;
for (const i of t.indexes) md += `- \`${i.name}\` (${i.columns.join(', ')})${i.unique ? ' UNIQUE' : ''}\n`;
}
if (t.foreign_keys.length) {
md += `\n**外键**\n`;
for (const f of t.foreign_keys)
md += `- \`${f.name}\` (${f.columns.join(', ')}) → \`${f.references.table}\` (${f.references.columns.join(', ')})\n`;
}
md += '\n';
}
return md;
}
export function toOpenAPITools(tables: Table[]): object[] {
const tools: object[] = [];
for (const t of tables) {
const pk = t.columns.find((c) => c.key === 'PRI');
if (pk) {
tools.push({
type: 'function',
function: {
name: `get_${t.name}_by_${pk.name}`,
description: `Fetch one row from \`${t.name}\` by ${pk.name}. ${t.comment}`,
parameters: {
type: 'object',
properties: { [pk.name]: { type: sqlTypeToJsonType(pk.type), description: pk.comment } },
required: [pk.name],
},
},
});
}
tools.push({
type: 'function',
function: {
name: `list_${t.name}`,
description: `List rows from \`${t.name}\` with optional filters.`,
parameters: {
type: 'object',
properties: {
limit: { type: 'integer', default: 50 },
order_by: { type: 'string', enum: t.columns.map((c) => c.name) },
},
},
},
});
}
return tools;
}
function sqlTypeToJsonType(t: string): string {
if (/int|decimal|float|double|bigint|tinyint/i.test(t)) return 'integer';
if (/bool/i.test(t)) return 'boolean';
return 'string';
}CLI 使用
// bin/schema-doc.ts
#!/usr/bin/env node
import { createConnection } from 'mysql2/promise';
import { exportSchema, toMarkdown, toOpenAPITools } from '../src/schema-export.js';
import { writeFileSync } from 'fs';
const url = process.env.MARIADB_URL!;
const schema = process.env.MARIADB_SCHEMA ?? new URL(url).pathname.slice(1);
const db = await createConnection({ uri: url });
const tables = await exportSchema(db, schema);
writeFileSync(`schema.md`, toMarkdown(tables, schema));
writeFileSync(`schema.json`, JSON.stringify(tables, null, 2));
writeFileSync(`schema.tools.json`, JSON.stringify(toOpenAPITools(tables), null, 2));
await db.end();
console.log(`exported ${tables.length} tables`);跑:
MARIADB_URL='mysql://readonly:xxx@host:3306/app' tsx bin/schema-doc.ts跟 LLM 怎么配合
const schemaMd = readFileSync('schema.md', 'utf8');
const r = await openai.chat.completions.create({
model: 'gpt-5',
messages: [
{ role: 'system', content: `You are a SQL assistant for MariaDB.\n\n## Schema\n${schemaMd}` },
{ role: 'user', content: question },
],
});或者跟 Function Calling 一起用:直接把 schema.tools.json 注册成 tools,让模型按 tool 调用而不是写裸 SQL。
自动化保鲜
每次 schema 变更后跑一次。可以接到 CI:
# .github/workflows/schema-export.yml
on:
push:
paths: ['db/migrations/**']
jobs:
export:
runs-on: ubuntu-latest
services:
mariadb:
image: mariadb:11.4
env: { MARIADB_ROOT_PASSWORD: x }
ports: ['3306:3306']
steps:
- uses: actions/checkout@v4
- run: pnpm install && pnpm db:migrate && pnpm schema:export
- uses: stefanzweifel/git-auto-commit-action@v5
with: { commit_message: 'chore: refresh schema docs' }进阶
- 加 ER 图导出:
exportSchema已经有了外键,多写几行就能生成 MermaiderDiagram - 加 PII 标注:扫描列名(
email、phone、id_card)自动打 tag,给 LLM 看时打码 - 加 样本数据:每张表随机抽 3 行(脱敏后)放进 schema.md 帮助 LLM 理解数据分布
- 加 diff 模式:比较两次导出,输出"自上次以来 schema 变了哪些",给 release notes 用
延伸
- Text-to-SQL 方法论 —— schema 检索那一步就用这个
- MCP Server 实战 —— 自动暴露这些 tools
- LLM 友好的 schema 设计原则