我来介绍几种在本地保存 OpenClaw 数据的方法:

直接保存到文件
JSON 格式(推荐)
import json

# Sample payload. A literal `[...]` is a list containing Ellipsis, which
# json.dump() rejects with TypeError, so concrete sample values are used here.
data = {"key": "value", "items": ["item1", "item2"]}

# Save: ensure_ascii=False keeps non-ASCII text readable in the output file.
with open('openclaw_data.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# Load the file back into a Python object.
with open('openclaw_data.json', 'r', encoding='utf-8') as f:
    loaded_data = json.load(f)
CSV 格式(适合表格数据)
import csv

# First row is the header; every following row is one record.
data = [
    ['name', 'age', 'city'],
    ['Alice', 25, 'Beijing'],
    ['Bob', 30, 'Shanghai'],
]

# newline='' lets the csv module control line endings itself (otherwise
# extra blank lines can appear on Windows).
with open('openclaw_data.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(data)
文本文件
# `datetime` is needed for the timestamp line in the Markdown file; the
# snippet used it without importing it, which raised NameError.
from datetime import datetime

# Save plain text
text_data = "这是OpenClaw抓取的数据..."
with open('openclaw_data.txt', 'w', encoding='utf-8') as f:
    f.write(text_data)

# Or save as Markdown: a heading followed by the crawl time
with open('openclaw_data.md', 'w', encoding='utf-8') as f:
    f.write("# OpenClaw 数据\n\n")
    f.write(f"抓取时间: {datetime.now()}\n\n")
使用 SQLite 数据库
import sqlite3
import json

# Open (or create) the database file in the current directory.
conn = sqlite3.connect('openclaw_data.db')
try:
    cursor = conn.cursor()

    # Create the results table on first use. The `title` column is required
    # by the INSERT below; `metadata` holds a JSON-encoded string.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS crawl_results (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            url TEXT,
            title TEXT,
            content TEXT,
            metadata TEXT,
            crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')

    # Sample record to insert.
    data = {
        'url': 'https://example.com',
        'title': '示例标题',
        'content': '网页内容...',
        'meta': {'key': 'value'}
    }

    # Parameterized query: values are bound by sqlite3, never string-formatted
    # into the SQL text.
    cursor.execute('''
        INSERT INTO crawl_results (url, title, content, metadata)
        VALUES (?, ?, ?, ?)
    ''', (
        data['url'],
        data['title'],
        data['content'],
        json.dumps(data['meta'])
    ))
    conn.commit()
finally:
    # Always release the database handle, even if a statement above failed.
    conn.close()
使用配置文件保存设置
创建 config.json:
{
"save_path": "./data/",
"file_format": "json",
"auto_save": true,
"backup_enabled": true,
"compression": false
}
import json
import os
from pathlib import Path
class OpenClawLocalSaver:
    """Save crawl data as JSON files under a directory named in a config file.

    On construction the config is loaded (or defaults are used) and the
    directory named by ``config['save_path']`` is created if missing.
    """

    def __init__(self, config_path='config.json'):
        """Load settings from *config_path* and create the save directory."""
        self.config = self.load_config(config_path)
        self.setup_directories()

    def load_config(self, config_path):
        """Return the settings dict read from *config_path*.

        When the file does not exist the built-in defaults are returned.
        When it exists, its values override the defaults, so a config file
        that omits ``save_path`` still yields a usable configuration.

        Raises:
            json.JSONDecodeError: if the file exists but is not valid JSON.
        """
        defaults = {
            'save_path': './crawl_data/',
            'default_format': 'json'
        }
        if os.path.exists(config_path):
            # Explicit encoding so the result does not depend on the
            # platform's default locale.
            with open(config_path, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            return {**defaults, **loaded}
        return defaults

    def setup_directories(self):
        """Create the configured save directory, including missing parents."""
        Path(self.config['save_path']).mkdir(parents=True, exist_ok=True)

    def save_data(self, data, filename=None):
        """Write *data* as pretty-printed UTF-8 JSON and return the file path.

        Args:
            data: Any JSON-serializable object.
            filename: File name without extension. When omitted, a name of
                the form ``crawl_YYYYMMDD_HHMMSS`` is generated from the
                current local time.

        Returns:
            The ``pathlib.Path`` of the written ``.json`` file.
        """
        if filename is None:
            # Imported here so the class works even when the surrounding
            # module does not import datetime at the top.
            from datetime import datetime
            filename = f"crawl_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        save_path = Path(self.config['save_path']) / f"{filename}.json"
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"数据已保存到: {save_path}")
        return save_path
高级保存功能
带版本控制的保存
import hashlib
import json
from datetime import datetime
def save_with_version(data, base_filename):
    """Write *data* to a JSON file whose name encodes the date and a content hash.

    The file name has the form ``{base_filename}_{YYYYMMDD}_{hash}.json``,
    where ``hash`` is the first 8 hex digits of the MD5 of the data's
    key-sorted JSON encoding. Saving identical data on the same day therefore
    targets the same file name.

    Args:
        data: JSON-serializable object to save.
        base_filename: Path prefix for the output file (no extension).

    Returns:
        The name of the file that was written.
    """
    # sort_keys=True makes the hash independent of dict insertion order.
    # MD5 is used only as a short content fingerprint, not for security.
    canonical = json.dumps(data, sort_keys=True).encode()
    data_hash = hashlib.md5(canonical).hexdigest()[:8]

    timestamp = datetime.now().strftime('%Y%m%d')
    filename = f"{base_filename}_{timestamp}_{data_hash}.json"
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    return filename
增量保存
import json
import os
from datetime import datetime
class IncrementalSaver:
    """Accumulate crawl items into one JSON file per calendar day.

    The target file is ``{base_dir}/crawl_{YYYY-MM-DD}.json``, with the date
    fixed at construction time. Items already in that file are loaded so
    later saves can skip URLs that were stored before.
    """

    def __init__(self, base_dir='./data/'):
        """Resolve today's file under *base_dir* and load its contents if present."""
        self.base_dir = base_dir
        self.today = datetime.now().strftime('%Y-%m-%d')
        self.today_file = os.path.join(base_dir, f"crawl_{self.today}.json")
        # Load existing data, or start with an empty list for a new day.
        if os.path.exists(self.today_file):
            with open(self.today_file, 'r', encoding='utf-8') as f:
                self.existing_data = json.load(f)
        else:
            self.existing_data = []

    def save_incrementally(self, new_data):
        """Append items with unseen URLs and rewrite today's file.

        An item is skipped when its ``'url'`` value matches an item already
        stored, or an earlier item in the same *new_data* batch. Items with
        no ``'url'`` cannot be compared, so they are always kept.

        Args:
            new_data: Iterable of dict-like items (each must support ``.get``).
        """
        seen_urls = {item.get('url') for item in self.existing_data}
        # A missing URL must never count as a duplicate of another missing URL.
        seen_urls.discard(None)

        for item in new_data:
            url = item.get('url')
            if url is not None:
                if url in seen_urls:
                    continue
                # Record the URL immediately so duplicates later in this same
                # batch are also skipped.
                seen_urls.add(url)
            self.existing_data.append(item)

        # The directory may not exist yet on first use.
        if self.base_dir:
            os.makedirs(self.base_dir, exist_ok=True)
        with open(self.today_file, 'w', encoding='utf-8') as f:
            json.dump(self.existing_data, f, ensure_ascii=False, indent=2)
批量保存
import json
from pathlib import Path
from datetime import datetime
def batch_save(data_list, output_dir='./output/'):
    """Save each item of *data_list* to its own JSON file, plus an index file.

    Files are named ``batch_{timestamp}_{NNNN}.json`` with a 1-based,
    zero-padded counter. An ``index_{timestamp}.json`` file in the same
    directory records the batch time, the file count, and the file names.

    Args:
        data_list: Iterable of JSON-serializable items.
        output_dir: Target directory; created with any missing parents.
    """
    out_dir = Path(output_dir)
    # parents=True so nested output directories work.
    out_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Collect names while writing so the index lists exactly what was saved.
    saved_names = []
    for position, data in enumerate(data_list, start=1):
        name = f"batch_{timestamp}_{position:04d}.json"
        with open(out_dir / name, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        saved_names.append(name)

    # Write the index file alongside the data files.
    index = {
        'batch_time': timestamp,
        'total_files': len(saved_names),
        'files': saved_names,
    }
    with open(out_dir / f"index_{timestamp}.json", 'w', encoding='utf-8') as f:
        json.dump(index, f, indent=2)
使用建议
- 选择合适格式:
- 结构化数据 → JSON
- 表格数据 → CSV
- 文档/日志 → TXT/Markdown
- 需要查询 → SQLite
- 目录结构示例:
openclaw_data/
├── raw/          # 原始数据
├── processed/    # 处理后的数据
├── logs/         # 日志文件
└── config.json   # 配置文件
- 添加错误处理:
def safe_save(data, filename):
    """Save *data* as JSON, falling back to a plain-text backup on failure.

    Args:
        data: Object to serialize.
        filename: Path of the JSON file to write.

    Returns:
        True if the JSON file was written. False if writing failed; in that
        case ``str(data)`` is written to ``{filename}.bak`` on a best-effort
        basis.
    """
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        return True
    except Exception as e:
        # Deliberately broad: this helper reports failure through its return
        # value instead of raising.
        print(f"保存失败: {e}")
        # Try a backup save. It can fail for the same reason as the main
        # write (e.g. a missing directory), so guard it to keep the
        # "returns False" contract.
        backup_filename = f"{filename}.bak"
        try:
            with open(backup_filename, 'w', encoding='utf-8') as f:
                f.write(str(data))
        except OSError as backup_error:
            print(f"保存失败: {backup_error}")
        return False
根据你的具体需求选择合适的方法,如果需要更具体的实现,请告诉我你的数据结构和用途!
版权声明:除非特别标注,否则均为本站原创文章,转载时请以链接形式注明文章出处。