# Example: Bright Data Deep Lookup API client with progress polling.
import requests
import time
class DeepLookupAPI:
    """Small client for the Deep Lookup HTTP API under ``api.brightdata.com``.

    Every request carries an ``Authorization: Bearer <api_key>`` header and
    the configured ``request_timeout``. HTTP error statuses are raised
    instead of being parsed as if they were data.
    """

    def __init__(self, api_key, *, request_timeout=30):
        """Store the credentials and derive the shared request settings.

        Args:
            api_key: Token placed in the ``Authorization: Bearer`` header.
            request_timeout: Seconds passed as ``timeout`` to every
                ``requests`` call; ``None`` disables the timeout.
        """
        self.api_key = api_key
        self.base_url = "https://api.brightdata.com/datasets/deep_lookup/v1"
        self.headers = {"Authorization": f"Bearer {api_key}"}
        self.request_timeout = request_timeout

    def _get_json(self, path):
        """GET ``base_url + path`` and return the decoded JSON body."""
        response = requests.get(
            f"{self.base_url}{path}",
            headers=self.headers,
            timeout=self.request_timeout,
        )
        # Surface 4xx/5xx here rather than as a confusing KeyError later.
        response.raise_for_status()
        return response.json()

    def _post_json(self, path, payload):
        """POST ``payload`` as JSON to ``base_url + path``; return the JSON body."""
        response = requests.post(
            f"{self.base_url}{path}",
            headers=self.headers,
            json=payload,
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _pause(interval, deadline):
        """Sleep ``interval`` seconds, or raise if ``deadline`` has passed."""
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError("Deep Lookup request did not finish in time")
        time.sleep(interval)

    def research_with_spec(self, query, columns, limit=100, *,
                           poll_interval=5, timeout=None):
        """Trigger a research request, wait for it, and return its results.

        Args:
            query: Query text. It is sent both at the top level of the
                trigger payload and inside the spec; the spec title is the
                query with every ``"Find all "`` occurrence removed.
            columns: Column definitions, passed through unchanged as
                ``spec["columns"]``.
            limit: Sent as ``result_limit`` in the trigger payload.
            poll_interval: Seconds to sleep between status polls.
            timeout: Maximum seconds to keep polling, checked between
                polls. ``None`` (the default) polls until the request
                completes or fails.

        Returns:
            The decoded JSON body of ``GET /request/{request_id}``.

        Raises:
            RuntimeError: The status endpoint reported ``"failed"``.
            TimeoutError: ``timeout`` elapsed before completion.
            requests.HTTPError: Any call returned an HTTP error status.
            KeyError: A response lacked ``request_id`` or ``status``.
        """
        # Build the detailed spec.
        spec = {
            "name": "companies",
            "query": query,
            "title": query.replace("Find all ", ""),
            "columns": columns,
        }

        # Trigger the research.
        trigger_response = self._post_json(
            "/trigger",
            {
                "query": query,
                "spec": spec,
                "result_limit": limit,
            },
        )
        request_id = trigger_response["request_id"]

        # Poll until the request completes, fails, or the deadline passes.
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            status_response = self._get_json(f"/request/{request_id}/status")
            print(f"进度: {status_response.get('progress', 0)}%")
            status = status_response["status"]
            if status == "completed":
                break
            if status == "failed":
                raise RuntimeError("研究失败")
            self._pause(poll_interval, deadline)

        # Fetch the results.
        return self._get_json(f"/request/{request_id}")

    def monitor_progress(self, request_id, *, poll_interval=3, timeout=None):
        """Poll a research request, printing a message for each known step.

        Args:
            request_id: Identifier interpolated into ``/request/{request_id}``.
            poll_interval: Seconds to sleep between polls.
            timeout: Maximum seconds to keep polling, checked between
                polls. ``None`` (the default) polls until the step is
                ``"done"``.

        Returns:
            The decoded JSON body of the poll in which ``step`` was ``"done"``.

        Raises:
            RuntimeError: The response carried ``status == "failed"``.
            TimeoutError: ``timeout`` elapsed before the step became ``"done"``.
            requests.HTTPError: A poll returned an HTTP error status.
        """
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            result = self._get_json(f"/request/{request_id}")

            # NOTE(review): assumes this endpoint may expose the same
            # "status" field as the /status endpoint - confirm against the
            # API docs. If the key is absent this check is a no-op.
            if result.get("status") == "failed":
                raise RuntimeError("研究失败")

            step = result.get("step", "unknown")
            if step == "identifying":
                print("分析查询中...")
            elif step == "generating_schema":
                print("创建数据结构...")
            elif step == "generating":
                pages = result.get("pages_read", 0)
                matched = result.get("matched_records", 0)
                print(f"处理数据: 已读取 {pages} 页, 已匹配 {matched} 条记录")
            elif step == "done":
                print("研究完成!")
                return result

            # Any other step value prints nothing and is simply polled again.
            self._pause(poll_interval, deadline)
# Usage example: look up AI companies in Israel with more than 50 employees.
api = DeepLookupAPI("YOUR_API_KEY")

# Each tuple is (name, description, type) and is expanded into one column
# dict with exactly those three keys, in that order.
columns = [
    {"name": name, "description": description, "type": kind}
    for name, description, kind in (
        ("company_name", "公司名称", "enrichment"),
        ("is_ai_company", "必须为 AI/ML 公司", "constraint"),
        ("employee_count", "员工数量", "enrichment"),
        ("min_50_employees", "至少有 50 名员工", "constraint"),
    )
]

# Blocks until the request has completed, then returns the result payload.
results = api.research_with_spec(
    "Find all AI companies in Israel with more than 50 employees",
    columns,
    limit=100,
)

# Summarise the counters and cost read from the returned payload.
print(f"找到 {results['matched_records']} 家公司")
print(f"跳过 {results['skipped_records']} 家公司(不满足所有条件)")
print(f"总成本: {results['total_cost']}")