1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import os
import time
import ssl
import requests
import pandas as pd
from datetime import datetime, timedelta
from urllib3.poolmanager import PoolManager
from urllib3.util.ssl_ import create_urllib3_context
# ===============================
# 1. Disable proxies
# ===============================
# Overwrite the upper-case proxy variables with empty strings for this process.
os.environ.update(
    dict.fromkeys(("HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY"), "")
)
# ===============================
# 2. SSL adapter (bank-specific)
# ===============================
class LegacySSLAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter whose connection pool uses a relaxed SSL context.

    The context enables ``OP_LEGACY_SERVER_CONNECT`` and turns off hostname
    checking and certificate verification.
    """

    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        """Build ``self.poolmanager`` with the relaxed SSL context.

        Args:
            connections: Number of connection pools to cache.
            maxsize: Maximum number of connections kept per pool.
            block: Whether the pool blocks when no connection is free.
            **pool_kwargs: Extra keyword arguments forwarded to ``PoolManager``
                (mirrors the base-class signature). Any ``ssl_context`` passed
                here is replaced by the context built below.
        """
        ctx = create_urllib3_context()
        # Use the named constant where the ssl module exposes it (3.12+);
        # otherwise fall back to the raw OpenSSL flag value 0x4.
        ctx.options |= getattr(ssl, "OP_LEGACY_SERVER_CONNECT", 0x4)
        # SECURITY NOTE(review): hostname checking and certificate verification
        # are both disabled here, so the peer is not authenticated. Kept as in
        # the original; confirm this is acceptable for the target endpoint.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        pool_kwargs["ssl_context"] = ctx
        self.poolmanager = PoolManager(
            num_pools=connections,
            maxsize=maxsize,
            block=block,
            **pool_kwargs,
        )
# ===============================
# 3. Base request configuration
# ===============================
# Endpoint that fetch_one_day POSTs to.
URL = "https://papi.icbc.com.cn/exchanges/ns/history"

# Headers attached to every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Content-Type": "application/json",
    "Origin": "https://www.icbc.com.cn",
    "Referer": "https://www.icbc.com.cn/ICBC/",
}

# Both schemes mapped to None; passed as the per-request ``proxies`` argument.
PROXIES = dict.fromkeys(("http", "https"))
# ===============================
# 4. Create the session
# ===============================
# One shared session; every "https://" URL goes through LegacySSLAdapter.
_legacy_adapter = LegacySSLAdapter()
session = requests.Session()
session.mount("https://", _legacy_adapter)
# ===============================
# 5. Single-day fetch (with retries)
# ===============================
def fetch_one_day(date_str, max_retry=3):
    """Fetch the exchange-rate records for one day.

    POSTs ``date_str`` to ``URL`` through the shared ``session`` and retries
    transport/parse failures with a linearly growing pause.

    Args:
        date_str: Day to query, as passed by the caller ("%Y-%m-%d" format).
        max_retry: Maximum number of attempts.

    Returns:
        The response's ``"data"`` field when the response ``"code"`` is 0
        (presumably a list of per-currency dicts -- verify against the API);
        ``None`` when the server answers with any other payload or when all
        attempts fail.
    """
    payload = {
        "date": date_str,
        "currType": "",
        "serverType": "1"
    }
    for attempt in range(1, max_retry + 1):
        try:
            r = session.post(
                URL,
                headers=HEADERS,
                json=payload,
                timeout=15,
                proxies=PROXIES
            )
            r.raise_for_status()
            data = r.json()
        except Exception as e:
            # Deliberately broad: this is the best-effort boundary of a long
            # scrape, so a single bad day must not abort the whole run.
            print(f"[RETRY {attempt}] {date_str} 失败: {e}")
            # Back off only when another attempt follows; sleeping after the
            # final failure would just delay the caller.
            if attempt < max_retry:
                time.sleep(2 * attempt)
            continue

        # A well-formed answer was received; an unexpected shape or code is
        # deterministic, so report it once and do not retry.
        if isinstance(data, dict) and data.get("code") == 0:
            return data.get("data")
        print(f"[WARN] {date_str} 返回异常 code: {data}")
        return None
    return None
# ===============================
# 6. Date range (last ~5 years)
# ===============================
# The window is a fixed 5 * 365 days; leap days are not accounted for.
_WINDOW = timedelta(days=365 * 5)
end_date = datetime.today()
start_date = end_date - _WINDOW
# ===============================
# 7. Main loop
# ===============================
# Fields copied from each returned item into a CSV row (after "date").
_RATE_FIELDS = (
    "currencyENName",
    "currencyCHName",
    "reference",
    "foreignBuy",
    "foreignSell",
    "cashBuy",
    "cashSell",
)
_ONE_DAY = timedelta(days=1)

all_rows = []
missing_dates = []
current = start_date
while not current > end_date:
    date_str = current.strftime("%Y-%m-%d")
    print(f"📅 抓取 {date_str}")
    day_data = fetch_one_day(date_str)
    if day_data:
        # One row per returned item; absent fields become None via .get().
        all_rows.extend(
            {"date": date_str, **{key: item.get(key) for key in _RATE_FIELDS}}
            for item in day_data
        )
    else:
        # None or an empty result both count as a missing day.
        missing_dates.append(date_str)
    time.sleep(0.5)  # bank endpoint: throttle requests
    current += _ONE_DAY
session.close()
# ===============================
# 8. DataFrame & CSV
# ===============================
# Pin the columns explicitly: with zero collected rows a DataFrame built from
# an empty list has no columns, and the CSV would be written without a header.
# The names and order match the keys of the row dicts built in the main loop,
# so the output is unchanged whenever rows exist.
_CSV_COLUMNS = [
    "date",
    "currencyENName",
    "currencyCHName",
    "reference",
    "foreignBuy",
    "foreignSell",
    "cashBuy",
    "cashSell",
]
df = pd.DataFrame(all_rows, columns=_CSV_COLUMNS)

# The CSV is written next to this script; "utf-8-sig" prepends a BOM.
csv_path = os.path.join(os.path.dirname(__file__), "icbc_exchange_5y.csv")
df.to_csv(csv_path, index=False, encoding="utf-8-sig")

# Summary report.
print("\n✅ 数据抓取完成")
print(f"📄 CSV 文件:{csv_path}")
print(f"📊 总记录数:{len(df)}")
if missing_dates:
    print(f"⚠️ 缺失日期 ({len(missing_dates)}):")
    print(missing_dates)
else:
    print("🎉 无缺失日期")