优化输出信息

This commit is contained in:
2025-12-12 14:12:28 +08:00
parent 5feede6030
commit 4287ec8234

91
main.py
View File

@@ -98,7 +98,7 @@ def get_video_duration(video_url):
return None
def process_video(course_id, item_id):
def process_video(course_id, item_id, current_index=0, total_items=0):
"""
处理单个视频:
1. 获取视频页,提取 config URL
@@ -106,6 +106,8 @@ def process_video(course_id, item_id):
3. 循环发送心跳包,直到视频看完
"""
print(f"\n>>> 开始处理视频: CourseId={course_id}, ItemId={item_id}")
if total_items > 0:
print(f" 当前进度: 第 {current_index} 集 / 共 {total_items}")
# 1. 请求视频播放页
video_page_url = f"https://zjbc.cjnep.net/lms/web/course/view?id={course_id}&itemid={item_id}"
@@ -145,6 +147,10 @@ def process_video(course_id, item_id):
total_time_str = root.findtext("totalTime")
history_id = root.findtext("historyId")
finish_status = root.findtext("finish")
sco_title = root.findtext("scoTitle")
if sco_title:
print(f" [课程名称] {sco_title}")
# 尝试获取视频 URL
video_url = None
@@ -373,29 +379,71 @@ if not course_data:
# 这里为了演示,我把之前的抓取逻辑简单复原一下:
course_ids = []
# 尝试读取课程ID缓存
if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
with open(cache_file, "r", encoding="utf-8") as f:
course_ids = json.load(f)
print(f"正在读取课程ID缓存: {cache_file}")
try:
with open(cache_file, "r", encoding="utf-8") as f:
course_ids = json.load(f)
except Exception as e:
print(f"[Warning] 读取课程ID缓存失败: {e}")
# 如果没有缓存,从网络获取
if not course_ids:
resp = requests.get(url, headers=headers)
course_ids = re.findall(
r"window\.location\s*=\s*['\"]/lms/web/course/detail\?id=(\d+)['\"]", resp.text)
with open(cache_file, "w", encoding="utf-8") as f:
json.dump(course_ids, f, indent=2)
print("正在从服务器获取课程列表...")
try:
resp = requests.get(url, headers=headers)
resp.raise_for_status()
# 正则匹配课程ID
course_ids = re.findall(
r"window\.location\s*=\s*['\"]/lms/web/course/detail\?id=(\d+)['\"]", resp.text)
for cid in course_ids:
detail_url = f"https://zjbc.cjnep.net/lms/web/course/detail?id={cid}"
resp = requests.get(detail_url, headers=headers)
item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text)
seen = set()
unique_item_ids = [x for x in item_ids if not (
x in seen or seen.add(x))]
course_data.append({"courseId": cid, "itemIds": unique_item_ids})
time.sleep(1)
if course_ids:
print(f"成功获取到 {len(course_ids)} 门课程。")
with open(cache_file, "w", encoding="utf-8") as f:
json.dump(course_ids, f, indent=2)
else:
print("[Error] 未能获取到任何课程ID请检查Cookie是否失效或页面结构变更。")
except Exception as e:
print(f"[Error] 获取课程列表请求失败: {e}")
with open(items_cache_file, "w", encoding="utf-8") as f:
json.dump(course_data, f, indent=2)
# 遍历课程获取视频Item ID
if course_ids:
print(f"开始获取每门课程的视频列表 (共 {len(course_ids)} 门)...")
for index, cid in enumerate(course_ids):
print(f"[{index+1}/{len(course_ids)}] 正在解析课程 ID: {cid} ...",
end="", flush=True)
try:
detail_url = f"https://zjbc.cjnep.net/lms/web/course/detail?id={cid}"
resp = requests.get(detail_url, headers=headers)
resp.raise_for_status()
# 提取 itemid
item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text)
# 去重
seen = set()
unique_item_ids = [x for x in item_ids if not (
x in seen or seen.add(x))]
course_data.append(
{"courseId": cid, "itemIds": unique_item_ids})
print(f" 发现 {len(unique_item_ids)} 个视频")
except Exception as e:
print(f"\n [Error] 获取课程 {cid} 详情失败: {e}")
# 避免请求过快
time.sleep(1)
# 保存最终结果
print(f"正在保存课程数据到 {items_cache_file} ...")
with open(items_cache_file, "w", encoding="utf-8") as f:
json.dump(course_data, f, indent=2)
print("课程数据抓取完成。")
else:
print("没有课程ID跳过视频抓取步骤。")
# 3. 开始刷课
@@ -406,8 +454,9 @@ for course in course_data:
items = course['itemIds']
print(f"\n=== 正在处理课程 {cid},共 {len(items)} 个视频 ===")
for item_id in items:
process_video(cid, item_id)
for idx, item_id in enumerate(items):
process_video(cid, item_id, current_index=idx +
1, total_items=len(items))
# 视频之间休息一下
time.sleep(2)