优化输出信息
This commit is contained in:
91
main.py
91
main.py
@@ -98,7 +98,7 @@ def get_video_duration(video_url):
|
||||
return None
|
||||
|
||||
|
||||
def process_video(course_id, item_id):
|
||||
def process_video(course_id, item_id, current_index=0, total_items=0):
|
||||
"""
|
||||
处理单个视频:
|
||||
1. 获取视频页,提取 config URL
|
||||
@@ -106,6 +106,8 @@ def process_video(course_id, item_id):
|
||||
3. 循环发送心跳包,直到视频看完
|
||||
"""
|
||||
print(f"\n>>> 开始处理视频: CourseId={course_id}, ItemId={item_id}")
|
||||
if total_items > 0:
|
||||
print(f" 当前进度: 第 {current_index} 集 / 共 {total_items} 集")
|
||||
|
||||
# 1. 请求视频播放页
|
||||
video_page_url = f"https://zjbc.cjnep.net/lms/web/course/view?id={course_id}&itemid={item_id}"
|
||||
@@ -145,6 +147,10 @@ def process_video(course_id, item_id):
|
||||
total_time_str = root.findtext("totalTime")
|
||||
history_id = root.findtext("historyId")
|
||||
finish_status = root.findtext("finish")
|
||||
sco_title = root.findtext("scoTitle")
|
||||
|
||||
if sco_title:
|
||||
print(f" [课程名称] {sco_title}")
|
||||
|
||||
# 尝试获取视频 URL
|
||||
video_url = None
|
||||
@@ -373,29 +379,71 @@ if not course_data:
|
||||
|
||||
# 这里为了演示,我把之前的抓取逻辑简单复原一下:
|
||||
course_ids = []
|
||||
# 尝试读取课程ID缓存
|
||||
if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
|
||||
with open(cache_file, "r", encoding="utf-8") as f:
|
||||
course_ids = json.load(f)
|
||||
print(f"正在读取课程ID缓存: {cache_file}")
|
||||
try:
|
||||
with open(cache_file, "r", encoding="utf-8") as f:
|
||||
course_ids = json.load(f)
|
||||
except Exception as e:
|
||||
print(f"[Warning] 读取课程ID缓存失败: {e}")
|
||||
|
||||
# 如果没有缓存,从网络获取
|
||||
if not course_ids:
|
||||
resp = requests.get(url, headers=headers)
|
||||
course_ids = re.findall(
|
||||
r"window\.location\s*=\s*['\"]/lms/web/course/detail\?id=(\d+)['\"]", resp.text)
|
||||
with open(cache_file, "w", encoding="utf-8") as f:
|
||||
json.dump(course_ids, f, indent=2)
|
||||
print("正在从服务器获取课程列表...")
|
||||
try:
|
||||
resp = requests.get(url, headers=headers)
|
||||
resp.raise_for_status()
|
||||
# 正则匹配课程ID
|
||||
course_ids = re.findall(
|
||||
r"window\.location\s*=\s*['\"]/lms/web/course/detail\?id=(\d+)['\"]", resp.text)
|
||||
|
||||
for cid in course_ids:
|
||||
detail_url = f"https://zjbc.cjnep.net/lms/web/course/detail?id={cid}"
|
||||
resp = requests.get(detail_url, headers=headers)
|
||||
item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text)
|
||||
seen = set()
|
||||
unique_item_ids = [x for x in item_ids if not (
|
||||
x in seen or seen.add(x))]
|
||||
course_data.append({"courseId": cid, "itemIds": unique_item_ids})
|
||||
time.sleep(1)
|
||||
if course_ids:
|
||||
print(f"成功获取到 {len(course_ids)} 门课程。")
|
||||
with open(cache_file, "w", encoding="utf-8") as f:
|
||||
json.dump(course_ids, f, indent=2)
|
||||
else:
|
||||
print("[Error] 未能获取到任何课程ID,请检查Cookie是否失效或页面结构变更。")
|
||||
except Exception as e:
|
||||
print(f"[Error] 获取课程列表请求失败: {e}")
|
||||
|
||||
with open(items_cache_file, "w", encoding="utf-8") as f:
|
||||
json.dump(course_data, f, indent=2)
|
||||
# 遍历课程获取视频Item ID
|
||||
if course_ids:
|
||||
print(f"开始获取每门课程的视频列表 (共 {len(course_ids)} 门)...")
|
||||
for index, cid in enumerate(course_ids):
|
||||
print(f"[{index+1}/{len(course_ids)}] 正在解析课程 ID: {cid} ...",
|
||||
end="", flush=True)
|
||||
|
||||
try:
|
||||
detail_url = f"https://zjbc.cjnep.net/lms/web/course/detail?id={cid}"
|
||||
resp = requests.get(detail_url, headers=headers)
|
||||
resp.raise_for_status()
|
||||
|
||||
# 提取 itemid
|
||||
item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text)
|
||||
|
||||
# 去重
|
||||
seen = set()
|
||||
unique_item_ids = [x for x in item_ids if not (
|
||||
x in seen or seen.add(x))]
|
||||
|
||||
course_data.append(
|
||||
{"courseId": cid, "itemIds": unique_item_ids})
|
||||
print(f" 发现 {len(unique_item_ids)} 个视频")
|
||||
|
||||
except Exception as e:
|
||||
print(f"\n [Error] 获取课程 {cid} 详情失败: {e}")
|
||||
|
||||
# 避免请求过快
|
||||
time.sleep(1)
|
||||
|
||||
# 保存最终结果
|
||||
print(f"正在保存课程数据到 {items_cache_file} ...")
|
||||
with open(items_cache_file, "w", encoding="utf-8") as f:
|
||||
json.dump(course_data, f, indent=2)
|
||||
print("课程数据抓取完成。")
|
||||
else:
|
||||
print("没有课程ID,跳过视频抓取步骤。")
|
||||
|
||||
|
||||
# 3. 开始刷课
|
||||
@@ -406,8 +454,9 @@ for course in course_data:
|
||||
items = course['itemIds']
|
||||
print(f"\n=== 正在处理课程 {cid},共 {len(items)} 个视频 ===")
|
||||
|
||||
for item_id in items:
|
||||
process_video(cid, item_id)
|
||||
for idx, item_id in enumerate(items):
|
||||
process_video(cid, item_id, current_index=idx +
|
||||
1, total_items=len(items))
|
||||
# 视频之间休息一下
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user