diff --git a/main.py b/main.py index f4cb322..85d2e1f 100644 --- a/main.py +++ b/main.py @@ -98,7 +98,7 @@ def get_video_duration(video_url): return None -def process_video(course_id, item_id): +def process_video(course_id, item_id, current_index=0, total_items=0): """ 处理单个视频: 1. 获取视频页,提取 config URL @@ -106,6 +106,8 @@ def process_video(course_id, item_id): 3. 循环发送心跳包,直到视频看完 """ print(f"\n>>> 开始处理视频: CourseId={course_id}, ItemId={item_id}") + if total_items > 0: + print(f" 当前进度: 第 {current_index} 集 / 共 {total_items} 集") # 1. 请求视频播放页 video_page_url = f"https://zjbc.cjnep.net/lms/web/course/view?id={course_id}&itemid={item_id}" @@ -145,6 +147,10 @@ def process_video(course_id, item_id): total_time_str = root.findtext("totalTime") history_id = root.findtext("historyId") finish_status = root.findtext("finish") + sco_title = root.findtext("scoTitle") + + if sco_title: + print(f" [课程名称] {sco_title}") # 尝试获取视频 URL video_url = None @@ -373,29 +379,71 @@ if not course_data: # 这里为了演示,我把之前的抓取逻辑简单复原一下: course_ids = [] + # 尝试读取课程ID缓存 if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0: - with open(cache_file, "r", encoding="utf-8") as f: - course_ids = json.load(f) + print(f"正在读取课程ID缓存: {cache_file}") + try: + with open(cache_file, "r", encoding="utf-8") as f: + course_ids = json.load(f) + except Exception as e: + print(f"[Warning] 读取课程ID缓存失败: {e}") + # 如果没有缓存,从网络获取 if not course_ids: - resp = requests.get(url, headers=headers) - course_ids = re.findall( - r"window\.location\s*=\s*['\"]/lms/web/course/detail\?id=(\d+)['\"]", resp.text) - with open(cache_file, "w", encoding="utf-8") as f: - json.dump(course_ids, f, indent=2) + print("正在从服务器获取课程列表...") + try: + resp = requests.get(url, headers=headers) + resp.raise_for_status() + # 正则匹配课程ID + course_ids = re.findall( + r"window\.location\s*=\s*['\"]/lms/web/course/detail\?id=(\d+)['\"]", resp.text) - for cid in course_ids: - detail_url = f"https://zjbc.cjnep.net/lms/web/course/detail?id={cid}" - resp = requests.get(detail_url, headers=headers) - item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text) - seen = set() - unique_item_ids = [x for x in item_ids if not ( - x in seen or seen.add(x))] - course_data.append({"courseId": cid, "itemIds": unique_item_ids}) - time.sleep(1) + if course_ids: + print(f"成功获取到 {len(course_ids)} 门课程。") + with open(cache_file, "w", encoding="utf-8") as f: + json.dump(course_ids, f, indent=2) + else: + print("[Error] 未能获取到任何课程ID,请检查Cookie是否失效或页面结构变更。") + except Exception as e: + print(f"[Error] 获取课程列表请求失败: {e}") - with open(items_cache_file, "w", encoding="utf-8") as f: - json.dump(course_data, f, indent=2) + # 遍历课程获取视频Item ID + if course_ids: + print(f"开始获取每门课程的视频列表 (共 {len(course_ids)} 门)...") + for index, cid in enumerate(course_ids): + print(f"[{index+1}/{len(course_ids)}] 正在解析课程 ID: {cid} ...", + end="", flush=True) + + try: + detail_url = f"https://zjbc.cjnep.net/lms/web/course/detail?id={cid}" + resp = requests.get(detail_url, headers=headers) + resp.raise_for_status() + + # 提取 itemid + item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text) + + # 去重 + seen = set() + unique_item_ids = [x for x in item_ids if not ( + x in seen or seen.add(x))] + + course_data.append( + {"courseId": cid, "itemIds": unique_item_ids}) + print(f" 发现 {len(unique_item_ids)} 个视频") + + except Exception as e: + print(f"\n [Error] 获取课程 {cid} 详情失败: {e}") + + # 避免请求过快 + time.sleep(1) + + # 保存最终结果 + print(f"正在保存课程数据到 {items_cache_file} ...") + with open(items_cache_file, "w", encoding="utf-8") as f: + json.dump(course_data, f, indent=2) + print("课程数据抓取完成。") + else: + print("没有课程ID,跳过视频抓取步骤。") # 3. 开始刷课 @@ -406,8 +454,9 @@ for course in course_data: items = course['itemIds'] print(f"\n=== 正在处理课程 {cid},共 {len(items)} 个视频 ===") - for item_id in items: - process_video(cid, item_id) + for idx, item_id in enumerate(items): + process_video(cid, item_id, current_index=idx + + 1, total_items=len(items)) # 视频之间休息一下 time.sleep(2)