From 0b9659d660906f8c585ceb44cd65eba34aeb5cf2 Mon Sep 17 00:00:00 2001
From: NCJOAQ <2627723488@qq.com>
Date: Fri, 12 Dec 2025 16:37:12 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=AF=BE=E7=A8=8B=E6=88=AA?=
 =?UTF-8?q?=E5=8F=96=E9=80=BB=E8=BE=91=EF=BC=8C=E5=AE=8C=E6=88=90=E5=90=8E?=
 =?UTF-8?q?=E5=88=A0=E9=99=A4=E7=BC=93=E5=AD=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 83 insertions(+), 8 deletions(-)
diff --git a/main.py b/main.py
index fdb32d3..4cf5c3d 100644
--- a/main.py
+++ b/main.py
@@ -15,7 +15,6 @@ import xml.etree.ElementTree as ET
 
 # [警告] 下方的 cookie 包含敏感登录信息，请勿泄露给他人！
 cookie = ""
-
 url = "https://zjbc.cjnep.net/lms/web/course/index"
 cache_file = "course_ids.json"
 items_cache_file = "course_items.json"
@@ -342,7 +341,7 @@ print(f"当前心跳间隔: {HEARTBEAT_INTERVAL}秒 (警告：请勿设置过低
 print(f"每次进度增加: {ADD_TIME}秒")
 print("您需要安装ffmpeg工具集，确保ffprobe命令可用，可从https://ffmpeg.org/download.html 下载")
 print("脚本仅供学习交流使用，请勿用于商业用途！")
-print("作者：NCJOAQ & Github Compilot")
+print("作者：NCJOAQ & Github Copilot")
 print("="*60 + "\n")
 
 course_data = []
@@ -365,7 +364,7 @@ if os.path.exists(items_cache_file) and os.path.getsize(items_cache_file) > 0:
 
 if not course_data:
     print("未找到有效的课程数据缓存 (course_items.json)。")
-    print("请先运行脚本生成缓存，或检查网络连接。")
+    print("准备执行抓取流程...")
 
     # --- 原有的抓取逻辑 (简化版) ---
     # (此处保留你之前的抓取代码，如果需要的话，可以把之前的抓取逻辑放回来)
@@ -412,6 +411,11 @@ if not course_data:
             if matches:
                 print(f"[*] 解析到 {len(matches)} 个课程块，正在过滤已完成课程...")
                 for cid, content in matches:
+                    # 特殊排除 ID 373 (人脸识别测试课程)
+                    if str(cid) == '373':
+                        print(f"    [-] 跳过人脸识别测试课程 (ID: {cid})")
+                        continue
+
                     # 检查是否存在“已完成”的标记图片 (22cn_03.png)
                     if "22cn_03.png" in content:
                         print(f"    [-] 跳过已完成课程 (ID: {cid})")
@@ -421,8 +425,10 @@ if not course_data:
             # 如果上面的复杂正则没匹配到任何东西（可能页面结构变了），回退到简单正则
             if not matches and not course_ids:
                 print("[!] 未能通过高级过滤匹配到课程，尝试使用基础匹配（将包含已完成课程）...")
-                course_ids = re.findall(
+                found_ids = re.findall(
                     r"window\.location\s*=\s*['\"]/lms/web/course/detail\?id=(\d+)['\"]", resp.text)
+                # 过滤掉 373
+                course_ids = [cid for cid in found_ids if str(cid) != '373']
 
             if course_ids:
                 print(f"成功获取到 {len(course_ids)} 门未完成课程。")
@@ -445,17 +451,70 @@ if not course_data:
                 resp = requests.get(detail_url, headers=headers)
                 resp.raise_for_status()
 
-                # 提取 itemid
-                item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text)
+                # Extract per-video information so that already-finished videos can be filtered out.
+                # Each match is one video block; the three groups capture the progress text, the total duration and the itemid.
+                # NOTE: re.DOTALL lets "." match newlines, so one match can span several lines of HTML.
+                video_pattern = re.compile(
+                    r'<div class="vd-item item trans borderb">.*?'
+                    r'class="col-lg-3 col-md-3 col-xs-7 tc3">(.*?)</div>.*?'
+                    r'class="trans date tc9">([\d:]+)</span>.*?'
+                    r'href="[^"]*[?&]itemid=(\d+)"',
+                    re.DOTALL
+                )
+
+                matches = video_pattern.findall(resp.text)
+
+                valid_item_ids = []
+
+                if matches:
+                    print(f"    [*] 找到 {len(matches)} 个视频，正在检查进度...")
+                    for progress_str, total_time_str, item_id in matches:
+                        # Parse the watched time: strip "已学习"/"秒", then split minutes from seconds on "分钟".
+                        learned_seconds = 0
+                        if "分钟" in progress_str:
+                            try:
+                                tmp = progress_str.replace(
+                                    "已学习", "").replace("秒", "")
+                                parts = tmp.split("分钟")
+                                if len(parts) >= 1:
+                                    learned_seconds += int(parts[0]) * 60
+                                if len(parts) >= 2 and parts[1]:
+                                    learned_seconds += int(parts[1])
+                            except ValueError:  # non-numeric text: keep whatever was parsed so far
+                                pass
+
+                        # Parse the total duration ("MM:SS", e.g. 154:16, or "HH:MM:SS").
+                        total_seconds = 0
+                        try:
+                            parts = total_time_str.split(":")
+                            if len(parts) == 2:
+                                total_seconds = int(
+                                    parts[0]) * 60 + int(parts[1])
+                            elif len(parts) == 3:
+                                total_seconds = int(
+                                    parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
+                        except ValueError:  # empty field such as "12:" leaves total_seconds at 0
+                            pass
+
+                        # 判断是否已完成 (允许 30秒 误差)
+                        if total_seconds > 0 and learned_seconds >= (total_seconds - 30):
+                            print(
+                                f"    [-] 视频 {item_id} 已完成 ({learned_seconds}s/{total_seconds}s)，跳过")
+                        else:
+                            valid_item_ids.append(item_id)
+                else:
+                    # 如果正则没匹配到（可能是页面改版），回退到简单匹配
+                    print("    [!] 未能解析视频详情，尝试使用基础匹配（可能包含已完成视频）...")
+                    valid_item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text)
 
                 # 去重
                 seen = set()
-                unique_item_ids = [x for x in item_ids if not (
+                unique_item_ids = [x for x in valid_item_ids if not (
                     x in seen or seen.add(x))]
 
                 course_data.append(
                     {"courseId": cid, "itemIds": unique_item_ids})
-                print(f" 发现 {len(unique_item_ids)} 个视频")
+                print(f"    [+] 最终加入 {len(unique_item_ids)} 个待刷视频")
 
             except Exception as e:
                 print(f"\n    [Error] 获取课程 {cid} 详情失败: {e}")
@@ -477,6 +536,12 @@ print(f"\n>>> 开始刷课任务，共 {len(course_data)} 门课程")
 
 for course in course_data:
     cid = course['courseId']
+
+    # 二次检查：跳过 ID 373
+    if str(cid) == '373':
+        print(f"[-] 跳过人脸识别测试课程 (ID: {cid})")
+        continue
+
     items = course['itemIds']
     print(f"\n=== 正在处理课程 {cid}，共 {len(items)} 个视频 ===")
 
@@ -487,3 +552,13 @@ for course in course_data:
         time.sleep(2)
 
 print("\n所有任务完成。")
+
+# Remove the cache files after all tasks have finished.
+print("正在清理缓存文件...")
+for f in [cache_file, items_cache_file, progress_cache_file]:  # NOTE(review): progress_cache_file is not defined in this patch; confirm main.py defines it
+    if os.path.exists(f):
+        try:
+            os.remove(f)
+            print(f"[-] 已删除缓存: {f}")
+        except Exception as e:  # best effort: report the failure and move on to the next file
+            print(f"[!] 删除缓存 {f} 失败: {e}")