更新课程截取逻辑,完成后删除缓存
This commit is contained in:
91
main.py
91
main.py
@@ -15,7 +15,6 @@ import xml.etree.ElementTree as ET
|
||||
|
||||
# [警告] 下方的 cookie 包含敏感登录信息,请勿泄露给他人!
|
||||
cookie = ""
|
||||
|
||||
url = "https://zjbc.cjnep.net/lms/web/course/index"
|
||||
cache_file = "course_ids.json"
|
||||
items_cache_file = "course_items.json"
|
||||
@@ -342,7 +341,7 @@ print(f"当前心跳间隔: {HEARTBEAT_INTERVAL}秒 (警告:请勿设置过低
|
||||
print(f"每次进度增加: {ADD_TIME}秒")
|
||||
print("您需要安装ffmpeg工具集,确保ffprobe命令可用,可从https://ffmpeg.org/download.html 下载")
|
||||
print("脚本仅供学习交流使用,请勿用于商业用途!")
|
||||
print("作者:NCJOAQ & Github Compilot")
|
||||
print("作者:NCJOAQ & Github Copilot")
|
||||
print("="*60 + "\n")
|
||||
|
||||
course_data = []
|
||||
@@ -365,7 +364,7 @@ if os.path.exists(items_cache_file) and os.path.getsize(items_cache_file) > 0:
|
||||
|
||||
if not course_data:
|
||||
print("未找到有效的课程数据缓存 (course_items.json)。")
|
||||
print("请先运行脚本生成缓存,或检查网络连接。")
|
||||
print("准备执行抓取流程...")
|
||||
|
||||
# --- 原有的抓取逻辑 (简化版) ---
|
||||
# (此处保留你之前的抓取代码,如果需要的话,可以把之前的抓取逻辑放回来)
|
||||
@@ -412,6 +411,11 @@ if not course_data:
|
||||
if matches:
|
||||
print(f"[*] 解析到 {len(matches)} 个课程块,正在过滤已完成课程...")
|
||||
for cid, content in matches:
|
||||
# 特殊排除 ID 373 (人脸识别测试课程)
|
||||
if str(cid) == '373':
|
||||
print(f" [-] 跳过人脸识别测试课程 (ID: {cid})")
|
||||
continue
|
||||
|
||||
# 检查是否存在“已完成”的标记图片 (22cn_03.png)
|
||||
if "22cn_03.png" in content:
|
||||
print(f" [-] 跳过已完成课程 (ID: {cid})")
|
||||
@@ -421,8 +425,10 @@ if not course_data:
|
||||
# 如果上面的复杂正则没匹配到任何东西(可能页面结构变了),回退到简单正则
|
||||
if not matches and not course_ids:
|
||||
print("[!] 未能通过高级过滤匹配到课程,尝试使用基础匹配(将包含已完成课程)...")
|
||||
course_ids = re.findall(
|
||||
found_ids = re.findall(
|
||||
r"window\.location\s*=\s*['\"]/lms/web/course/detail\?id=(\d+)['\"]", resp.text)
|
||||
# 过滤掉 373
|
||||
course_ids = [cid for cid in found_ids if str(cid) != '373']
|
||||
|
||||
if course_ids:
|
||||
print(f"成功获取到 {len(course_ids)} 门未完成课程。")
|
||||
@@ -445,17 +451,70 @@ if not course_data:
|
||||
resp = requests.get(detail_url, headers=headers)
|
||||
resp.raise_for_status()
|
||||
|
||||
# 提取 itemid
|
||||
item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text)
|
||||
# 提取视频信息并过滤已完成的
|
||||
# 匹配模式:包含进度信息、总时长和itemid的视频块
|
||||
# 注意:re.DOTALL 让 . 匹配换行符
|
||||
video_pattern = re.compile(
|
||||
r'<div class="vd-item item trans borderb">.*?'
|
||||
r'class="col-lg-3 col-md-3 col-xs-7 tc3">(.*?)</div>.*?'
|
||||
r'class="trans date tc9">([\d:]+)</span>.*?'
|
||||
r'href="[^"]*[?&]itemid=(\d+)"',
|
||||
re.DOTALL
|
||||
)
|
||||
|
||||
matches = video_pattern.findall(resp.text)
|
||||
|
||||
valid_item_ids = []
|
||||
|
||||
# Decide which videos still need to be played.
# Each entry of `matches` is a (progress_text, total_duration, item_id) tuple
# captured by `video_pattern`; unfinished item ids go into `valid_item_ids`.
if matches:
    print(f" [*] 找到 {len(matches)} 个视频,正在检查进度...")
    for progress_str, total_time_str, item_id in matches:
        # Learned time. Pull the minute and second counts out independently
        # so that seconds-only text, or text surrounded by extra markup or
        # whitespace, is still parsed instead of silently counting as 0.
        learned_seconds = 0
        minutes_match = re.search(r"(\d+)\s*分钟", progress_str)
        seconds_match = re.search(r"(\d+)\s*秒", progress_str)
        if minutes_match:
            learned_seconds += int(minutes_match.group(1)) * 60
        if seconds_match:
            learned_seconds += int(seconds_match.group(1))

        # Total duration, formatted as MM:SS (e.g. 154:16) or HH:MM:SS.
        # Anything else (including empty fields) leaves the total at 0,
        # which means "unknown" and keeps the video in the to-do list.
        total_seconds = 0
        try:
            time_fields = [int(field) for field in total_time_str.split(":")]
        except ValueError:
            time_fields = []
        if len(time_fields) == 2:
            total_seconds = time_fields[0] * 60 + time_fields[1]
        elif len(time_fields) == 3:
            total_seconds = (
                time_fields[0] * 3600 + time_fields[1] * 60 + time_fields[2])

        # A video counts as finished when the learned time is within
        # 30 seconds of the total duration.
        # NOTE(review): with this tolerance any video whose total length is
        # 30 seconds or less is always treated as finished - confirm intended.
        if total_seconds > 0 and learned_seconds >= (total_seconds - 30):
            print(
                f" [-] 视频 {item_id} 已完成 ({learned_seconds}s/{total_seconds}s),跳过")
        else:
            valid_item_ids.append(item_id)
else:
    # The detailed pattern matched nothing (the page layout may have
    # changed): fall back to collecting every itemid on the page, which
    # may include videos that are already finished.
    print(" [!] 未能解析视频详情,尝试使用基础匹配(可能包含已完成视频)...")
    valid_item_ids = re.findall(r'[?&]itemid=(\d+)', resp.text)
|
||||
|
||||
# 去重
|
||||
seen = set()
|
||||
unique_item_ids = [x for x in item_ids if not (
|
||||
unique_item_ids = [x for x in valid_item_ids if not (
|
||||
x in seen or seen.add(x))]
|
||||
|
||||
course_data.append(
|
||||
{"courseId": cid, "itemIds": unique_item_ids})
|
||||
print(f" 发现 {len(unique_item_ids)} 个视频")
|
||||
print(f" [+] 最终加入 {len(unique_item_ids)} 个待刷视频")
|
||||
|
||||
except Exception as e:
|
||||
print(f"\n [Error] 获取课程 {cid} 详情失败: {e}")
|
||||
@@ -477,6 +536,12 @@ print(f"\n>>> 开始刷课任务,共 {len(course_data)} 门课程")
|
||||
|
||||
for course in course_data:
|
||||
cid = course['courseId']
|
||||
|
||||
# 二次检查:跳过 ID 373
|
||||
if str(cid) == '373':
|
||||
print(f"[-] 跳过人脸识别测试课程 (ID: {cid})")
|
||||
continue
|
||||
|
||||
items = course['itemIds']
|
||||
print(f"\n=== 正在处理课程 {cid},共 {len(items)} 个视频 ===")
|
||||
|
||||
@@ -487,3 +552,13 @@ for course in course_data:
|
||||
time.sleep(2)
|
||||
|
||||
print("\n所有任务完成。")
|
||||
|
||||
# Delete the cache files after all tasks have finished.
# Removal is attempted directly rather than checking for existence first,
# so there is no window between the check and the delete; a file that is
# already absent is skipped silently, any other OS-level failure is reported.
print("正在清理缓存文件...")
for cache_path in (cache_file, items_cache_file, progress_cache_file):
    try:
        os.remove(cache_path)
    except FileNotFoundError:
        # Nothing to clean up for this cache.
        continue
    except OSError as e:
        print(f"[!] 删除缓存 {cache_path} 失败: {e}")
    else:
        print(f"[-] 已删除缓存: {cache_path}")
|
||||
|
||||
Reference in New Issue
Block a user