downloads working

2026-04-10 03:04:40 -07:00 · 2025-02-11 05:14:13 -08:00 · 2025-02-11 05:14:13 -08:00 · a7de906b5d
commit a7de906b5d
parent 055b39f604
1 changed files with 81 additions and 10 deletions
--- a/c2cscrape.py
+++ b/c2cscrape.py
@ -1,26 +1,30 @@
-# c2cscrape.py
-# This script scrapes the zfirelight blog for old episodes of coast to coast am and serves the audio/video as an rss feed for pocketcasts
-
 import requests
 from bs4 import BeautifulSoup
-from typing import Optional
+import datetime
+import os
+import re

 class C2CScrape:
    def __init__(self):
        self.url = 'https://zfirelight.blogspot.com/'
        self.episodes = []
        self.last_download = None
+        self.last_download_link = None
        self.headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

-    def get_drive_link(self, url: str) -> Optional[str]:
+    def sanitize_filename(self, filename):
+        # Remove or replace invalid filename characters
+        return re.sub(r'[<>:"/\\|?*]', '-', filename)
+
+    def get_drive_link(self, url):
        try:
            response = requests.get(url, headers=self.headers)
            response.raise_for_status()
-            soup = BeautifulSoup(response.text, 'html.parser')
+            self.soup = BeautifulSoup(response.text, 'html.parser')
            
-            iframes = soup.find_all('iframe')
+            iframes = self.soup.find_all('iframe')
            for iframe in iframes:
                src = iframe.get('src')
                if src and 'drive.google.com' in src:
@ -30,7 +34,25 @@ class C2CScrape:
            print(f'Error fetching page: {e}')
        return None

-    def create_download_link(self) -> Optional[str]:
+    def get_episode_info(self, soup):
+        title_element = soup.find('h3', class_='post-title entry-title')
+        if not title_element:
+            return None
+            
+        title_link = title_element.find('a')
+        if not title_link:
+            return None
+            
+        full_title = title_link.text
+        date_str = full_title.split(' ')[0]
+        
+        return {
+            'title': full_title,
+            'date': date_str,
+            'url': title_link['href']
+        }
+
+    def create_download_link(self):
        url = self.get_drive_link(self.url)
        if not url:
            return None
@ -45,6 +67,55 @@ class C2CScrape:
            print('Error: Invalid URL format')
            return None

+    def download_episode(self, url):
+        try:
+            episode_data = self.get_episode_info(self.soup)
+            if not episode_data:
+                print('Error: Could not get episode info')
+                return
+
+            # Check if already downloaded
+            date = datetime.datetime.now().strftime('%Y-%m-%d')
+            if date == self.last_download and url == self.last_download_link:
+                print('Episode already downloaded')
+                return
+
+            # Create downloads directory if it doesn't exist
+            download_dir = 'downloads'
+            os.makedirs(download_dir, exist_ok=True)
+
+            # Download the file
+            response = requests.get(url, headers=self.headers)
+            response.raise_for_status()
+
+            # Create sanitized filename
+            filename = f'{episode_data["title"]} {date}.mp4'
+            safe_filename = self.sanitize_filename(filename)
+            filepath = os.path.join(download_dir, safe_filename)
+            
+            with open(filepath, 'wb') as f:
+                f.write(response.content)
+                print(f'Downloaded: {safe_filename}')
+            
+            # Update last download info
+            self.last_download = date
+            self.last_download_link = url
+
+        except requests.RequestException as e:
+            print(f'Error downloading episode: {e}')
+        except Exception as e:
+            print(f'Error: {e}')
+
+    def process_episode(self):
+        drive_url = self.get_drive_link(self.url)
+        if drive_url:
+            download_url = self.create_download_link()
+            if download_url:
+                self.download_episode(download_url)
+
+class createRss:
+    pass
+
 if __name__ == '__main__':
    c2c = C2CScrape()
-    c2c.create_download_link()
+    c2c.process_episode()