mirror of
https://github.com/Death916/c2cscrape.git
synced 2026-04-10 03:04:40 -07:00
check for existing file
This commit is contained in:
parent
a7de906b5d
commit
6fa543b9bb
1 changed files with 62 additions and 11 deletions
73
c2cscrape.py
73
c2cscrape.py
|
|
@ -74,25 +74,27 @@ class C2CScrape:
|
|||
print('Error: Could not get episode info')
|
||||
return
|
||||
|
||||
# Check if already downloaded
|
||||
date = datetime.datetime.now().strftime('%Y-%m-%d')
|
||||
if date == self.last_download and url == self.last_download_link:
|
||||
print('Episode already downloaded')
|
||||
return
|
||||
|
||||
# Create downloads directory if it doesn't exist
|
||||
download_dir = 'downloads'
|
||||
os.makedirs(download_dir, exist_ok=True)
|
||||
|
||||
# Download the file
|
||||
response = requests.get(url, headers=self.headers)
|
||||
response.raise_for_status()
|
||||
# Get current date
|
||||
date = datetime.datetime.now().strftime('%Y-%m-%d')
|
||||
|
||||
# Create sanitized filename
|
||||
filename = f'{episode_data["title"]} {date}.mp4'
|
||||
safe_filename = self.sanitize_filename(filename)
|
||||
filepath = os.path.join(download_dir, safe_filename)
|
||||
|
||||
|
||||
# Check if file already exists
|
||||
if os.path.exists(filepath):
|
||||
print(f'File already exists: {safe_filename}')
|
||||
return
|
||||
|
||||
# Download the file
|
||||
response = requests.get(url, headers=self.headers)
|
||||
response.raise_for_status()
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(response.content)
|
||||
print(f'Downloaded: {safe_filename}')
|
||||
|
|
@ -106,6 +108,7 @@ class C2CScrape:
|
|||
except Exception as e:
|
||||
print(f'Error: {e}')
|
||||
|
||||
|
||||
def process_episode(self):
|
||||
drive_url = self.get_drive_link(self.url)
|
||||
if drive_url:
|
||||
|
|
@ -114,8 +117,56 @@ class C2CScrape:
|
|||
self.download_episode(download_url)
|
||||
|
||||
class createRss:
|
||||
pass
|
||||
def __init__(self):
|
||||
self.episodes = []
|
||||
self.feed = None
|
||||
self.feed_title = 'Coast to Coast AM'
|
||||
self.feed_link = 'https://zfirelight.blogspot.com/'
|
||||
self.feed_description = 'Coast to Coast AM episodes'
|
||||
|
||||
def create_feed(self):
|
||||
self.feed = f'<?xml version="1.0" encoding="UTF-8"?>\n'
|
||||
self.feed += f'<rss version="2.0">\n'
|
||||
self.feed += f'<channel>\n'
|
||||
self.feed += f'<title>{self.feed_title}</title>\n'
|
||||
self.feed += f'<link>{self.feed_link}</link>\n'
|
||||
self.feed += f'<description>{self.feed_description}</description>\n'
|
||||
for episode in self.episodes:
|
||||
self.feed += f'<item>\n'
|
||||
self.feed += f'<title>{episode["title"]}</title>\n'
|
||||
self.feed += f'<link>{episode["url"]}</link>\n'
|
||||
self.feed += f'<description>{episode["date"]}</description>\n'
|
||||
self.feed += f'</item>\n'
|
||||
self.feed += f'</channel>\n'
|
||||
self.feed += f'</rss>\n'
|
||||
|
||||
def save_feed(self):
|
||||
try:
|
||||
with open('feed.xml', 'w') as f:
|
||||
f.write(self.feed)
|
||||
print('Feed saved')
|
||||
except Exception as e:
|
||||
print(f'Error saving feed: {e}')
|
||||
|
||||
def add_episode(self, episode):
|
||||
self.episodes.append(episode)
|
||||
|
||||
def process_episodes(self):
|
||||
c2c = C2CScrape()
|
||||
response = requests.get(c2c.url, headers=c2c.headers)
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
posts = soup.find_all('div', class_='post hentry')
|
||||
for post in posts:
|
||||
episode = c2c.get_episode_info(post)
|
||||
if episode:
|
||||
self.add_episode(episode)
|
||||
self.create_feed()
|
||||
self.save_feed()
|
||||
|
||||
if __name__ == '__main__':
|
||||
c2c = C2CScrape()
|
||||
c2c.process_episode()
|
||||
rss = createRss()
|
||||
rss.process_episodes()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue