refactor, and use aiohttp

This commit is contained in:
2025-04-06 23:44:42 -07:00
parent c29955de7f
commit cd4c5e8bd4
18 changed files with 788 additions and 8926 deletions
+7
View File
@@ -0,0 +1,7 @@
from .scrapers.sakuracon import collect_sakuracon_events
import asyncio
def generate_all():
    """Run every scraper to completion (currently only Sakura-Con).

    Synchronous entry point: drives ``collect_sakuracon_events()`` on a
    private event loop and blocks until it has finished.
    """
    # A dedicated loop is used (rather than the thread's current loop) so the
    # caller's asyncio state is left untouched.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(collect_sakuracon_events())
    finally:
        # Always release the loop's selector/file descriptors, including when
        # the scraper raises.
        loop.close()
View File
+126
View File
@@ -0,0 +1,126 @@
import requests
from icalendar import Calendar, Event
from datetime import datetime
from collections import defaultdict
import asyncio
import aiohttp
import json
import os
# URL on eventeny.com that every request in this module POSTs its form data to.
EVENTNY_ENDPOINT = "https://www.eventeny.com/funcs/event/event-page-elements-2022-03-06.php"
# Sent as the "biz_id" form field of each request.
SAK_BIZ_ID = "233997"
# Sent as the "event_id" form field of each request.
SAK_EVENT_ID = "13462"
async def collect_sakuracon_events():
    """Scrape the schedule, build the per-track calendars and write them out.

    Pipeline: ``get_event_data()`` -> ``convert_events_to_icals()`` ->
    ``write_ics()`` (using that function's default output directory).
    """
    event_data = await get_event_data()
    # event_data is the (events, tracks) pair, unpacked positionally.
    write_ics(convert_events_to_icals(*event_data))
async def get_event_data():
    """Fetch the schedule listing and attach a description to every event.

    Sends the schedule-list POST requests one after another over a single
    aiohttp session, concatenates the ``list`` entry of each JSON reply and
    merges the ``track`` entries, then passes the events through
    ``insert_descriptions()``.

    Returns:
        tuple: ``(all_events, all_tracks)`` - the list of event dicts (each
        with ``raw_description`` added) and the merged track mapping.

    Raises:
        aiohttp.ClientResponseError: if a request returns an HTTP error status.
        KeyError: if a reply lacks the ``list`` or ``track`` entry.
    """
    # Send multiple POST requests to fetch schedule data
    base_form_data = {
        "post_type": "fetch_schedule_list",
        "biz_id": SAK_BIZ_ID,
        "event_id": SAK_EVENT_ID,
        "tag_filter": "",
        "track_filter": "50949|50946|50955|50947|50954|50950|53643|53642|50956|50948",
        "embed": "1",
    }
    # why are the values here stupid and arbitrary? why does only the first request use date_group_setup 0?
    # why do we send three requests when the time limits for the first clearly span the whole event?
    # what are they hiding? who the hell knows? i just scraped it off the sakuracon site
    requests_data = [
        {**base_form_data, "date_group_setup": "0", "time_limit_min": "1744948800", "time_limit_max": "1745207999"},
        {**base_form_data, "date_group_setup": "1", "time_limit_min": "1745035200", "time_limit_max": "1745121599"},
        {**base_form_data, "date_group_setup": "1", "time_limit_min": "1745121600", "time_limit_max": "1745207999"},
    ]
    all_events = []
    all_tracks = {}
    # Non-blocking HTTP: a synchronous client here would stall the event loop
    # for the duration of every request. One session is shared by all posts.
    async with aiohttp.ClientSession() as session:
        # Requests stay sequential so later replies override earlier track
        # entries in the same order as before.
        for form_data in requests_data:
            async with session.post(EVENTNY_ENDPOINT, data=form_data) as response:
                response.raise_for_status()
                payload = await response.read()
            # Parse the raw bytes ourselves, as get_description() does, rather
            # than relying on the reply's declared content type.
            data = json.loads(payload)
            all_events.extend(data['list'])
            all_tracks.update(data['track'])
    all_events = await insert_descriptions(all_events)
    return all_events, all_tracks
async def get_description(eventid):
    """Fetch the description text of a single schedule item.

    Args:
        eventid: Value sent as the ``id`` form field of the
            ``fetch_schedule_item`` request.

    Returns:
        The ``schedule.overview.description`` value of the JSON reply, or
        ``""`` when that value is empty or null.

    Raises:
        aiohttp.ClientResponseError: if the request returns an HTTP error
            status.
        KeyError: if the reply lacks the expected nested keys.
    """
    form_data = {
        "post_type": "fetch_schedule_item",
        "biz_id": SAK_BIZ_ID,
        "event_id": SAK_EVENT_ID,
        "ticket_cross_sell": "no",
        "id": eventid,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(EVENTNY_ENDPOINT, data=form_data) as response:
            # Fail on HTTP errors here instead of letting an error page
            # surface later as a confusing JSON decode error.
            response.raise_for_status()
            data = await response.read()
    # json.loads accepts bytes and detects the UTF encoding itself.
    results = json.loads(data)
    return results["schedule"]["overview"]["description"] or ""
async def insert_descriptions(events, max_concurrency=16):
    """Fetch every event's description and store it on the event dict.

    Args:
        events: List of event dicts; each must have an ``id`` key. The dicts
            are modified in place.
        max_concurrency: Upper bound on description requests in flight at the
            same time. Without a bound, one connection per event is opened all
            at once.

    Returns:
        The same ``events`` list, each dict now carrying ``raw_description``.
    """
    limiter = asyncio.Semaphore(max_concurrency)

    async def fetch(event):
        # Hold a slot only for the duration of the request itself.
        async with limiter:
            return await get_description(event["id"])

    # gather() returns results in argument order, so they line up with events.
    descs = await asyncio.gather(*(fetch(event) for event in events))
    for desc, event in zip(descs, events):
        event["raw_description"] = desc
    return events
def convert_events_to_icals(all_events, all_tracks) -> dict[str, Calendar]:
    """Group events by track and build one iCalendar object per track.

    Args:
        all_events: Iterable of event dicts. Keys read here: ``tag_id``,
            ``title``, ``start_calendar``, ``end_calendar`` (ISO-format
            strings), ``location``, ``status``, ``raw_description`` and the
            optional ``hashtag_title`` (assumed to be a list of strings, since
            it is joined with commas - confirm against the API).
        all_tracks: Mapping from an event's ``tag_id`` to its track title.

    Returns:
        Mapping of track title to the ``Calendar`` holding that track's events.
    """
    # Function-scope import: only the location clean-up below needs it.
    import html

    # Group events by track_title
    calendars = defaultdict(Calendar)
    for event in all_events:
        track_id = event.get("tag_id")
        # Fall back to a real string: a None title would become a dict key
        # that cannot be turned into a file name later.
        track_title = all_tracks.get(track_id) or "Unknown Track"
        cal = calendars[track_title]
        # Initialize calendar if empty
        if not cal.get("prodid"):
            cal.add("prodid", f"-//{track_title} Schedule - SC//")
            cal.add("version", "2.0")
            cal.add("X-WR-CALNAME", track_title)
        # Create event
        ical_event = Event()
        ical_event.add("summary", event['title'])
        ical_event.add("dtstart", datetime.fromisoformat(event['start_calendar']))
        ical_event.add("dtend", datetime.fromisoformat(event['end_calendar']))
        # Decode HTML entities (e.g. "&amp;" -> "&") in the location text.
        ical_event.add("location", html.unescape(event['location']))
        ical_event.add("status", event['status'].upper())
        # Add hashtags as categories if available. tags_str is reset for every
        # event so an event without hashtags neither fails nor inherits the
        # previous event's tags.
        hashtags = event.get('hashtag_title')
        tags_str = ""
        if hashtags:
            ical_event.add('categories', hashtags)
            tags_str = f"Tags: {','.join(hashtags)}\n"
        ical_event.add("description", f"Track: {track_title}\n{tags_str}\n{event['raw_description']}")
        cal.add_component(ical_event)
    return calendars
def write_ics(calendars, output_dir="output/sakuracon"):
    """Serialize each calendar to ``<output_dir>/calendar_<title>.ics``.

    Args:
        calendars: Mapping of track title to an object with ``to_ical()``
            returning bytes.
        output_dir: Directory prefix for the generated files.

    Spaces in the title become underscores in the file name. The file's parent
    directory is created when missing, and one ``Wrote: <path>`` line is
    printed per file.
    """
    for title, calendar in calendars.items():
        slug = title.replace(' ', '_')
        filename = f"{output_dir}/calendar_{slug}.ics"
        # Derived from the final path, not output_dir, so a title containing
        # a path separator still gets its directory created.
        parent = os.path.dirname(filename)
        os.makedirs(parent, exist_ok=True)
        with open(filename, 'wb') as out:
            out.write(calendar.to_ical())
        print(f"Wrote: {filename}")