refactor, and use aiohttp
This commit is contained in:
Executable
+7
@@ -0,0 +1,7 @@
|
||||
from .scrapers.sakuracon import collect_sakuracon_events
|
||||
import asyncio
|
||||
|
||||
def generate_all():
    """Run the Sakura-Con scraper coroutine to completion.

    Blocks until ``collect_sakuracon_events`` has finished.
    """
    # asyncio.run creates a fresh event loop, runs the coroutine and always
    # closes the loop afterwards; the previous hand-made loop was never
    # closed, leaking its resources.
    asyncio.run(collect_sakuracon_events())
|
||||
|
||||
Executable
Executable
+126
@@ -0,0 +1,126 @@
|
||||
import requests
|
||||
from icalendar import Calendar, Event
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import json
|
||||
import os
|
||||
|
||||
EVENTNY_ENDPOINT = "https://www.eventeny.com/funcs/event/event-page-elements-2022-03-06.php"
|
||||
SAK_BIZ_ID = "233997"
|
||||
SAK_EVENT_ID = "13462"
|
||||
|
||||
async def collect_sakuracon_events():
    """Fetch the schedule, convert it to calendars and write them to disk."""
    fetched_events, track_titles = await get_event_data()
    write_ics(convert_events_to_icals(fetched_events, track_titles))
|
||||
|
||||
|
||||
async def get_event_data():
    """Fetch the schedule listing and the per-event descriptions.

    Sends several ``fetch_schedule_list`` POST requests to the Eventeny
    endpoint and merges their results.

    Returns:
        A ``(events, tracks)`` tuple. ``events`` is the concatenation of the
        ``list`` entries of every response, passed through
        ``insert_descriptions``; ``tracks`` is the merged ``track`` mapping
        of every response.

    Raises:
        aiohttp.ClientResponseError: if a listing request returns an HTTP
            error status.
        KeyError: if a response lacks the ``list`` or ``track`` key.
    """
    base_form_data = {
        "post_type": "fetch_schedule_list",
        "biz_id": SAK_BIZ_ID,
        "event_id": SAK_EVENT_ID,
        "tag_filter": "",
        "track_filter": "50949|50946|50955|50947|50954|50950|53643|53642|50956|50948",
        "embed": "1",
    }

    # These request parameters were scraped as-is from the Sakura-Con site and
    # are not understood: it is unclear why only the first request uses
    # date_group_setup 0, or why three requests are sent when the first
    # request's time limits already appear to span the whole event.
    requests_data = [
        {**base_form_data, "date_group_setup": "0", "time_limit_min": "1744948800", "time_limit_max": "1745207999"},
        {**base_form_data, "date_group_setup": "1", "time_limit_min": "1745035200", "time_limit_max": "1745121599"},
        {**base_form_data, "date_group_setup": "1", "time_limit_min": "1745121600", "time_limit_max": "1745207999"},
    ]

    all_events = []
    all_tracks = {}

    # Use aiohttp instead of the blocking ``requests`` library: a synchronous
    # HTTP call inside a coroutine stalls the whole event loop.
    async with aiohttp.ClientSession() as session:
        for form_data in requests_data:
            async with session.post(EVENTNY_ENDPOINT, data=form_data) as response:
                response.raise_for_status()
                payload = await response.read()
            # Parse the raw bytes ourselves so the response's Content-Type
            # header does not matter (same approach as get_description).
            data = json.loads(payload)
            all_events.extend(data["list"])
            all_tracks.update(data["track"])

    all_events = await insert_descriptions(all_events)

    return all_events, all_tracks
|
||||
|
||||
async def get_description(eventid, session=None):
    """Fetch the description text of a single schedule item.

    Args:
        eventid: Identifier of the schedule item, sent as the ``id`` form
            field.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted, a
            temporary session is opened and closed for this one request.

    Returns:
        The item's description, or ``""`` when the description is empty or
        null.

    Raises:
        aiohttp.ClientResponseError: if the endpoint returns an HTTP error
            status.
        KeyError: if the response lacks ``schedule.overview.description``.
    """
    form_data = {
        "post_type": "fetch_schedule_item",
        "biz_id": SAK_BIZ_ID,
        "event_id": SAK_EVENT_ID,
        "ticket_cross_sell": "no",
        "id": eventid,
    }

    if session is not None:
        return await _post_for_description(session, form_data)

    async with aiohttp.ClientSession() as own_session:
        return await _post_for_description(own_session, form_data)


async def _post_for_description(session, form_data):
    """POST ``form_data`` on ``session`` and extract the description field."""
    async with session.post(EVENTNY_ENDPOINT, data=form_data) as response:
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        # when the server answers with an error page.
        response.raise_for_status()
        data = await response.read()

    results = json.loads(data.decode())
    return results["schedule"]["overview"]["description"] or ""
|
||||
|
||||
async def insert_descriptions(events, *, max_concurrency=16):
    """Attach each event's description under the ``raw_description`` key.

    Descriptions are fetched concurrently with ``get_description``, using
    each event's ``id``.

    Args:
        events: List of event dicts; each must have an ``id`` key. The dicts
            are modified in place.
        max_concurrency: Maximum number of description requests in flight at
            the same time. Must be at least 1.

    Returns:
        The same ``events`` list that was passed in.

    Raises:
        ValueError: if ``max_concurrency`` is less than 1.
    """
    if max_concurrency < 1:
        raise ValueError("max_concurrency must be at least 1")

    # Bound the fan-out: firing one request per event all at once can exhaust
    # local sockets or trip rate limiting on the remote server.
    limiter = asyncio.Semaphore(max_concurrency)

    async def fetch(event_id):
        async with limiter:
            return await get_description(event_id)

    # gather returns results in the order the awaitables were passed in, so
    # they line up with ``events``.
    descs = await asyncio.gather(*(fetch(event["id"]) for event in events))

    for event, desc in zip(events, descs):
        event["raw_description"] = desc
    return events
|
||||
|
||||
|
||||
def convert_events_to_icals(all_events, all_tracks) -> dict[str, Calendar]:
    """Group events into one iCalendar ``Calendar`` per track.

    Args:
        all_events: Iterable of event dicts. Each is read for ``tag_id``,
            ``title``, ``start_calendar``, ``end_calendar``, ``location``,
            ``status``, ``raw_description`` and, optionally,
            ``hashtag_title``.
        all_tracks: Mapping from an event's ``tag_id`` to its track title.

    Returns:
        A mapping from track title to the ``Calendar`` holding that track's
        events.
    """
    # Local import: only this function needs it.
    import html

    calendars = defaultdict(Calendar)

    for event in all_events:
        track_id = event.get("tag_id")
        track_title = all_tracks.get(track_id)

        cal = calendars[track_title]

        # A freshly created calendar has no PRODID yet; set its headers once.
        if not cal.get("prodid"):
            cal.add("prodid", f"-//{track_title} Schedule - SC//")
            cal.add("version", "2.0")
            cal.add("X-WR-CALNAME", track_title)

        ical_event = Event()
        ical_event.add("summary", event["title"])
        # NOTE(review): fromisoformat yields naive datetimes unless the
        # strings carry a UTC offset -- confirm the intended time zone.
        ical_event.add("dtstart", datetime.fromisoformat(event["start_calendar"]))
        ical_event.add("dtend", datetime.fromisoformat(event["end_calendar"]))
        # The previous replace('&', '&') was a no-op; decode HTML entities
        # (e.g. "&amp;") in the location properly.
        ical_event.add("location", html.unescape(event["location"]))
        ical_event.add("status", event["status"].upper())

        # Reset per event: previously an event without hashtags either raised
        # UnboundLocalError or inherited the previous event's tag line.
        tags_str = ""
        hashtags = event.get("hashtag_title")
        if hashtags:
            ical_event.add("categories", hashtags)
            # A bare string must not be joined character by character.
            tag_names = [hashtags] if isinstance(hashtags, str) else hashtags
            tags_str = f"Tags: {','.join(tag_names)}\n"

        ical_event.add("description", f"Track: {track_title}\n{tags_str}\n{event['raw_description']}")

        cal.add_component(ical_event)

    return calendars
|
||||
|
||||
def write_ics(calendars, output_dir="output/sakuracon"):
    """Write every calendar to ``<output_dir>/calendar_<track title>.ics``.

    Spaces and path separators in the track title are replaced with
    underscores, so each calendar becomes exactly one file directly inside
    ``output_dir``.

    Args:
        calendars: Mapping from track title to an object whose ``to_ical()``
            method returns the serialized calendar as bytes.
        output_dir: Directory to write into; created if it does not exist.
    """
    # Track titles come from scraped data: neutralize path separators so a
    # title such as "A/B" cannot create nested or unintended paths.
    unsafe_chars = str.maketrans({" ": "_", "/": "_", "\\": "_"})

    # The target directory is the same for every calendar; create it once.
    os.makedirs(output_dir, exist_ok=True)

    for track_title, cal in calendars.items():
        safe_title = str(track_title).translate(unsafe_chars)
        filename = f"{output_dir}/calendar_{safe_title}.ics"
        with open(filename, "wb") as f:
            f.write(cal.to_ical())
        print(f"Wrote: {filename}")
|
||||
Reference in New Issue
Block a user