import asyncio
import pprint
from datetime import datetime
from typing import List

import httpx
from aiocache import cached, Cache
from bs4 import BeautifulSoup
from pydantic import BaseModel

# Shared async HTTP client for all requests against namazvakitleri.diyanet.gov.tr.
_http = httpx.AsyncClient(timeout=20, follow_redirects=True)


class PrayerTimes(BaseModel):
    date: datetime
    fajr: str
    sun: str
    dhuhr: str
    asr: str
    maghrib: str
    isha: str


@cached(ttl=3 * 60 * 60, cache=Cache.MEMORY)
async def parse_prayer_times(url: str) -> List[PrayerTimes]:
    """Scrape the prayer-times table from a Diyanet location page."""
    res = await _http.get(url)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    items = []
    for row in soup.select('#tab-1 .vakit-table tbody tr'):
        cell_texts = (c.text.strip() for c in row.select('td'))
        # Table columns are assumed to appear in the same order as the
        # PrayerTimes field declarations.
        headers = PrayerTimes.__fields__.keys()
        parsed = dict(zip(headers, cell_texts))
        parsed['date'] = datetime.strptime(parsed['date'], '%d.%m.%Y')
        items.append(PrayerTimes(**parsed))
    return items


def make_location_url(location_id: int) -> str:
    url = f'https://namazvakitleri.diyanet.gov.tr/en-US/{location_id}'
    return url


async def get_prayer_times(location_id: int) -> List[PrayerTimes]:
    url = make_location_url(location_id)
    return await parse_prayer_times(url)


async def fetch_locations() -> List[dict]:
    """Flatten the country/city/region hierarchy into a list of location dicts."""
    countries = await _get_countries()
    # _get_countries() only yields {country_name: country_id}; fetch the city
    # list for each country before flattening.
    city_tasks = (_get_cities(country_id) for country_id in countries.values())
    cities_per_country = await asyncio.gather(*city_tasks)
    locations = []
    for country_name, cities in zip(countries, cities_per_country):
        country_id = cities.pop('_countryId')
        has_regions = cities.pop('_hasRegions')
        if not has_regions:
            # No region level: the city entries are the final locations.
            for cname, cid in cities.items():
                locations.append(dict(
                    country_id=country_id,
                    country_name=country_name,
                    city_id=cid,
                    city_name=cname,
                ))
            continue
        ctasks = (_get_regions(country_id, cid) for _cname, cid in cities.items())
        regions = await asyncio.gather(*ctasks)
        for (cname, cid), cregions in zip(cities.items(), regions):
            for rname, rid in cregions.items():
                locations.append(dict(
                    country_id=country_id,
                    country_name=country_name,
                    city_id=cid,
                    city_name=cname,
                    region_id=rid,
                    region_name=rname,
                ))
    return locations


async def _get_cities(country_id: int) -> dict:
    url = f'https://namazvakitleri.diyanet.gov.tr/en-US/home/GetRegList' \
          f'?ChangeType=country&CountryId={country_id}&Culture=tr-TR'
    res = await _http.get(url)
    data = res.json()
    if data['HasStateList']:
        items = ((it['SehirAdi'], int(it['SehirID'])) for it in data['StateList'])
    else:
        items = ((it['IlceAdi'], int(it['IlceID'])) for it in data['StateRegionList'])
    cities = dict(items)
    cities['_countryId'] = country_id
    cities['_hasRegions'] = data['HasStateList']
    return cities


async def _get_regions(country_id: int, city_id: int) -> dict:
    url = f'https://namazvakitleri.diyanet.gov.tr/tr-TR/home/GetRegList' \
          f'?ChangeType=state&CountryId={country_id}&Culture=tr-TR&StateId={city_id}'
    res = await _http.get(url)
    data = res.json()
    items = ((it['IlceAdi'], int(it['IlceID'])) for it in data['StateRegionList'])
    return dict(items)


async def _get_countries() -> dict:
    url = 'https://namazvakitleri.diyanet.gov.tr/tr-TR'
    res = await _http.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')
    countries = {}
    for it in soup.select('select[name=country] option'):
        countries[it.text] = int(it['value'])
    return countries


if __name__ == '__main__':

    async def main():
        # with core.diyanetdb.get_connection() as conn:
        #     id = core.diyanetdb.get_location_id(conn,
        #                                         country_name='AVUSTURYA',
        #                                         city_name='PESSENDELLACH')
        #     url = make_location_url(id)
        times = await parse_prayer_times('https://namazvakitleri.diyanet.gov.tr/en-US')
        return times

    results = asyncio.run(main())
    pprint.pprint(results)
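
# Usage sketch (an assumption, not part of the original flow): combine
# fetch_locations() with get_prayer_times(). Which ID from a location dict maps
# to the URL path segment used by make_location_url() is assumed here to be the
# region_id when the country has regions, and the city_id otherwise.
#
# async def example():
#     locations = await fetch_locations()
#     loc = locations[0]
#     location_id = loc.get('region_id', loc['city_id'])  # assumed mapping
#     return await get_prayer_times(location_id)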