This repository has been archived on 2025-04-11. You can view files and clone it, but cannot push or open issues or pull requests.
covid-19-discord-bot/lib/parse_data.py

103 lines
3.5 KiB
Python
Raw Normal View History

2020-03-25 08:12:59 -05:00
#!/usr/bin/python
import requests
from bs4 import BeautifulSoup
import json
from os.path import exists
from inspect import getmembers, isroutine
from lib.covid_data_lib import init_database, set_data, get_formatted_data, get_top_n_rows
2020-03-25 08:12:59 -05:00
# URL of the San Antonio (sanantonio.gov) coronavirus alerts page.
# NOTE(review): no function visible in this file reads this constant --
# confirm whether another module imports it before removing.
sa_data_url = 'https://www.sanantonio.gov/health/news/alerts/coronavirus'
def format_parse_int(num):
    """Return ``num`` rendered with comma thousands separators.

    Args:
        num: Integer to format.

    Returns:
        The decimal string with a comma between each group of three
        digits, e.g. ``1234567`` -> ``'1,234,567'``. Zero is rendered as
        ``'0'`` and negative values keep their sign.
    """
    # The ',' format option works on exact integers, so zero, negative
    # values and ints beyond float precision are all rendered correctly
    # (a digit loop built on ``int(num / 10)`` is not, because the division
    # goes through a float).
    return f'{num:,}'
def import_config(path='config/config.json'):
    """Load the JSON configuration file and return its parsed content.

    Args:
        path: Location of the JSON config file.

    Returns:
        Whatever ``json.load`` produces for the file.

    Raises:
        SystemExit: With status 1, after printing a message, when the file
            does not exist or cannot be opened or parsed.
    """
    # ``sys.exit`` is used instead of the ``exit`` builtin: the latter is
    # injected by the ``site`` module for interactive use and is missing
    # under ``python -S`` or in frozen applications.
    import sys

    if not exists(path):
        print('Didn\'t find the config file.')
        sys.exit(1)

    try:
        # JSON text is UTF-8 by specification; do not depend on the locale.
        with open(path, encoding='utf-8') as config_file:
            return json.load(config_file)
    except Exception as e:
        print(f'There was some issue opening and loading the config.\n{e}')
        sys.exit(1)
def update_data():
    """Scrape the worldometers coronavirus table and hand each row to ``set_data``.

    A database session is created from the loaded config, the worldometers
    page is downloaded, and every ``<tr>`` of the table with id
    ``main_table_countries_today`` that contains ``<td>`` cells is passed to
    ``set_data`` together with the text of all its cells. The row label is
    the text of the row's first link when it has one, otherwise the text of
    its first cell with any ``:`` removed.

    Returns:
        None. The function returns early (after printing a message where
        applicable) when the session cannot be created, the response status
        is not 200, or the expected table is absent from the page.
    """
    try:
        print('Creating session.')
        session = init_database(import_config())
    except Exception as e:
        # When session creation fails there is no session object to roll
        # back, and nothing below can run without one, so report and stop.
        print(f'There was an error trying to create a database session:\n{e}')
        return

    data_html = requests.get('https://www.worldometers.info/coronavirus/')
    # ``status_code`` is an int in requests, so only the int comparison is needed.
    if data_html.status_code != 200:
        return

    parsed_html = BeautifulSoup(data_html.text, features='html.parser')
    table = parsed_html.find('table', id='main_table_countries_today')
    if table is None:
        # The page layout changed or the response was not the expected page.
        print('Could not find the countries table in the downloaded page.')
        return

    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # Rows without <td> cells (e.g. header rows) carry no data.
            continue

        values = [cell.text for cell in cells]
        link = row.find('a')
        if link is not None:
            set_data(session, link.text, values)
        elif cells[0].text:
            # Rows without a link are labelled by their first cell,
            # with any ':' stripped from the label.
            set_data(session, cells[0].text.replace(':', ''), values)
def format_covid_data(columns, data):
    """Render one data record as ``label: value`` lines.

    The record's non-routine attributes are collected with
    ``inspect.getmembers`` (which returns them sorted by name), skipping any
    whose name starts with an underscore and the ``metadata`` attribute.
    Values are then picked by position in that list and paired with the
    label at the same position in ``columns``.

    Args:
        columns: Sequence of labels, indexed in parallel with the filtered
            attribute list.
        data: Object whose attributes hold the values to print.

    Returns:
        A string of nine newline-terminated lines. The first and last
        values are inserted as-is; the seven in between are converted with
        ``int`` and formatted by ``format_parse_int``.
    """
    members = [
        member
        for member in getmembers(data, lambda value: not isroutine(value))
        # A leading underscore also covers dunder names.
        if not member[0].startswith('_') and member[0] != 'metadata'
    ]

    lines = [f'{columns[4]}: {members[4][1]}\n']
    # Positions of the numeric fields, in display order.
    for position in (6, 1, 8, 2, 9, 0, 5):
        lines.append(
            f'{columns[position]}: {format_parse_int(int(members[position][1]))}\n')
    lines.append(f'{columns[7]}: {members[7][1]}\n')
    return ''.join(lines)
def get_covid_data(selection):
    """Return the formatted text for every record matching ``selection``.

    Args:
        selection: Value forwarded unchanged to ``get_formatted_data``.

    Returns:
        The concatenation of ``format_covid_data`` output for each record
        yielded by the query; an empty string when there are none.
    """
    print('Updating data.')
    session = init_database(import_config())
    columns, all_data_query = get_formatted_data(session, selection)
    return ''.join(
        format_covid_data(columns, record) for record in all_data_query)
2020-03-25 08:12:59 -05:00
def get_top_data(number):
    """Return a ranked listing built from the top rows.

    ``number + 1`` rows are requested from ``get_top_n_rows`` and the first
    one is skipped, so ranks start at 1 with the second row.
    NOTE(review): presumably the first row is an aggregate (e.g. a world
    total) -- confirm against ``get_top_n_rows``.

    Args:
        number: How many ranked entries to list.

    Returns:
        A string with one ``# <rank>`` line followed by
        ``<selection_original>: <total_cases>`` per entry. Entries are
        separated by a newline, and no newline follows the entry whose rank
        equals ``number``.
    """
    rows = get_top_n_rows(init_database(import_config()), number + 1)
    pieces = []
    for rank, row in enumerate(rows):
        if rank == 0:
            continue
        pieces.append(
            f'# {rank}\n{row.selection_original}: {format_parse_int(int(row.total_cases))}')
        if rank != number:
            pieces.append('\n')
    return ''.join(pieces)
2020-03-25 08:12:59 -05:00
if __name__ == '__main__':
    # Script entry point: print the formatted data for the selection given
    # on the command line. ``get_covid_data`` requires a selection argument,
    # so it is taken from argv (multiple words are joined with spaces).
    import sys

    if len(sys.argv) < 2:
        print(f'Usage: {sys.argv[0]} <selection>')
        sys.exit(1)
    print(get_covid_data(' '.join(sys.argv[1:])))