diff --git a/apod/utility.py b/apod/utility.py index 3d57eac..fb01b8a 100644 --- a/apod/utility.py +++ b/apod/utility.py @@ -24,8 +24,10 @@ # Create urllib3 Pool Manager http = urllib3.PoolManager() -# function for getting video thumbnails def _get_thumbs(data): + """ + Function for getting video thumbnails + """ global video_thumb if "youtube" in data or "youtu.be" in data: # get ID from YouTube URL @@ -48,21 +50,36 @@ def _get_thumbs(data): return video_thumb -# function that returns only last URL if there are multiple URLs stacked together + def _get_last_url(data): + """ + Function that returns only last URL if there are multiple URLs stacked together + """ regex = re.compile("(?:.(?!http[s]?://))+$") return regex.findall(data)[0] -def _get_apod_chars(dt, thumbs): - media_type = 'image' + +def _format_url(dt): + """ + Returns url for APOD page + """ if dt: date_str = dt.strftime('%y%m%d') apod_url = '%sap%s.html' % (BASE, date_str) else: apod_url = '%sastropix.html' % BASE + return apod_url + + +def _get_apod_chars(dt, thumbs): + """ + Gets data from APOD page + """ + media_type = 'image' + apod_url = _format_url(dt) LOG.debug('OPENING URL:' + apod_url) res = requests.get(apod_url) - + if res.status_code == 404: return None # LOG.error(f'No APOD entry for URL: {apod_url}') @@ -97,10 +114,8 @@ def _get_apod_chars(dt, thumbs): media_type = 'other' data = '' - props = {} + props = {'explanation': _explanation(soup), 'title': _title(soup)} - props['explanation'] = _explanation(soup) - props['title'] = _title(soup) copyright_text = _copyright(soup) if copyright_text: props['copyright'] = copyright_text @@ -132,7 +147,7 @@ def _title(soup): try: # Handler for later APOD entries number_of_center_elements = len(soup.find_all('center')) - if(number_of_center_elements == 2): + if (number_of_center_elements == 2): center_selection = soup.find_all('center')[0] bold_selection = center_selection.find_all('b')[0] title = bold_selection.text.strip(' ') @@ -148,7 +163,7 @@ def _title(soup): title = title.encode('latin1').decode('cp1252') except Exception as ex: LOG.error(str(ex)) - + return title except Exception: # Handler for early APOD entries @@ -264,7 +279,7 @@ def _date(soup): _today = datetime.date.today() for line in soup.text.split('\n'): today_year = str(_today.year) - yesterday_year = str((_today-datetime.timedelta(days=1)).year) + yesterday_year = str((_today - datetime.timedelta(days=1)).year) # Looks for the first line that starts with the current year. # This also checks yesterday's year so it doesn't break on January 1st at 00:00 UTC # before apod.nasa.gov uploads a new image. @@ -285,7 +300,7 @@ def _date(soup): raise Exception('Date not found in soup data.') -def parse_apod(dt, use_default_today_date=False, thumbs=False): +def _image_url(dt, use_default_today_date=False, thumbs=False): """ Accepts a date in '%Y-%m-%d' format. Returns the URL of the APOD image of that day, noting that @@ -294,8 +309,8 @@ def parse_apod(dt, use_default_today_date=False, thumbs=False): LOG.debug('apod chars called date:' + str(dt)) try: - return _get_apod_chars(dt, thumbs) - + data = _get_apod_chars(dt, thumbs) + return data['url'] except Exception as ex: # handle edge case where the service local time diff --git a/apod_parser/apod_object_parser.py b/apod_parser/apod_object_parser.py index df2d627..c51c1c1 100644 --- a/apod_parser/apod_object_parser.py +++ b/apod_parser/apod_object_parser.py @@ -3,6 +3,7 @@ import os from PIL import Image + def get_data(api_key): raw_response = requests.get(f'https://api.nasa.gov/planetary/apod?api_key={api_key}').text response = json.loads(raw_response) @@ -14,9 +15,9 @@ def get_date(response): return date -def get_explaination(response): - explaination = response['explanation'] - return explaination +def get_explanation(response): + explanation = response['explanation'] + return explanation def get_hdurl(response): @@ -28,7 +29,8 @@ def get_media_type(response): media_type = response['media_type'] return media_type -def get_service_version(response): + +def get_service_version(response): service_version = response['service_version'] return service_version @@ -37,16 +39,18 @@ def get_title(response): service_version = response['title'] return service_version + def get_url(response): url = response['url'] return url + def download_image(url, date): - if os.path.isfile(f'{date}.png') == False: + if not os.path.isfile(f'{date}.png'): raw_image = requests.get(url).content with open(f'{date}.jpg', 'wb') as file: file.write(raw_image) - + else: return FileExistsError diff --git a/application.py b/application.py index aed8917..3af3155 100644 --- a/application.py +++ b/application.py @@ -13,8 +13,9 @@ @author=JustinGOSSES @email=justin.c.gosses@nasa.gov """ import sys + sys.path.insert(0, "../lib") -### justin edit +# justin edit sys.path.insert(1, ".") from datetime import datetime, date @@ -24,8 +25,8 @@ from apod.utility import parse_apod, get_concepts import logging -#### added by justin for EB -#from wsgiref.simple_server import make_server +# added by justin for EB +# from wsgiref.simple_server import make_server application = Flask(__name__) CORS(application, resources={r"/*": {"expose_headers": ["X-RateLimit-Limit","X-RateLimit-Remaining"]} }) @@ -44,9 +45,9 @@ try: with open('alchemy_api.key', 'r') as f: ALCHEMY_API_KEY = f.read() -#except FileNotFoundError: +# except FileNotFoundError: except IOError: - LOG.info('WARNING: NO alchemy_api.key found, concept_tagging is NOT supported') + LOG.info('WARNING: NO alchemy_api.key found, concept_tagging is NOT supported') def _abort(code, msg, usage=True): @@ -92,7 +93,7 @@ def _apod_handler(dt, use_concept_tags=False, use_default_today_date=False, thum served through the API. """ try: - + page_props = parse_apod(dt, use_default_today_date, thumbs) if not page_props: return None @@ -167,7 +168,7 @@ def _get_json_for_random_dates(count, use_concept_tags, thumbs): for date_ordinal in random_date_ordinals: dt = date.fromordinal(date_ordinal) data = _apod_handler(dt, use_concept_tags, date_ordinal == today_ordinal, thumbs) - + # Handle case where no data is available if not data: continue @@ -214,7 +215,7 @@ def _get_json_for_date_range(start_date, end_date, use_concept_tags, thumbs): while start_ordinal <= end_ordinal: # get data dt = date.fromordinal(start_ordinal) - + data = _apod_handler(dt, use_concept_tags, start_ordinal == today_ordinal, thumbs) # Handle case where no data is available @@ -245,6 +246,7 @@ def home(): methodname=APOD_METHOD_NAME, usage=_usage(joinstr='", "', prestr='"') + '"') + @application.route('/static/') def serve_static(asset_path): return current_app.send_static_file(asset_path) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 3537c84..0000000 --- a/requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -# This requirements file lists all third-party dependencies for this project. -# -# Run 'pip install -r requirements.txt -t lib/' to install these dependencies -# in `lib/` subdirectory. -# -# Note: The `lib` directory is added to `sys.path` by `appengine_config.py`. -flask>=1.0.2 -flask-cors>=3.0.7 -gunicorn==19.5.0 -Jinja2>=2.8 -Werkzeug>=0.10.4 -beautifulsoup4==4.5.3 -requests>=2.20.0 -coverage==4.1 -nose==1.3.7 -setupext-janitor==1.0.0 -bs4==0.0.1 -mock>=3.0.0 -Pillow==8.3.2 \ No newline at end of file diff --git a/tests/apod/test_utility.py b/tests/apod/test_utility.py index ed17e7c..73a40f3 100644 --- a/tests/apod/test_utility.py +++ b/tests/apod/test_utility.py @@ -1,29 +1,29 @@ #!/bin/sh/python -# coding= utf-8 -import unittest -from apod import utility -import logging - -logging.basicConfig(level=logging.DEBUG) +# coding= utf-8 +import pytest +from bs4 import BeautifulSoup +import requests from datetime import datetime -class TestApod(unittest.TestCase): - """Test the extraction of APOD characteristics.""" - - TEST_DATA = { - 'normal page, copyright' : + +import apod.utility + +BASE = 'https://apod.nasa.gov/apod/' +TEST_URL = 'https://apod.nasa.gov/apod/ap170322.html' +TEST_DATA = { + 'normal page, copyright': { "datetime": datetime(2017, 3, 22), "copyright": 'Robert Gendler', - "date": "2017-03-22", - "explanation": "In cosmic brush strokes of glowing hydrogen gas, this beautiful skyscape unfolds across the plane of our Milky Way Galaxy near the northern end of the Great Rift and the center of the constellation Cygnus the Swan. A 36 panel mosaic of telescopic image data, the scene spans about six degrees. Bright supergiant star Gamma Cygni (Sadr) to the upper left of the image center lies in the foreground of the complex gas and dust clouds and crowded star fields. Left of Gamma Cygni, shaped like two luminous wings divided by a long dark dust lane is IC 1318 whose popular name is understandably the Butterfly Nebula. The more compact, bright nebula at the lower right is NGC 6888, the Crescent Nebula. Some distance estimates for Gamma Cygni place it at around 1,800 light-years while estimates for IC 1318 and NGC 6888 range from 2,000 to 5,000 light-years.", - "hdurl": "https://apod.nasa.gov/apod/image/1703/Cygnus-New-L.jpg", - "media_type": "image", - "service_version": "v1", - "title": "Central Cygnus Skyscape", - "url": "https://apod.nasa.gov/apod/image/1703/Cygnus-New-1024.jpg", + "date": "2017-03-22", + "explanation": "In cosmic brush strokes of glowing hydrogen gas, this beautiful skyscape unfolds across the plane of our Milky Way Galaxy near the northern end of the Great Rift and the center of the constellation Cygnus the Swan. A 36 panel mosaic of telescopic image data, the scene spans about six degrees. Bright supergiant star Gamma Cygni (Sadr) to the upper left of the image center lies in the foreground of the complex gas and dust clouds and crowded star fields. Left of Gamma Cygni, shaped like two luminous wings divided by a long dark dust lane is IC 1318 whose popular name is understandably the Butterfly Nebula. The more compact, bright nebula at the lower right is NGC 6888, the Crescent Nebula. Some distance estimates for Gamma Cygni place it at around 1,800 light-years while estimates for IC 1318 and NGC 6888 range from 2,000 to 5,000 light-years.", + "hdurl": "https://apod.nasa.gov/apod/image/1703/Cygnus-New-L.jpg", + "media_type": "image", + "service_version": "v1", + "title": "Central Cygnus Skyscape", + "url": "https://apod.nasa.gov/apod/image/1703/Cygnus-New-1024.jpg", }, - 'newer page, Reprocessing & copyright' : + 'newer page, Reprocessing & copyright': { "datetime": datetime(2017, 2, 8), "copyright": "Jes�s M.Vargas & Maritxu Poyal", @@ -34,8 +34,8 @@ class TestApod(unittest.TestCase): "service_version": "v1", "title": "The Butterfly Nebula from Hubble", "url": "https://apod.nasa.gov/apod/image/1702/Butterfly_HubbleVargas_960.jpg" - }, - 'older page, copyright' : + }, + 'older page, copyright': { "datetime": datetime(2015, 11, 15), "copyright": "Sean M. Sabatini", @@ -46,11 +46,11 @@ class TestApod(unittest.TestCase): "service_version": "v1", "title": "Leonids Over Monument Valley", "url": "https://apod.nasa.gov/apod/image/1511/leonidsmonuments_sabatini_960.jpg" - }, - 'older page, copyright #2' : + }, + 'older page, copyright #2': { "datetime": datetime(2013, 3, 11), - # this illustrates problematic, but still functional parsing of the copyright + # this illustrates problematic, but still functional parsing of the copyright "copyright": 'Martin RietzeAlien Landscapes on Planet Earth', "date": "2013-03-11", "explanation": "Why does a volcanic eruption sometimes create lightning? Pictured above, the Sakurajima volcano in southern Japan was caught erupting in early January. Magma bubbles so hot they glow shoot away as liquid rock bursts through the Earth's surface from below. The above image is particularly notable, however, for the lightning bolts caught near the volcano's summit. Why lightning occurs even in common thunderstorms remains a topic of research, and the cause of volcanic lightning is even less clear. Surely, lightning bolts help quench areas of opposite but separated electric charges. One hypothesis holds that catapulting magma bubbles or volcanic ash are themselves electrically charged, and by their motion create these separated areas. Other volcanic lightning episodes may be facilitated by charge-inducing collisions in volcanic dust. Lightning is usually occurring somewhere on Earth, typically over 40 times each second.", @@ -59,8 +59,8 @@ class TestApod(unittest.TestCase): "service_version": "v1", "title": "Sakurajima Volcano with Lightning", "url": "https://apod.nasa.gov/apod/image/1303/volcano_reitze_960.jpg" - }, - 'older page, no copyright' : + }, + 'older page, no copyright': { "datetime": datetime(1998, 6, 19), "date": "1998-06-19", @@ -72,7 +72,7 @@ class TestApod(unittest.TestCase): "title": "Good Morning Mars", "url": "https://apod.nasa.gov/apod/image/9806/tharsis_mgs.jpg" }, - 'older page, no copyright, #2' : + 'older page, no copyright, #2': { "datetime": datetime(2012, 8, 30), "date": "2012-08-30", @@ -84,27 +84,62 @@ class TestApod(unittest.TestCase): "title": "Apollo 11 Landing Site Panorama", "url": "https://apod.nasa.gov/apod/image/1208/a11pan1040226lftsm600.jpg" }, - } - - def _test_harness(self, test_title, data): - - print ("Testing "+test_title) - - # make the call - values = utility.parse_apod(data['datetime']) - - # Test returned properties - for prop in values.keys(): - if prop == "copyright": - print(str(values['copyright'])) - self.assertEqual(values[prop], data[prop], "Test of property: "+prop) - - - def test_apod_characteristics(self): - - for page_type in TestApod.TEST_DATA.keys(): - self._test_harness(page_type, TestApod.TEST_DATA[page_type]) - - - - +} + + +def soups_on(): + url = apod.utility._format_url(TEST_DATA['normal page, copyright']['datetime']) + response = requests.get(url) + soup = BeautifulSoup(response.text, 'html.parser') + return soup + + +def test__get_last_url(): + urls = 'https://www.google.com https://www.yahoo.com https://www.bing.com' + test_call = apod.utility._get_last_url(urls) + assert test_call == 'https://www.bing.com' + + +# hit other if statement +def test__format_url(): + url = apod.utility._format_url(TEST_DATA['normal page, copyright']['datetime']) + assert url == TEST_URL + + +def test__get_apod_chars(): + data = apod.utility._get_apod_chars(TEST_DATA['normal page, copyright']['datetime'], '') + assert data['copyright'] == 'Robert Gendler' + assert data['date'] == '2017-03-22' + assert data['hdurl'] == 'https://apod.nasa.gov/apod/image/1703/Cygnus-New-L.jpg' + assert data['media_type'] == 'image' + assert data['title'] == 'Central Cygnus Skyscape' + assert data['url'] == 'https://apod.nasa.gov/apod/image/1703/Cygnus-New-1024.jpg' + + +def test__title(): + soup = soups_on() + title = apod.utility._title(soup) + assert title == 'Central Cygnus Skyscape' + + +def test__copyright(): + soup = soups_on() + copyright = apod.utility._copyright(soup) + assert copyright == TEST_DATA['normal page, copyright']['copyright'] + + +def test__explanation(): + soup = soups_on() + explanation = apod.utility._explanation(soup) + assert explanation == TEST_DATA['normal page, copyright']['explanation'] + + +# def test__date(): +# soup = soups_on() +# date = apod.utility._date(soup) +# assert date == TEST_DATA['normal page, copyright']['date'] + + +def test__image_url(): + image_url = apod.utility._image_url(TEST_DATA['normal page, copyright']['datetime']) + assert image_url == TEST_DATA['normal page, copyright']['url']