

Mission to Mars

Goal

To build a web application that scrapes various websites for data related to the Mission to Mars and displays the information in a single HTML page.


Data Source


Tools

  • Python (Flask, Splinter, BeautifulSoup, Pandas)
  • MongoDB
  • HTML/CSS
  • Jinja templates

App Set-up

Import dependencies (make sure MongoDB is running before starting the app).

from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo
import scrape_mars

Create an instance of Flask class and connect to MongoDB.

app=Flask(__name__)
mongo=PyMongo(app, uri="mongodb://localhost:27017/mars_db")

Get data from MongoDB and render it to index.html.

@app.route('/')
def index():
    # Fetch the single scraped-results document from MongoDB.
    content=mongo.db.results.find_one()
    return render_template('index.html',dict=content)
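The Mongo document is passed to the template under the name `dict`, so the template looks fields up on it by key. A minimal sketch of that lookup using `jinja2` directly (the markup here is an assumption standing in for `index.html`; the field names come from `scrape_mars.scrape()`):

```python
from jinja2 import Template

# Stripped-down stand-in for index.html; real markup will differ.
snippet = Template(
    "<h2>{{ dict.latest_news_title }}</h2><p>{{ dict.latest_news_p }}</p>"
)

# Example of what find_one() might return (illustrative values).
content = {
    'latest_news_title': 'Mars Rover Update',
    'latest_news_p': 'The rover continues its traverse.',
}

# Jinja attribute access falls back to dictionary item lookup.
html = snippet.render(dict=content)
print(html)
```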

Start web scraping and store returned results into MongoDB.

@app.route('/scrape')
def scraper():
    data=scrape_mars.scrape()
    # upsert=True inserts a document if the collection is empty,
    # otherwise merges the new fields into the existing one.
    mongo.db.results.update_one({},{"$set": data},upsert=True)
    return redirect('/',code=302)
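`update_one({}, {"$set": data}, upsert=True)` matches the first document in the collection (empty filter) and either merges the new fields into it or inserts a fresh document if none exists, so the collection always holds exactly one results document. A rough pure-Python sketch of that behaviour (the Mongo call itself, simulated on a plain list):

```python
def update_one_upsert(collection, update_fields):
    """Mimic update_one({}, {'$set': fields}, upsert=True) on a list of dicts."""
    if collection:
        # A document exists: merge the new fields into it in place.
        collection[0].update(update_fields)
    else:
        # Empty collection: insert a new document.
        collection.append(dict(update_fields))

results = []  # stands in for mongo.db.results
update_one_upsert(results, {'latest_news_title': 'Old title'})
update_one_upsert(results, {'latest_news_title': 'New title',
                            'jpl_img': 'https://example.com/mars.jpg'})
print(results)
```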

Run the app.

if __name__=='__main__':
    app.run()

Web Scraping

Import dependencies

import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
from splinter import Browser
import time

Define a function that launches Chrome through chromedriver.

def init__browser():
    # Path to the local chromedriver binary; adjust for your system.
    executable_path={'executable_path':'/usr/local/bin/chromedriver'}
    return Browser('chrome',**executable_path,headless=False)

Define a function to trigger web scraping, and create an empty dictionary to store all returned results. Scrape the NASA Mars News site and collect the latest news title and paragraph text.

def scrape():

    scrapping_results={}
    browser=init__browser()
    nasa_url="https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
    browser.visit(nasa_url)
    html=browser.html
    soup=bs(html,'html.parser')
    latest_news_title=soup.find('div',{"class":"content_title"}).a.text.strip()
    scrapping_results['latest_news_title']=latest_news_title
    latest_news_p=soup.find('div',{"class":"rollover_description_inner"}).text
    browser.quit()
    scrapping_results['latest_news_p']=latest_news_p
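The same `find` pattern can be tried on a small inline document. This sketch reuses the class names the scraper targets, but the surrounding markup is illustrative, not NASA's real page:

```python
from bs4 import BeautifulSoup as bs

# Illustrative markup mimicking the class names the scraper looks for.
html = """
<div class="content_title"><a href="/news/1">  Mars Rover Lands  </a></div>
<div class="rollover_description_inner">A short summary.</div>
"""
soup = bs(html, 'html.parser')

# find() returns the first matching tag; .a descends to the nested link.
title = soup.find('div', {"class": "content_title"}).a.text.strip()
summary = soup.find('div', {"class": "rollover_description_inner"}).text.strip()
print(title, '|', summary)
```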

Visit JPL Featured Space Image to find the image url for the current Featured Mars Image.

    browser=init__browser()
    jpl_url='https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(jpl_url)
    browser.click_link_by_partial_text('FULL IMAGE') 
    time.sleep(5) 
    browser.click_link_by_partial_text('more info') 
    html=browser.html
    soup=bs(html,'html.parser')
    result=soup.find('figure',{'class':'lede'}).a['href']
    # The scraped href is root-relative (e.g. '/spaceimages/...'), so the
    # base URL must not end with a slash or the joined URL gets '//'.
    jpl_base_url='https://www.jpl.nasa.gov'
    jpl_featured_image_url=jpl_base_url+result
    scrapping_results['jpl_img']=jpl_featured_image_url
    browser.quit()
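Because the scraped href is typically root-relative, plain string concatenation is fragile: a trailing slash on the base produces a double slash. `urljoin` from the standard library handles both cases (the image path below is hypothetical):

```python
from urllib.parse import urljoin

base = 'https://www.jpl.nasa.gov/'
relative = '/spaceimages/images/largesize/PIA00000_hires.jpg'  # hypothetical path

# Naive concatenation keeps both slashes.
naive = base + relative

# urljoin resolves the root-relative path against the host correctly.
joined = urljoin(base, relative)
print(naive)
print(joined)
```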

Visit the Mars Weather twitter account to scrape the latest Mars weather tweet from the page.

    browser=init__browser()
    twitter_url='https://twitter.com/MarsWxReport'
    browser.visit(twitter_url)
    html=browser.html
    soup=bs(html,'html.parser')
    results=soup.find_all('p',{'class':'TweetTextSize'})
    # Strip embedded <a> tags (e.g. picture links) so they do not
    # pollute the tweet text.
    for tweet in results:
        unwanted=tweet.find('a')
        if unwanted:
            unwanted.extract()
    tweet_list=[tweet.text.strip() for tweet in results]
    mars_weather='No weather tweet found'
    for tweet in tweet_list:
        if ('InSight sol' in tweet):
            # lstrip strips a character set, not a prefix, so use
            # replace to remove the leading 'InSight ' exactly once.
            mars_weather=tweet.replace('InSight ','',1)
            break
    scrapping_results['latest_tweet']=mars_weather
    browser.quit()
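One pitfall worth noting here: `str.lstrip` takes a set of characters, not a prefix, so `tweet.lstrip('InSight ')` can eat into the word that follows. `str.replace` with a count of 1 removes the literal prefix exactly once:

```python
tweet = 'InSight temperature report for sol 300'

# lstrip treats 'InSight ' as the character set {I,n,S,i,g,h,t,' '} and
# keeps stripping: the leading 't' of 'temperature' is in the set too.
stripped = tweet.lstrip('InSight ')

# replace with count=1 removes the literal prefix once and stops.
replaced = tweet.replace('InSight ', '', 1)

print(stripped)   # note the mangled first word
print(replaced)
```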

Visit the Mars Facts webpage to scrape the table containing facts about the planet including Diameter, Mass, etc.

    browser=init__browser()
    mars_facts_url='https://space-facts.com/mars/'
    browser.visit(mars_facts_url)
    html=browser.html
    soup=bs(html,'html.parser')
    first_column=soup.find_all('td',{'class':'column-1'})
    first_column_content=[i.text for i in first_column]
    second_column=soup.find_all('td',{'class':'column-2'})
    second_column_content=[i.text for i in second_column]
    table_df=pd.DataFrame({'Description':first_column_content,'Value':second_column_content})
    mars_facts_html_table=table_df.to_html(index=False,justify='center')
    scrapping_results['table']=mars_facts_html_table
    browser.quit()
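`to_html` turns the DataFrame into a ready-to-embed `<table>` string, which is why the whole facts table can be stored in MongoDB as a single value. A self-contained sketch with two sample rows (values are public Mars facts, not scraped output):

```python
import pandas as pd

table_df = pd.DataFrame({
    'Description': ['Equatorial Diameter:', 'Mass:'],
    'Value': ['6,792 km', '6.39 x 10^23 kg'],
})

# index=False drops the row index; justify='center' centers the headers.
mars_facts_html_table = table_df.to_html(index=False, justify='center')
print(mars_facts_html_table)
```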

Visit the USGS Astrogeology site to obtain high-resolution images for each of Mars's hemispheres.

    browser=init__browser()
    url='https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(url)
    html=browser.html
    soup=bs(html,'html.parser')
    titles=soup.find_all('h3') 
    title_list=[title.text for title in titles]
    usgs_base_url='https://astrogeology.usgs.gov'
    next_page_urls=soup.find_all('div',{'class':'description'}) 
    next_page_full_url_list=[usgs_base_url+url.a['href'] for url in next_page_urls]
    original_image_url_list=[]
    for url in next_page_full_url_list:
        browser.visit(url)
        html=browser.html
        soup=bs(html,'html.parser')
        # The first <li> on each detail page links to the full-size image.
        links=soup.find_all('li')
        original_image_url_list.append(links[0].a['href'])
    browser.quit()   
    hemisphere_image_urls=[dict(title=title_list[i].replace(' Enhanced',''), img_url=original_image_url_list[i]) for i in range(4)]
    scrapping_results['hemisphere']=hemisphere_image_urls            

    return scrapping_results
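The final comprehension pairs the i-th title with the i-th image URL; `zip` expresses the same pairing without index arithmetic (titles below match the site's naming pattern, URLs are hypothetical):

```python
title_list = ['Cerberus Hemisphere Enhanced', 'Schiaparelli Hemisphere Enhanced']
original_image_url_list = [
    'https://astropedia.example/cerberus.tif',      # hypothetical URLs
    'https://astropedia.example/schiaparelli.tif',
]

# zip pairs elements positionally, so no range(4) indexing is needed.
hemisphere_image_urls = [
    {'title': title.replace(' Enhanced', ''), 'img_url': img_url}
    for title, img_url in zip(title_list, original_image_url_list)
]
print(hemisphere_image_urls)
```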

Preview

screenshot
