Julie Butler – final_sample1_solutions

#############
## IMPORTS ##
#############
# Install and import pymysql to connect to
# the database
!#pip install pymysql
import pymysql
# Pandas for dataframes
import pandas as pd 
# Matplotlib for graphs
import matplotlib.pyplot as plt

#############################
## CONNECT TO THE DATABASE ##
#############################
connection = pymysql.connect(
host = "dsc140.cmgtlybtlnmh.us-east-2.rds.amazonaws.com",
user = "guest",
password = "UMUdsc140",
db="orders")

conn = connection.cursor()

################
## QUESTION 1 ##
################
# Identify and print the names of all the tables in the database. 
# Look at the columns within some of the tables and briefly describe 
# what this database appears to be all about.

# Gather the names of the tables in the database
tables = "SHOW TABLES"
output = conn.execute(tables)
tables = conn.fetchall()
tables = [tab[0] for tab in tables]

print(tables)

# Iterate through each table in the database
for table in tables:
    # Print the table name
    print("Table:", table)
    # Get the names of the columns for the table
    columns = "SHOW COLUMNS FROM "+table
    output = conn.execute(columns)
    columns = conn.fetchall()
    columns = [col[0] for col in columns]
    # Print the names of the columns
    print("Columns:", columns)
    
    # Print a blank line
    print()

# This data set seems to contain data for a company who sales products
# to customers. There is information about the employees, customers,
# products, and orders.

['categories', 'customers', 'employees', 'orders', 'ordersdetails', 'products', 'shippers', 'suppliers']
Table: categories
Columns: ['CategoryID', 'CategoryName', 'DescriptionText']

Table: customers
Columns: ['CustomerID', 'CustomerName', 'ContractName', 'Address', 'City', 'PostalCode', 'Country']

Table: employees
Columns: ['EmployeeID', 'LastName', 'FirstName', 'BirthDate', 'Photo', 'Notes']

Table: orders
Columns: ['OrderID', 'CustomerID', 'EmployeeID', 'OrderDate', 'ShipperID']

Table: ordersdetails
Columns: ['OrderDetailID', 'OrderID', 'ProductID', 'Quantity']

Table: products
Columns: ['ProductID', 'ProductName', 'SuppliersID', 'CategoryID', 'Unit', 'Price']

Table: shippers
Columns: ['ShipperID', 'ShipperName', 'Phone']

Table: suppliers
Columns: ['SupplierID', 'SuppliersName', 'ContactName', 'Address', 'City', 'PostalCode', 'Country', 'Phone']

################
## QUESTION 2 ##
################
# Find the sale amount for every order. To do this you’ll 
# need to find how many units were sold in each order and 
# the price per unit for the corresponding item that was 
# sold, then multiply the values together.

# Use an inner join to get the correct data
# We need quantity from ordersdetails, price from products, 
# and order id from odersdetails
inner_join = "SELECT o.Quantity, p.Price, o.OrderID FROM ordersdetails o INNER JOIN\
    products p ON o.ProductID=p.ProductID"
output = conn.execute(inner_join)
data = conn.fetchall()

# Convert the data to a datafra,e
data = pd.DataFrame(data, columns=["Quantity", "Price", "Order ID"])
# Compute the total price of the product ordered
data["Total Price"] = data["Quantity"]*data["Price"]
# Use a groupby to find the total price for each of the orders
# Previously we had the total price for each product ordered by some
# orders have multiple products
total_price_order_data = data.groupby(by="Order ID")["Total Price"].sum()
total_price_order_data

Order ID
10248     566.00
10249    2329.25
10250    2267.25
10251     418.50
10252    4662.50
          ...   
10320     645.00
10321     180.00
10322     140.00
10323     205.50
10324    4186.45
Name: Total Price, Length: 77, dtype: float64

################
## QUESTION 3 ##
################
# Make a histogram of the sale amounts you found in #2. 
plt.hist(total_price_order_data)
plt.xlabel("Total Order Price")
plt.ylabel("Number of Orders")

Text(0, 0.5, 'Number of Orders')

################
## QUESTION 4 ##
###############
# Generate a pandas dataframe that lists the customerID for all 
# customers served by the employee with EmployeeID “8”. 
# Display the dataframe.

# Get the data using the where keyword to only pull the selected data
# You can also pull all of the data and then use a mask on the resulting
# dataframe
employee_8 = "SELECT CustomerID FROM orders WHERE EmployeeID=8"
output = conn.execute(employee_8)
customers = conn.fetchall()

# Add the data to a dataframe
customers = pd.DataFrame(customers, columns=["Customer ID"])
customers

	Customer ID
0	65
1	33
2	80
3	5
4	44
5	63
6	67
7	15
8	86
9	55
10	77
11	38
12	84
13	58
14	29
15	75
16	37
17	4
18	83

################
## QUESTION 5 ##
###############
# Generate a pandas dataframe that lists the Customer’s Name and 
# their corresponding country of residence, again only for customers 
# served by the employee with EmployeeID “8”. Display the dataframe.

# Use an inner hoin to get the needed data while also still using the where
# statement to pull only selected data
employee_8_expanded = "SELECT c.CustomerName, c.Country FROM customers c INNER\
    JOIN orders o ON o.CustomerID=c.CustomerID WHERE o.EmployeeID=8"
output = conn.execute(employee_8_expanded)
employee_8_expanded_data = conn.fetchall()

# Create a dataframe
employee_8_expanded_data = pd.DataFrame(employee_8_expanded_data,
                                        columns=["Customer Name", "Country"])
employee_8_expanded_data

	Customer Name	Country
0	Rattlesnake Canyon Grocery	USA
1	GROSELLA-Restaurante	Venezuela
2	Tortuga Restaurante	Mexico
3	Berglunds snabbköp	Sweden
4	Lehmanns Marktstand	Germany
5	QUICK-Stop	Germany
6	Ricardo Adocicados	Brazil
7	Comércio Mineiro	Brazil
8	Die Wandernde Kuh	Germany
9	Old World Delicatessen	USA
10	The Big Cheese	USA
11	Island Trading	UK
12	Victuailles en stock	France
13	Pericles Comidas clásicas	Mexico
14	Galería del gastrónomo	Spain
15	Split Rail Beer & Ale	USA
16	Hungry Owl All-Night Grocers	Ireland
17	Around the Horn	UK
18	Vaffeljernet	Denmark

"SELECT o.OrderID, d.EmployeeID FROM order o INNER JOIN ordersdetails d ON o.OrderID=d.OrderID WHERE "

import scipy.stats as s

s.pearsonr()