#############
## IMPORTS ##
#############
# Install and import pymysql to connect to
# the database
!#pip install pymysql
import pymysql
# Pandas for dataframes
import pandas as pd 
# Matplotlib for graphs
import matplotlib.pyplot as plt
#############################
## CONNECT TO THE DATABASE ##
#############################
connection = pymysql.connect(
host = "dsc140.cmgtlybtlnmh.us-east-2.rds.amazonaws.com",
user = "guest",
password = "UMUdsc140",
db="orders")

conn = connection.cursor()
################
## QUESTION 1 ##
################
# Identify and print the names of all the tables in the database. 
# Look at the columns within some of the tables and briefly describe 
# what this database appears to be all about.

# Gather the names of the tables in the database
tables = "SHOW TABLES"
output = conn.execute(tables)
tables = conn.fetchall()
tables = [tab[0] for tab in tables]

print(tables)

# Iterate through each table in the database
for table in tables:
    # Print the table name
    print("Table:", table)
    # Get the names of the columns for the table
    columns = "SHOW COLUMNS FROM "+table
    output = conn.execute(columns)
    columns = conn.fetchall()
    columns = [col[0] for col in columns]
    # Print the names of the columns
    print("Columns:", columns)
    
    # Print a blank line
    print()

# This data set seems to contain data for a company who sales products
# to customers. There is information about the employees, customers,
# products, and orders.
['categories', 'customers', 'employees', 'orders', 'ordersdetails', 'products', 'shippers', 'suppliers']
Table: categories
Columns: ['CategoryID', 'CategoryName', 'DescriptionText']

Table: customers
Columns: ['CustomerID', 'CustomerName', 'ContractName', 'Address', 'City', 'PostalCode', 'Country']

Table: employees
Columns: ['EmployeeID', 'LastName', 'FirstName', 'BirthDate', 'Photo', 'Notes']

Table: orders
Columns: ['OrderID', 'CustomerID', 'EmployeeID', 'OrderDate', 'ShipperID']

Table: ordersdetails
Columns: ['OrderDetailID', 'OrderID', 'ProductID', 'Quantity']

Table: products
Columns: ['ProductID', 'ProductName', 'SuppliersID', 'CategoryID', 'Unit', 'Price']

Table: shippers
Columns: ['ShipperID', 'ShipperName', 'Phone']

Table: suppliers
Columns: ['SupplierID', 'SuppliersName', 'ContactName', 'Address', 'City', 'PostalCode', 'Country', 'Phone']
################
## QUESTION 2 ##
################
# Find the sale amount for every order. To do this you’ll 
# need to find how many units were sold in each order and 
# the price per unit for the corresponding item that was 
# sold, then multiply the values together.

# Use an inner join to get the correct data
# We need quantity from ordersdetails, price from products, 
# and order id from odersdetails
inner_join = "SELECT o.Quantity, p.Price, o.OrderID FROM ordersdetails o INNER JOIN\
    products p ON o.ProductID=p.ProductID"
output = conn.execute(inner_join)
data = conn.fetchall()

# Convert the data to a datafra,e
data = pd.DataFrame(data, columns=["Quantity", "Price", "Order ID"])
# Compute the total price of the product ordered
data["Total Price"] = data["Quantity"]*data["Price"]
# Use a groupby to find the total price for each of the orders
# Previously we had the total price for each product ordered by some
# orders have multiple products
total_price_order_data = data.groupby(by="Order ID")["Total Price"].sum()
total_price_order_data
Order ID
10248     566.00
10249    2329.25
10250    2267.25
10251     418.50
10252    4662.50
          ...   
10320     645.00
10321     180.00
10322     140.00
10323     205.50
10324    4186.45
Name: Total Price, Length: 77, dtype: float64
################
## QUESTION 3 ##
################
# Make a histogram of the sale amounts you found in #2. 
plt.hist(total_price_order_data)
plt.xlabel("Total Order Price")
plt.ylabel("Number of Orders")
Text(0, 0.5, 'Number of Orders')

################
## QUESTION 4 ##
###############
# Generate a pandas dataframe that lists the customerID for all 
# customers served by the employee with EmployeeID “8”. 
# Display the dataframe.

# Get the data using the where keyword to only pull the selected data
# You can also pull all of the data and then use a mask on the resulting
# dataframe
employee_8 = "SELECT CustomerID FROM orders WHERE EmployeeID=8"
output = conn.execute(employee_8)
customers = conn.fetchall()

# Add the data to a dataframe
customers = pd.DataFrame(customers, columns=["Customer ID"])
customers
Customer ID
0 65
1 33
2 80
3 5
4 44
5 63
6 67
7 15
8 86
9 55
10 77
11 38
12 84
13 58
14 29
15 75
16 37
17 4
18 83
################
## QUESTION 5 ##
###############
# Generate a pandas dataframe that lists the Customer’s Name and 
# their corresponding country of residence, again only for customers 
# served by the employee with EmployeeID “8”. Display the dataframe.

# Use an inner hoin to get the needed data while also still using the where
# statement to pull only selected data
employee_8_expanded = "SELECT c.CustomerName, c.Country FROM customers c INNER\
    JOIN orders o ON o.CustomerID=c.CustomerID WHERE o.EmployeeID=8"
output = conn.execute(employee_8_expanded)
employee_8_expanded_data = conn.fetchall()

# Create a dataframe
employee_8_expanded_data = pd.DataFrame(employee_8_expanded_data,
                                        columns=["Customer Name", "Country"])
employee_8_expanded_data
Customer Name Country
0 Rattlesnake Canyon Grocery USA
1 GROSELLA-Restaurante Venezuela
2 Tortuga Restaurante Mexico
3 Berglunds snabbköp Sweden
4 Lehmanns Marktstand Germany
5 QUICK-Stop Germany
6 Ricardo Adocicados Brazil
7 Comércio Mineiro Brazil
8 Die Wandernde Kuh Germany
9 Old World Delicatessen USA
10 The Big Cheese USA
11 Island Trading UK
12 Victuailles en stock France
13 Pericles Comidas clásicas Mexico
14 Galería del gastrónomo Spain
15 Split Rail Beer & Ale USA
16 Hungry Owl All-Night Grocers Ireland
17 Around the Horn UK
18 Vaffeljernet Denmark
"SELECT o.OrderID, d.EmployeeID FROM order o INNER JOIN ordersdetails d ON o.OrderID=d.OrderID WHERE "
import scipy.stats as s

s.pearsonr()