#############
## IMPORTS ##
#############
# Install and import pymysql to connect to
# the database
!#pip install pymysql
import pymysql
# Pandas for dataframes
import pandas as pd
# Matplotlib for graphs
import matplotlib.pyplot as plt#############################
## CONNECT TO THE DATABASE ##
#############################
connection = pymysql.connect(
host = "dsc140.cmgtlybtlnmh.us-east-2.rds.amazonaws.com",
user = "guest",
password = "UMUdsc140",
db="orders")
conn = connection.cursor()################
## QUESTION 1 ##
################
# Identify and print the names of all the tables in the database.
# Look at the columns within some of the tables and briefly describe
# what this database appears to be all about.
# Gather the names of the tables in the database
tables = "SHOW TABLES"
output = conn.execute(tables)
tables = conn.fetchall()
tables = [tab[0] for tab in tables]
print(tables)
# Iterate through each table in the database
for table in tables:
# Print the table name
print("Table:", table)
# Get the names of the columns for the table
columns = "SHOW COLUMNS FROM "+table
output = conn.execute(columns)
columns = conn.fetchall()
columns = [col[0] for col in columns]
# Print the names of the columns
print("Columns:", columns)
# Print a blank line
print()
# This data set seems to contain data for a company who sales products
# to customers. There is information about the employees, customers,
# products, and orders.['categories', 'customers', 'employees', 'orders', 'ordersdetails', 'products', 'shippers', 'suppliers']
Table: categories
Columns: ['CategoryID', 'CategoryName', 'DescriptionText']
Table: customers
Columns: ['CustomerID', 'CustomerName', 'ContractName', 'Address', 'City', 'PostalCode', 'Country']
Table: employees
Columns: ['EmployeeID', 'LastName', 'FirstName', 'BirthDate', 'Photo', 'Notes']
Table: orders
Columns: ['OrderID', 'CustomerID', 'EmployeeID', 'OrderDate', 'ShipperID']
Table: ordersdetails
Columns: ['OrderDetailID', 'OrderID', 'ProductID', 'Quantity']
Table: products
Columns: ['ProductID', 'ProductName', 'SuppliersID', 'CategoryID', 'Unit', 'Price']
Table: shippers
Columns: ['ShipperID', 'ShipperName', 'Phone']
Table: suppliers
Columns: ['SupplierID', 'SuppliersName', 'ContactName', 'Address', 'City', 'PostalCode', 'Country', 'Phone']
################
## QUESTION 2 ##
################
# Find the sale amount for every order. To do this you’ll
# need to find how many units were sold in each order and
# the price per unit for the corresponding item that was
# sold, then multiply the values together.
# Use an inner join to get the correct data
# We need quantity from ordersdetails, price from products,
# and order id from odersdetails
inner_join = "SELECT o.Quantity, p.Price, o.OrderID FROM ordersdetails o INNER JOIN\
products p ON o.ProductID=p.ProductID"
output = conn.execute(inner_join)
data = conn.fetchall()
# Convert the data to a datafra,e
data = pd.DataFrame(data, columns=["Quantity", "Price", "Order ID"])
# Compute the total price of the product ordered
data["Total Price"] = data["Quantity"]*data["Price"]
# Use a groupby to find the total price for each of the orders
# Previously we had the total price for each product ordered by some
# orders have multiple products
total_price_order_data = data.groupby(by="Order ID")["Total Price"].sum()
total_price_order_dataOrder ID
10248 566.00
10249 2329.25
10250 2267.25
10251 418.50
10252 4662.50
...
10320 645.00
10321 180.00
10322 140.00
10323 205.50
10324 4186.45
Name: Total Price, Length: 77, dtype: float64
################
## QUESTION 3 ##
################
# Make a histogram of the sale amounts you found in #2.
plt.hist(total_price_order_data)
plt.xlabel("Total Order Price")
plt.ylabel("Number of Orders")Text(0, 0.5, 'Number of Orders')

################
## QUESTION 4 ##
###############
# Generate a pandas dataframe that lists the customerID for all
# customers served by the employee with EmployeeID “8”.
# Display the dataframe.
# Get the data using the where keyword to only pull the selected data
# You can also pull all of the data and then use a mask on the resulting
# dataframe
employee_8 = "SELECT CustomerID FROM orders WHERE EmployeeID=8"
output = conn.execute(employee_8)
customers = conn.fetchall()
# Add the data to a dataframe
customers = pd.DataFrame(customers, columns=["Customer ID"])
customers| Customer ID | |
|---|---|
| 0 | 65 |
| 1 | 33 |
| 2 | 80 |
| 3 | 5 |
| 4 | 44 |
| 5 | 63 |
| 6 | 67 |
| 7 | 15 |
| 8 | 86 |
| 9 | 55 |
| 10 | 77 |
| 11 | 38 |
| 12 | 84 |
| 13 | 58 |
| 14 | 29 |
| 15 | 75 |
| 16 | 37 |
| 17 | 4 |
| 18 | 83 |
################
## QUESTION 5 ##
###############
# Generate a pandas dataframe that lists the Customer’s Name and
# their corresponding country of residence, again only for customers
# served by the employee with EmployeeID “8”. Display the dataframe.
# Use an inner hoin to get the needed data while also still using the where
# statement to pull only selected data
employee_8_expanded = "SELECT c.CustomerName, c.Country FROM customers c INNER\
JOIN orders o ON o.CustomerID=c.CustomerID WHERE o.EmployeeID=8"
output = conn.execute(employee_8_expanded)
employee_8_expanded_data = conn.fetchall()
# Create a dataframe
employee_8_expanded_data = pd.DataFrame(employee_8_expanded_data,
columns=["Customer Name", "Country"])
employee_8_expanded_data| Customer Name | Country | |
|---|---|---|
| 0 | Rattlesnake Canyon Grocery | USA |
| 1 | GROSELLA-Restaurante | Venezuela |
| 2 | Tortuga Restaurante | Mexico |
| 3 | Berglunds snabbköp | Sweden |
| 4 | Lehmanns Marktstand | Germany |
| 5 | QUICK-Stop | Germany |
| 6 | Ricardo Adocicados | Brazil |
| 7 | Comércio Mineiro | Brazil |
| 8 | Die Wandernde Kuh | Germany |
| 9 | Old World Delicatessen | USA |
| 10 | The Big Cheese | USA |
| 11 | Island Trading | UK |
| 12 | Victuailles en stock | France |
| 13 | Pericles Comidas clásicas | Mexico |
| 14 | Galería del gastrónomo | Spain |
| 15 | Split Rail Beer & Ale | USA |
| 16 | Hungry Owl All-Night Grocers | Ireland |
| 17 | Around the Horn | UK |
| 18 | Vaffeljernet | Denmark |
"SELECT o.OrderID, d.EmployeeID FROM order o INNER JOIN ordersdetails d ON o.OrderID=d.OrderID WHERE "import scipy.stats as s
s.pearsonr()