Presentation is loading. Please wait.

Presentation is loading. Please wait.

Scatterplot #SCATTERPLOT: USEFUL FOR PLOTTING RELATIONSHIPS BETWEEN TWO NUMERIC VARIABLES library(ggvis) library(DBI) require(RMySQL) # set a driver m<-

Similar presentations


Presentation on theme: "Scatterplot #SCATTERPLOT: USEFUL FOR PLOTTING RELATIONSHIPS BETWEEN TWO NUMERIC VARIABLES library(ggvis) library(DBI) require(RMySQL) # set a driver m<-"— Presentation transcript:

1 Scatterplot
# SCATTERPLOT: USEFUL FOR PLOTTING RELATIONSHIPS BETWEEN TWO NUMERIC VARIABLES
library(ggvis)
library(DBI)
library(RMySQL)

# Set a driver.
m <- dbDriver("MySQL")

# If a previous session left connections open, connecting can fail with
# "in .local(drv, ...): cannot allocate a new connection: 16 connections
# already opened". Close any leftover connections BEFORE connecting, so the
# connection used by the queries below is not closed by this cleanup.
cons <- dbListConnections(MySQL())
for (con in cons) {
  dbDisconnect(con)
}

# Connect to the database.
conn <- dbConnect(
  m,
  user = "student",
  password = "student",
  host = "wallaby.terry.uga.edu",
  dbname = "ClassicModels"
)

# Get the monthly total revenue of orders (total revenue by month).
e <- dbGetQuery(
  conn,
  "SELECT MONTH(orderDate) AS orderMonth, SUM(quantityOrdered*priceEach) AS totalOrderRevenue FROM Orders, OrderDetails WHERE Orders.orderNumber = OrderDetails.orderNumber GROUP BY orderMonth;"
)
head(e)

# Plot order revenue by month, showing both the points and the line.
# Revenue is divided by 1,000,000 so the values match the y-axis title
# ("Millions").
e %>%
  ggvis(~orderMonth, ~totalOrderRevenue / 1000000) %>%
  layer_lines(stroke := "blue") %>%
  layer_points(fill := "red") %>%
  add_axis("x", title = "Month") %>%
  add_axis("y", title = "Total Revenue (Millions)", title_offset = 30)

2 Scatterplot

3 Scatterplot # Get total revenue of orders by month and year.
# Query total order revenue grouped by year and month.
f <- dbGetQuery(
  conn,
  "SELECT YEAR(orderDate) AS orderYear, MONTH(orderDate) AS orderMonth, SUM((quantityOrdered*priceEach)) AS totalRevenue FROM Orders, OrderDetails WHERE Orders.orderNumber = OrderDetails.orderNumber GROUP BY orderYear, orderMonth;"
)
head(f)

# ggvis expects grouping variables to be a factor, so convert.
f$orderYear <- as.factor(f$orderYear)

# Plot total order revenue by month, one line per year.
# Revenue is divided by 1000 to match the y-axis title ("Thousands").
f %>%
  group_by(orderYear) %>%
  ggvis(~orderMonth, ~totalRevenue / 1000, stroke = ~orderYear) %>%
  layer_lines() %>%
  add_axis("x", title = "Month") %>%
  add_axis("y", title = "Total Revenue (Thousands)", title_offset = 50)

4 Scatterplot

5 Exercise Using the Text database, write a query to calculate total revenue for sold items by sale number. Plot the results of your query using ggvis as a scatterplot.

6 Multiple files library(sqldf)
options(sqldf.driver = "SQLite") # to avoid conflict with RMySQL

# Get total revenue of orders by month for year 2004.
orders <- dbGetQuery(
  conn,
  "SELECT 'Orders' as category, MONTH(orderDate) AS month, SUM((quantityOrdered*priceEach)) AS totalValue FROM Orders, OrderDetails WHERE Orders.orderNumber = OrderDetails.orderNumber and YEAR(orderDate) = 2004 GROUP BY month;"
)
head(orders)

# Get total payment amount by month for year 2004.
# The sum is aliased totalValue (not value) so that both data frames expose
# the column names selected by the UNION query below.
payments <- dbGetQuery(
  conn,
  "SELECT 'Payments' as category, MONTH(paymentDate) AS month, SUM(amount) AS totalValue FROM Payments WHERE YEAR(paymentDate) = 2004 GROUP BY month;"
)
head(payments)

# Concatenate the two result sets (sqldf queries the data frames by name).
g <- sqldf(
  "SELECT month, category, totalValue FROM orders UNION SELECT month, category, totalValue FROM payments"
)
head(g)

# Plot total payment value and total order revenue by month for year 2004,
# one line per category.
g %>%
  group_by(category) %>%
  ggvis(~month, ~totalValue, stroke = ~category) %>%
  layer_lines() %>%
  add_axis("x", title = "Month") %>%
  add_axis("y", title = "Total Value", title_offset = 70)

7 Multiple files

8 Smoothing # SMOOTHING: HELPS DETECT A TREND IN A LINE PLOT
# NOTE(review): the data URL was lost when this transcript was extracted.
# Replace the placeholder with the address of the comma-separated file; the
# code below reads the columns year, month and temperature from it.
url <- "REPLACE_WITH_TEMPERATURE_DATA_URL"
t <- read.table(url, header = TRUE, sep = ",")
head(t)

# Keep only the November readings.
t11 <- sqldf("SELECT * FROM t where month = 11")

# Plot the raw series in red and overlay a smoothed trend (with its
# standard-error band) in blue.
t11 %>%
  ggvis(~year, ~temperature) %>%
  layer_lines(stroke := "red") %>%
  layer_smooths(se = TRUE, stroke := "blue") %>%
  add_axis("x", title = "Year", format = "####") %>%
  add_axis("y", title = "Temperature (F)", title_offset = 50)

9 Smoothing

10 Box plot
# BOXPLOTS: EFFECTIVE MEANS OF DISPLAYING INFORMATION ABOUT ONE OR MORE VARIABLES
# Fetch every payment amount.
i <- dbGetQuery(conn, "SELECT amount FROM Payments;")

# Draw one box plot of the payment amounts; factor(0) gives the x-axis a
# single constant category so all amounts fall into the same box.
i %>%
  ggvis(~factor(0), ~amount) %>%
  layer_boxplots() %>%
  add_axis("x", title = "Checks") %>%
  add_axis("y", title = "")

11 Box plot
# Fetch each 2004 payment together with the month it was made.
k <- dbGetQuery(
  conn,
  "SELECT month(paymentDate) as month, amount FROM Payments WHERE year(paymentDate) = 2004;"
)
head(k)

# One box plot of the amounts paid for each month of 2004; the month is
# converted to a factor so it is treated as a category on the x-axis.
k %>%
  ggvis(~factor(month), ~amount) %>%
  layer_boxplots() %>%
  add_axis("x", title = "Month in 2004") %>%
  add_axis("y", title = "Check Amount", title_offset = 60)

12 Exercise Using the ClassicModels database, write a query to report order revenue and the month of the order date for a single year. Plot the results of your query using ggvis as a boxplot.

13 Geographic data ggmap supports multiple mapping systems, including Google maps
library(ggplot2)
library(ggmap)
library(mapproj)
library(DBI)

# Google maps requires lon and lat, in that order, to create markers.
d <- dbGetQuery(
  conn,
  "SELECT y(officeLocation) AS lon, x(officeLocation) AS lat FROM Offices;"
)

# Show offices in the United States.
# Vary zoom to change the size of the map.
# The argument is spelled out in full as `markers` rather than relying on
# partial matching of `marker`.
map <- get_googlemap("united states", markers = d, zoom = 4)
ggmap(map) +
  labs(x = "Longitude", y = "Latitude") +
  ggtitle("US offices")

14 Map

15 John Snow 1854 Broad Street cholera map
Water pump

16 Cholera map (now Broadwick Street)
library(ggplot2)
library(ggmap)
library(mapproj)
library(readr)

# NOTE(review): both data URLs were lost when this transcript was extracted.
# Replace the placeholders with the addresses of the comma-delimited files.
# The pumps data is passed to get_googlemap() as markers; the deaths data
# must provide the longitude, latitude and count columns used below.
url <- "REPLACE_WITH_PUMPS_DATA_URL"
pumps <- read_delim(url, delim = ",")
url <- "REPLACE_WITH_DEATHS_DATA_URL"
deaths <- read_delim(url, delim = ",")

# Base map around Broadwick Street with the water pumps as markers.
map <- get_googlemap(
  "broadwick street, london, united kingdom",
  markers = pumps,
  zoom = 15
)

# Overlay the deaths as blue points sized by count, and restrict the axes to
# the area of interest.
ggmap(map) +
  labs(x = "Longitude", y = "Latitude") +
  ggtitle("Pumps and deaths") +
  geom_point(
    aes(x = longitude, y = latitude, size = count),
    color = "blue",
    data = deaths
  ) +
  xlim(-.14, -.13) +
  ylim(51.51, 51.516)

17 Key points ggvis is based on a grammar of graphics
Very powerful and logical. Supports interactive graphics. You can visualize the results of SQL queries using R. The combination of MySQL and R provides a strong platform for data reporting.


Download ppt "Scatterplot #SCATTERPLOT: USEFUL FOR PLOTTING RELATIONSHIPS BETWEEN TWO NUMERIC VARIABLES library(ggvis) library(DBI) require(RMySQL) # set a driver m<-"

Similar presentations


Ads by Google