[AccessD] Quartile

MartyConnelly martyconnelly at shaw.ca
Tue Feb 28 17:18:16 CST 2006


This code has excel limits, arrays limited to 64K but maybe the initial 
part using getrows on a recordset and moving the field
into another array might help you or maybe faster. Don Celko had an SQL 
percentile method in "SQL for Smarties" but even he suggested
using SAS or SPSS for large datasets or using OLAP.

Function TestRecordset()
 Dim strConnect As String
 Dim arRs As Variant
 Dim strsql As String

strsql = "SELECT DISTINCTROW Format([ShippedDate],""yyyy""),[Order 
Subtotals].Subtotal, Orders.OrderID " & _
"FROM Orders INNER JOIN [Order Subtotals] ON Orders.OrderID = [Order 
Subtotals].OrderID " & _
"WHERE (Orders.ShippedDate Is Not Null);"
Debug.Print strsql
  strConnect = "Provider=Microsoft.Jet.OLEDB.4.0;" & _
  "Data Source=""C:\Program Files\Microsoft Office\Office\Samples\Copy 
of Northwind.mdb"";"
 Dim objCnn As Object
 Dim objRs As Object
  Set objCnn = CreateObject("ADODB.Connection")
  objCnn.Open strConnect
  Set objRs = CreateObject("ADODB.Recordset")
  objRs.Open strsql, objCnn
   If objRs.EOF = False Then
       arRs = objRs.GetRows
   Else
       arRs = "no results"
   End If
 
  
   Dim arRsT() As Double
   Dim myubnd As Long
   Dim mylbnd As Long
   myubnd = UBound(arRs, 2)
   mylbnd = LBound(arRs, 2)
   ReDim arRsT(mylbnd To myubnd)
   'Excel arrays get confused unless this done
    ' with passed arrays and limits of 64K
   
   Dim i As Long
   'stuff sql query field "subtotal" into a new array
   For i = 0 To UBound(arRs, 2)
 
      arRsT(i) = arRs(1, i)
  
   Next i
 
   Debug.Print Pctile(arRsT, 0.75)
   Debug.Print "1st quartile=" & quartile(arRsT, 1)
    Debug.Print "2nd quartile=" & quartile(arRsT, 2)
     Debug.Print "3'd quartile=" & quartile(arRsT, 3)
      Debug.Print "min " & quartile(arRsT, 0)
       Debug.Print "max " & quartile(arRsT, 4)
        Debug.Print "95% percentile " & Pctile(arRsT, 0.95)
   objRs.Close
   Set objRs = Nothing
   objCnn.Close
   Set objCnn = Nothing
 
  TestRecordset = arRs
End Function
Function Pctile(data, pct) As Double
Dim objExcel As Object
    Set objExcel = CreateObject("Excel.Application")
    Pctile = objExcel.Application.percentile(data, pct)
    objExcel.Quit
    Set objExcel = Nothing
End Function
Function quartile(data, pct As Long) As Double
Dim objExcel As Object
    Set objExcel = CreateObject("Excel.Application")
    quartile = objExcel.Application.quartile(data, pct)
    objExcel.Quit
    Set objExcel = Nothing
End Function

Gustav Brock wrote:

>Hi all
>
>Anyone having a need to calculate quartiles?
>
>The few methods I've found assumes calculation on one full table which I think doesn't mimic real life except if you create a temp table. Thus I have made the function below to take a filter argument which makes it much more versatile.
>
>I cannot find a way to do this fast as you need a function to browse the recordset.
>
>You can test this out with the Northwind demo base.
>First, create a query, qdyCustomerYearLineQuantity
>
>  SELECT 
>    Country, 
>    Year([OrderDate]) AS OrderYear, 
>    Quantity
>  FROM 
>    (Customers 
>    INNER JOIN 
>      Orders 
>        ON Customers.CustomerID = Orders.CustomerID) 
>      INNER JOIN 
>        [Order Details] 
>          ON Orders.OrderID = [Order Details].OrderID;
>
>And another, qdyCustomerOrderYear:
>
>  SELECT 
>    Country, 
>    Year([OrderDate]) AS OrderYear
>  FROM 
>    Customers 
>    INNER JOIN 
>      Orders 
>        ON Customers.CustomerID = Orders.CustomerID
>  GROUP BY 
>    Country, 
>    Year([OrderDate]);
>
>Now create a query to calculate quartiles on quantity per customer's country:
>
>  SELECT 
>    Country, 
>    OrderYear, 
>    Quartile5("qdyCustomerYearLineQuantity","Quantity",1,"Country='" & [Country] & "' And OrderYear=" & [OrderYear] & "") AS Quartile1, 
>    Quartile5("qdyCustomerYearLineQuantity","Quantity",2,"Country='" & [Country] & "' And OrderYear=" & [OrderYear] & "") AS Quartile2, 
>    Quartile5("qdyCustomerYearLineQuantity","Quantity",3,"Country='" & [Country] & "' And OrderYear=" & [OrderYear] & "") AS Quartile3, 
>  FROM 
>    qdyCustomerOrderYear;
>
>Country	OrderYear	Quartile1	Quartile2	Quartile3
>Canada	1994	11,5	22	37
>Canada	1995	15	20	42
>Canada	1996	9	18	25
>Denmark	1994	8,25	12	17,25
>Denmark	1995	18	25	35
>Denmark	1996	15	25	30
>
>The function looks like this:
>
><code>
>
>Public Function Quartile5( _
>  ByVal strTable As String, _
>  ByVal strField As String, _
>  ByVal bytQuartile As Byte, _
>  Optional ByVal strFilter As String) _
>  As Double
>  
>  ' strTable :    Name of the table/query to analyze.
>  ' strField :    Name of the field to analyze.
>  ' bytQuartile:  Which min/max or median/quartile to calculate.
>  ' strFilter:    Optional filter expression.
>  '
>  ' Data must be in ascending order by strField.
>  ' Constant values and return values mimic those of
>  ' Excel's Quartile() function.
>  '
>  ' Returns:
>  '   Minimum, maximum, median or upper/lower quartile
>  '   of strField of strTable filtered on strFilter.
>  '
>  ' Reset function (clear Static dbs) by the call:
>  '   Call Quartile5("", "", 0)
>  '
>  ' 2006-02-27. Cactus Data ApS, CPH.
>    
>    
>    ' Find median.
>    Const cbytQuartMedian             As Byte = 2
>    ' Find lower (first) quartile.
>    Const cbytQuartLow                As Byte = 1
>    ' Find upper (third) quartile.
>    Const cbytQuartHigh               As Byte = 3
>    ' Find minimum value.
>    Const cbytQuartMinimum            As Byte = 0
>    ' Find maximum value.
>    Const cbytQuartMaximum            As Byte = 4
>    
>    ' Define default operation.
>    Const cbytQuartDefault = cbytQuartMedian
>    
>    Static dbs      As DAO.Database
>    Dim rst         As DAO.Recordset
>    
>    Dim strSQL      As String
>    Dim lngNumber   As Long
>    Dim dblPosition As Double
>    Dim lngPosition As Long
>    Dim dblInterpol As Double
>    Dim dblValueOne As Double
>    Dim dblValueTwo As Double
>    Dim dblQuartile As Double
>    
>    ' Use default calculation if choice of calculation is outside range.
>    If bytQuartile > 4 Then
>      bytQuartile = cbytQuartDefault
>    End If
>    
>    If dbs Is Nothing Then
>      Set dbs = CurrentDb()
>    End If
>
>    If Len(strTable) > 0 And Len(strField) > 0 Then
>      strSQL = "SELECT [" & strField & "] FROM [" & strTable & "] "
>      strSQL = strSQL & "WHERE ([" & strField & "] Is Not Null) "
>      If Len(strFilter) > 0 Then
>        strSQL = strSQL & "AND (" & strFilter & ") "
>      End If
>      strSQL = strSQL & "ORDER BY [" & strField & "];"
>  
>      Set rst = dbs.OpenRecordset(strSQL)
>      
>      With rst
>        If Not .EOF = True Then
>          If bytQuartile = cbytQuartMinimum Then
>            ' No need to count records.
>            lngNumber = 1
>          Else
>            ' Count records.
>            .MoveLast
>            lngNumber = .RecordCount
>          End If
>          Select Case bytQuartile
>            Case cbytQuartMinimum
>              ' Current record is first record.
>              ' Read value of this record.
>            Case cbytQuartMaximum
>              ' Current record is last record.
>              ' Read value of this record.
>            Case cbytQuartMedian
>              ' Locate position of median.
>              dblPosition = (lngNumber + 1) / 2
>            Case cbytQuartLow
>              dblPosition = (lngNumber + 3) / 4
>            Case cbytQuartHigh
>              dblPosition = (3 * lngNumber + 1) / 4
>          End Select
>          Select Case bytQuartile
>            Case cbytQuartMinimum, cbytQuartMaximum
>              ' Read current row.
>            Case Else
>              .MoveFirst
>              ' Find position of first observation to retrieve.
>              ' If lngPosition is 0, then upper position is first record.
>              ' If lngPosition is not 0 and position is not an integer, then
>              ' read the next observation too.
>              lngPosition = Fix(dblPosition)
>              dblInterpol = dblPosition - lngPosition
>              If lngNumber = 1 Then
>                ' Nowhere else to move.
>              ElseIf lngPosition > 1 Then
>                ' Move to record to read.
>                .Move lngPosition - 1
>              End If
>          End Select
>          ' Retrieve value from first observation.
>          dblValueOne = .Fields(0).Value
>          
>          Select Case bytQuartile
>            Case cbytQuartMinimum, cbytQuartMaximum
>              dblQuartile = dblValueOne
>            Case Else
>              If dblInterpol = 0 Then
>                ' Only one observation to read.
>                dblQuartile = dblValueOne
>              Else
>                .MoveNext
>                ' Retrieve value from second observation.
>                dblValueTwo = .Fields(0).Value
>                ' For positive values interpolate between 0 and dblValueOne.
>                ' For negative values interpolate between 2 * dblValueOne and dblValueOne.
>                ' Calculate quartile using linear interpolation.
>                dblQuartile = dblValueOne + dblInterpol * CDec(dblValueTwo - dblValueOne)
>              End If
>          End Select
>        End If
>        .Close
>      End With
>    Else
>      ' Reset.
>      Set dbs = Nothing
>    End If
>      
>    Set rst = Nothing
>
>  Quartile5 = dblQuartile
>
>End Function
>
></code>
>
>The function can easily be converted to ADO but that runs even slower.
>Comments are most welcome.
>
>/gustav
>  
>

-- 
Marty Connelly
Victoria, B.C.
Canada






More information about the AccessD mailing list