Back to all posts

July 1, 2016 · CODE · Nate Clark

How to Build Software: Accounting Fraud Detection

I've gotten several questions on how you might implement Benford's Law, which was the subject of a previous post of mine. If you haven't read that post and aren't familiar with the law, you'll need to look it over for this one to make sense.

As a reminder, non-random numbers (which includes financial transactions) should occur at the following frequencies for their first digits:

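Expected first-digit frequencies under Benford's Law, P(d) = log10(1 + 1/d):

1: 30.1%, 2: 17.6%, 3: 12.5%, 4: 9.7%, 5: 7.9%, 6: 6.7%, 7: 5.8%, 8: 5.1%, 9: 4.6%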

We will keep this C# code fairly straightforward so anybody can try it out. Therefore, rather than a database or anything too fancy, we'll just read values from a CSV file stored at C:\Transactions.csv on your local drive, one amount per line. We will then parse each line as a double and store it in an array.

/// <summary>
/// Reads C:\Transactions.csv and parses every line as a US-formatted dollar amount.
/// </summary>
/// <returns>
/// One value per line of the file, in file order; or null when the file is
/// missing or any line cannot be parsed (a message is written to the console
/// in both cases).
/// </returns>
private static double[] GetTransactionsFromFile()
{
    try
    {
        string[] allLines = File.ReadAllLines(@"C:\Transactions.csv");
        double[] allTransactions = new double[allLines.Length];

        // Amounts are always parsed with US rules ("$1,234.56"), whatever the
        // machine's own locale is. Created once instead of once per line.
        CultureInfo usCulture = new CultureInfo("en-US");

        for (int i = 0; i < allLines.Length; i++)
        {
            double lineValue;
            if (double.TryParse(allLines[i], NumberStyles.Currency, usCulture, out lineValue))
            {
                allTransactions[i] = lineValue;
            }
            else
            {
                // Report the 1-based line number so it matches what an editor shows.
                Console.WriteLine("Line #" + (i + 1) + " is not a valid dollar amount. Please fix and restart the program.");
                return null;
            }
        }

        return allTransactions;
    }
    catch (FileNotFoundException ex)
    {
        Console.Write(ex.Message);
        Console.WriteLine(" Did you copy the example file or create a new file at that location?");
    }

    return null;
}

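This line of code, excerpted from the above section, will properly strip the currency symbol and thousands separators. Because it passes an explicit en-US culture, it always uses US formatting rules regardless of the computer's locale.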

double.TryParse(allLines[i], NumberStyles.Currency, new CultureInfo("en-US"), out lineValue)

This will pull the first digit of every value in the array:

/// <summary>
/// Tallies the first significant digit (1-9) of every transaction amount.
/// </summary>
/// <param name="allTransactions">
/// Transaction amounts. May be null (for example when loading failed), in which
/// case every count is zero.
/// </param>
/// <returns>
/// A 10-element array in which index d holds the number of amounts whose first
/// significant digit is d. Index 0 is never incremented.
/// </returns>
private static double[] GetFirstDigitFrequency(double[] allTransactions)
{
    double[] firstDigitFrequency = new double[10];

    if (allTransactions == null)
    {
        return firstDigitFrequency;
    }

    for (int i = 0; i < allTransactions.Length; i++)
    {
        int firstDigit = GetFirstSignificantDigit(allTransactions[i]);

        // Amounts without a significant digit (zero, NaN, infinity) are not counted.
        if (firstDigit != 0)
        {
            firstDigitFrequency[firstDigit]++;
        }
    }

    return firstDigitFrequency;
}

/// <summary>
/// Returns the first non-zero digit of <paramref name="value"/>, ignoring the
/// sign, leading zeros and the decimal separator (0.05 yields 5, -123 yields 1).
/// Returns 0 when the value has no non-zero digit.
/// </summary>
private static int GetFirstSignificantDigit(double value)
{
    // The invariant culture keeps the text independent of the machine's locale.
    // The mantissa always precedes any exponent, so the first character in
    // '1'..'9' is the leading significant digit.
    string text = value.ToString(CultureInfo.InvariantCulture);

    foreach (char c in text)
    {
        if (c >= '1' && c <= '9')
        {
            return c - '0';
        }
    }

    return 0;
}

/// <summary>
/// This will output any first digits that don't fall within a tolerance of 50%
/// above or below the expected rate of occurrence. It omits first digit
/// frequencies that lack any transactions and also prints a warning if you
/// don't have at least 25 transactions.
/// </summary>
/// <param name="firstDigitFrequency">
/// Counts indexed by first digit, as produced by GetFirstDigitFrequency.
/// </param>
private static void OutputResults(double[] firstDigitFrequency)
{
    double allowedTolerance = 0.5;
    double total = firstDigitFrequency.Sum();

    if (total < 25)
    {
        Console.WriteLine("You have a low number of transactions which reduces the effectiveness of the test.");
    }

    for (int i = 1; i < 10; i++)
    {
        // Skip this digit only; a "break" here would silently stop checking
        // every higher digit as well.
        if (firstDigitFrequency[i] == 0)
        {
            continue;
        }

        double frequencyPercentage = firstDigitFrequency[i] / total;

        // Benford's Law: P(d) = log10(1 + 1/d).
        double benfordPercentage = Math.Log10(1 + 1.0 / i);
        double lowerBound = benfordPercentage * (1 - allowedTolerance);
        double upperBound = benfordPercentage * (1 + allowedTolerance);

        if (frequencyPercentage < lowerBound || frequencyPercentage > upperBound)
        {
            Console.WriteLine("Review transactions with a first digit of: " + i);
        }
    }

    Console.WriteLine("Complete.");
}

The complete source code:

using System;
using System.Globalization;
using System.IO;
using System.Linq;
namespace ClarkOnCode_FraudDetection
{
    /// <summary>
    /// Console program that checks the first digits of the amounts in
    /// C:\Transactions.csv against Benford's Law and lists digits to review.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the transactions, tallies their first digits and prints the
        /// digits whose frequency is out of tolerance, then waits for Enter.
        /// </summary>
        static void Main(string[] args)
        {
            double[] allTransactions = GetTransactionsFromFile();

            // A null result means loading failed and the reason has already
            // been printed, so there is nothing to analyse.
            if (allTransactions != null)
            {
                double[] firstDigitFrequency = GetFirstDigitFrequency(allTransactions);
                OutputResults(firstDigitFrequency);
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Prints every first digit whose observed frequency is more than 50%
        /// above or below the frequency Benford's Law predicts. Digits with no
        /// transactions are omitted, and a warning is printed when there are
        /// fewer than 25 transactions.
        /// </summary>
        /// <param name="firstDigitFrequency">
        /// Counts indexed by first digit, as produced by GetFirstDigitFrequency.
        /// </param>
        private static void OutputResults(double[] firstDigitFrequency)
        {
            double allowedTolerance = 0.5;
            double total = firstDigitFrequency.Sum();

            if (total < 25)
            {
                Console.WriteLine("You have a low number of transactions which reduces the effectiveness of the test.");
            }

            for (int i = 1; i < 10; i++)
            {
                // Skip this digit only; a "break" here would silently stop
                // checking every higher digit as well.
                if (firstDigitFrequency[i] == 0)
                {
                    continue;
                }

                double frequencyPercentage = firstDigitFrequency[i] / total;

                // Benford's Law: P(d) = log10(1 + 1/d).
                double benfordPercentage = Math.Log10(1 + 1.0 / i);
                double lowerBound = benfordPercentage * (1 - allowedTolerance);
                double upperBound = benfordPercentage * (1 + allowedTolerance);

                if (frequencyPercentage < lowerBound || frequencyPercentage > upperBound)
                {
                    Console.WriteLine("Review transactions with a first digit of: " + i);
                }
            }

            Console.WriteLine("Complete.");
        }

        /// <summary>
        /// Tallies the first significant digit (1-9) of every transaction amount.
        /// </summary>
        /// <param name="allTransactions">
        /// Transaction amounts. May be null, in which case every count is zero.
        /// </param>
        /// <returns>
        /// A 10-element array in which index d holds the number of amounts whose
        /// first significant digit is d. Index 0 is never incremented.
        /// </returns>
        private static double[] GetFirstDigitFrequency(double[] allTransactions)
        {
            double[] firstDigitFrequency = new double[10];

            if (allTransactions == null)
            {
                return firstDigitFrequency;
            }

            for (int i = 0; i < allTransactions.Length; i++)
            {
                int firstDigit = GetFirstSignificantDigit(allTransactions[i]);

                // Amounts without a significant digit (zero, NaN, infinity)
                // are not counted.
                if (firstDigit != 0)
                {
                    firstDigitFrequency[firstDigit]++;
                }
            }

            return firstDigitFrequency;
        }

        /// <summary>
        /// Returns the first non-zero digit of <paramref name="value"/>, ignoring
        /// the sign, leading zeros and the decimal separator (0.05 yields 5,
        /// -123 yields 1). Returns 0 when the value has no non-zero digit.
        /// </summary>
        private static int GetFirstSignificantDigit(double value)
        {
            // The invariant culture keeps the text independent of the machine's
            // locale. The mantissa always precedes any exponent, so the first
            // character in '1'..'9' is the leading significant digit.
            string text = value.ToString(CultureInfo.InvariantCulture);

            foreach (char c in text)
            {
                if (c >= '1' && c <= '9')
                {
                    return c - '0';
                }
            }

            return 0;
        }

        /// <summary>
        /// Reads C:\Transactions.csv and parses every line as a US-formatted
        /// dollar amount.
        /// </summary>
        /// <returns>
        /// One value per line of the file, in file order; or null when the file
        /// is missing or any line cannot be parsed (a message is written to the
        /// console in both cases).
        /// </returns>
        private static double[] GetTransactionsFromFile()
        {
            try
            {
                string[] allLines = File.ReadAllLines(@"C:\Transactions.csv");
                double[] allTransactions = new double[allLines.Length];

                // Amounts are always parsed with US rules ("$1,234.56"), whatever
                // the machine's own locale is. Created once instead of per line.
                CultureInfo usCulture = new CultureInfo("en-US");

                for (int i = 0; i < allLines.Length; i++)
                {
                    double lineValue;
                    if (double.TryParse(allLines[i], NumberStyles.Currency, usCulture, out lineValue))
                    {
                        allTransactions[i] = lineValue;
                    }
                    else
                    {
                        // Report the 1-based line number so it matches what an
                        // editor shows.
                        Console.WriteLine("Line #" + (i + 1) + " is not a valid dollar amount. Please fix and restart the program.");
                        return null;
                    }
                }

                return allTransactions;
            }
            catch (FileNotFoundException ex)
            {
                Console.Write(ex.Message);
                // The leading space separates the hint from the exception text
                // written just before it on the same line.
                Console.WriteLine(" Did you copy the example file or create a new file at that location?");
            }

            return null;
        }
    }
}

Keep Reading

A few more posts worth opening next