How to Build Software: Accounting Fraud Detection
I've gotten several questions on how you might implement Benford's Law, which was the subject of a previous post of mine. If you haven't read that post and aren't familiar with the law, you'll need to look it over for this to make sense.
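As a reminder, the first digits of non-random numbers (which include financial transactions) should occur at the following frequencies: 1 – 30.1%, 2 – 17.6%, 3 – 12.5%, 4 – 9.7%, 5 – 7.9%, 6 – 6.7%, 7 – 5.8%, 8 – 5.1%, 9 – 4.6%. Each value is log10(1 + 1/d) for first digit d.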
We will keep this C# code fairly straightforward so anybody can try it out. Therefore, rather than a database or anything too fancy, we'll just import values from a CSV file stored at C:\Transactions.csv on your local drive. We will then parse each row as a double and store it in an array.
/// <summary>
/// Reads C:\Transactions.csv and parses every line as a US-dollar amount.
/// </summary>
/// <returns>
/// One value per line of the file, or <c>null</c> when the file does not exist
/// or a line is not a valid amount (an explanation is written to the console first).
/// </returns>
private static double[] GetTransactionsFromFile()
{
    try
    {
        string[] allLines = File.ReadAllLines(@"C:\Transactions.csv");
        double[] allTransactions = new double[allLines.Length];

        // Created once instead of once per row. Pinning en-US means "$" and ","
        // are handled the same way whatever the machine's regional settings are.
        CultureInfo usCulture = new CultureInfo("en-US");

        for (int i = 0; i < allLines.Length; i++)
        {
            double lineValue;
            if (!double.TryParse(allLines[i], NumberStyles.Currency, usCulture, out lineValue))
            {
                // Line numbers are reported 1-based, as a spreadsheet or editor shows them.
                Console.WriteLine("Line #" + (i + 1) + " is not a valid dollar amount. Please fix and restart the program.");
                return null;
            }

            allTransactions[i] = lineValue;
        }

        return allTransactions;
    }
    catch (FileNotFoundException ex)
    {
        Console.Write(ex.Message);
        Console.WriteLine(" Did you copy the example file or create a new file at that location?");
    }

    return null;
}
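This line of code, excerpted from the above section, parses each value while ignoring the currency symbol and thousands separators (commas). It always uses US-English (en-US) formatting rules, regardless of the computer's locale.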
double.TryParse(allLines[i], NumberStyles.Currency, new CultureInfo("en-US"), out lineValue)
This will pull the first digit of every value in the array:
/// <summary>
/// Counts how many transactions start with each leading significant digit.
/// </summary>
/// <param name="allTransactions">The transaction amounts to inspect.</param>
/// <returns>
/// A ten-element array in which index d (1-9) holds the number of amounts whose
/// first significant digit is d. Index 0 is always zero; it is kept so that
/// digits can be used directly as indexes.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="allTransactions"/> is null.</exception>
private static double[] GetFirstDigitFrequency(double[] allTransactions)
{
    if (allTransactions == null)
    {
        throw new ArgumentNullException("allTransactions");
    }

    double[] firstDigitFrequency = new double[10];
    foreach (double transaction in allTransactions)
    {
        int firstDigit = GetLeadingDigit(transaction);

        // 0 means "no leading digit" (zero, NaN, infinity); such values say
        // nothing about Benford's Law, so they are left out of the counts.
        if (firstDigit != 0)
        {
            firstDigitFrequency[firstDigit]++;
        }
    }

    return firstDigitFrequency;
}

/// <summary>
/// Returns the first significant (non-zero) digit of a value, ignoring its sign,
/// leading zeros and decimal point, or 0 when the value has no such digit.
/// </summary>
private static int GetLeadingDigit(double value)
{
    // The invariant culture keeps the text form independent of regional settings.
    // The first character in '1'..'9' is the leading digit for "-45.2", "0.05"
    // and "1E-07" alike.
    string text = value.ToString(CultureInfo.InvariantCulture);
    foreach (char c in text)
    {
        if (c >= '1' && c <= '9')
        {
            return c - '0';
        }
    }

    return 0;
}

/// <summary>
/// Outputs any first digits that don't fall within a tolerance of 50% above or
/// below the expected rate of occurrence. It omits first digits that lack any
/// transactions, and also prints a warning if there are fewer than 25 transactions.
/// </summary>
/// <param name="firstDigitFrequency">
/// Counts indexed by first digit, as produced by GetFirstDigitFrequency.
/// </param>
private static void OutputResults(double[] firstDigitFrequency)
{
    double allowedTolerance = 0.5;

    // Only digits 1-9 take part in the test, so bucket 0 is excluded from the total.
    double total = firstDigitFrequency.Skip(1).Sum();

    if (total < 25)
    {
        Console.WriteLine("You have a low number of transactions which reduces the effectiveness of the test.");
    }

    for (int i = 1; i < 10; i++)
    {
        // Skip just this digit; the remaining digits must still be checked.
        if (firstDigitFrequency[i] == 0)
        {
            continue;
        }

        double frequencyPercentage = firstDigitFrequency[i] / total;

        // Benford's Law: P(d) = log10(1 + 1/d).
        double benfordPercentage = Math.Log10(1 + 1.0 / i);

        bool tooRare = frequencyPercentage < benfordPercentage * (1 - allowedTolerance);
        bool tooCommon = frequencyPercentage > benfordPercentage * (1 + allowedTolerance);
        if (tooRare || tooCommon)
        {
            Console.WriteLine("Review transactions with a first digit of: " + i);
        }
    }

    Console.WriteLine("Complete.");
}
The complete source code:
using System;
using System.Globalization;
using System.IO;
using System.Linq;
namespace ClarkOnCode_FraudDetection
{
    /// <summary>
    /// Console program that checks the first digits of the amounts in
    /// C:\Transactions.csv against the distribution predicted by Benford's Law.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the transactions, tallies their first digits and reports the
        /// digits that deviate from Benford's Law, then waits for Enter.
        /// </summary>
        static void Main(string[] args)
        {
            double[] allTransactions = GetTransactionsFromFile();

            // A null result means loading failed and the reason was already printed;
            // skip the analysis instead of crashing on a null array.
            if (allTransactions != null)
            {
                double[] firstDigitFrequency = GetFirstDigitFrequency(allTransactions);
                OutputResults(firstDigitFrequency);
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Outputs any first digits that don't fall within a tolerance of 50% above
        /// or below the expected rate of occurrence. Digits that lack any transactions
        /// are omitted, and a warning is printed for fewer than 25 transactions.
        /// </summary>
        /// <param name="firstDigitFrequency">
        /// Counts indexed by first digit, as produced by GetFirstDigitFrequency.
        /// </param>
        private static void OutputResults(double[] firstDigitFrequency)
        {
            double allowedTolerance = 0.5;

            // Only digits 1-9 take part in the test, so bucket 0 is excluded from the total.
            double total = firstDigitFrequency.Skip(1).Sum();

            if (total < 25)
            {
                Console.WriteLine("You have a low number of transactions which reduces the effectiveness of the test.");
            }

            for (int i = 1; i < 10; i++)
            {
                // Skip just this digit; the remaining digits must still be checked.
                if (firstDigitFrequency[i] == 0)
                {
                    continue;
                }

                double frequencyPercentage = firstDigitFrequency[i] / total;

                // Benford's Law: P(d) = log10(1 + 1/d).
                double benfordPercentage = Math.Log10(1 + 1.0 / i);

                bool tooRare = frequencyPercentage < benfordPercentage * (1 - allowedTolerance);
                bool tooCommon = frequencyPercentage > benfordPercentage * (1 + allowedTolerance);
                if (tooRare || tooCommon)
                {
                    Console.WriteLine("Review transactions with a first digit of: " + i);
                }
            }

            Console.WriteLine("Complete.");
        }

        /// <summary>
        /// Counts how many transactions start with each leading significant digit.
        /// </summary>
        /// <param name="allTransactions">The transaction amounts to inspect.</param>
        /// <returns>
        /// A ten-element array in which index d (1-9) holds the number of amounts whose
        /// first significant digit is d. Index 0 is always zero; it is kept so that
        /// digits can be used directly as indexes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="allTransactions"/> is null.</exception>
        private static double[] GetFirstDigitFrequency(double[] allTransactions)
        {
            if (allTransactions == null)
            {
                throw new ArgumentNullException("allTransactions");
            }

            double[] firstDigitFrequency = new double[10];
            foreach (double transaction in allTransactions)
            {
                int firstDigit = GetLeadingDigit(transaction);

                // 0 means "no leading digit" (zero, NaN, infinity); such values say
                // nothing about Benford's Law, so they are left out of the counts.
                if (firstDigit != 0)
                {
                    firstDigitFrequency[firstDigit]++;
                }
            }

            return firstDigitFrequency;
        }

        /// <summary>
        /// Returns the first significant (non-zero) digit of a value, ignoring its sign,
        /// leading zeros and decimal point, or 0 when the value has no such digit.
        /// </summary>
        private static int GetLeadingDigit(double value)
        {
            // The invariant culture keeps the text form independent of regional settings.
            // The first character in '1'..'9' is the leading digit for "-45.2", "0.05"
            // and "1E-07" alike.
            string text = value.ToString(CultureInfo.InvariantCulture);
            foreach (char c in text)
            {
                if (c >= '1' && c <= '9')
                {
                    return c - '0';
                }
            }

            return 0;
        }

        /// <summary>
        /// Reads C:\Transactions.csv and parses every line as a US-dollar amount.
        /// </summary>
        /// <returns>
        /// One value per line of the file, or <c>null</c> when the file does not exist
        /// or a line is not a valid amount (an explanation is written to the console first).
        /// </returns>
        private static double[] GetTransactionsFromFile()
        {
            try
            {
                string[] allLines = File.ReadAllLines(@"C:\Transactions.csv");
                double[] allTransactions = new double[allLines.Length];

                // Created once instead of once per row. Pinning en-US means "$" and ","
                // are handled the same way whatever the machine's regional settings are.
                CultureInfo usCulture = new CultureInfo("en-US");

                for (int i = 0; i < allLines.Length; i++)
                {
                    double lineValue;
                    if (!double.TryParse(allLines[i], NumberStyles.Currency, usCulture, out lineValue))
                    {
                        // Line numbers are reported 1-based, as a spreadsheet or editor shows them.
                        Console.WriteLine("Line #" + (i + 1) + " is not a valid dollar amount. Please fix and restart the program.");
                        return null;
                    }

                    allTransactions[i] = lineValue;
                }

                return allTransactions;
            }
            catch (FileNotFoundException ex)
            {
                Console.Write(ex.Message);

                // The leading space separates this hint from the exception message,
                // which is written without a trailing space or newline.
                Console.WriteLine(" Did you copy the example file or create a new file at that location?");
            }

            return null;
        }
    }
}


