How to Build Software: Accounting Fraud Detection

I've gotten several questions on how you might implement Benford's Law, the subject of a previous post I wrote. If you haven't read it and you aren't familiar with the law, you'll need to look it over for this to make sense.

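As a reminder, non-random numbers (which include financial transactions) should have first digits that occur at the following frequencies, given by log10(1 + 1/d) for a first digit d: 1 – 30.1%, 2 – 17.6%, 3 – 12.5%, 4 – 9.7%, 5 – 7.9%, 6 – 6.7%, 7 – 5.8%, 8 – 5.1%, 9 – 4.6%.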

How to Build Software: Accounting Fraud Detection

We will keep this C# code fairly straightforward so anybody can try it out. Therefore, rather than a database or anything too fancy, we'll just import values from a CSV file stored at C:\Transactions.csv on your local drive. We will then parse each row as a double and store it in an array.

        /// <summary>
        /// Reads C:\Transactions.csv and parses every line as a US-dollar amount.
        /// </summary>
        /// <returns>
        /// One value per line of the file, in file order; or <c>null</c> when the file
        /// does not exist or a line cannot be parsed. In both failure cases an
        /// explanatory message is written to the console first.
        /// </returns>
        private static double[] GetTransactionsFromFile()
        {
            try
            {
                string[] allLines = File.ReadAllLines(@"C:\Transactions.csv");
                double[] allTransactions = new double[allLines.Length];

                // The culture is identical for every line, so build it once rather than
                // allocating a new CultureInfo on each loop iteration.
                CultureInfo dollarCulture = new CultureInfo("en-US");

                for (int i = 0; i < allLines.Length; i++)
                {
                    double lineValue;
                    // NumberStyles.Currency accepts the currency symbol, thousands
                    // separators, a decimal point and a sign/parentheses.
                    if (double.TryParse(allLines[i], NumberStyles.Currency, dollarCulture, out lineValue))
                    {
                        allTransactions[i] = lineValue;
                    }
                    else
                    {
                        // Report the 1-based line number so it matches what a text editor shows.
                        Console.WriteLine("Line #" + (i + 1) + " is not a valid dollar amount. Please fix and restart the program.");
                        return null;
                    }
                }

                return allTransactions;
            }
            catch (FileNotFoundException ex)
            {
                Console.Write(ex.Message);
                Console.WriteLine(" Did you copy the example file or create a new file at that location?");
            }

            // Only reached when the file was not found.
            return null;
        }
        

This line of code, excerpted from the above section, will properly strip the currency symbol and the thousands separators (commas) using the en-US culture, regardless of the computer's locale.

        double.TryParse(allLines[i], NumberStyles.Currency, new CultureInfo("en-US"), out lineValue)
        

This will pull the first digit of every value in the array:

        /// <summary>
        /// Counts how often each digit 1-9 occurs as the leading significant digit of
        /// the supplied amounts.
        /// </summary>
        /// <param name="allTransactions">
        /// Amounts to tally. May be <c>null</c>, in which case every count is zero.
        /// </param>
        /// <returns>
        /// A 10-element array in which element d holds the number of amounts whose
        /// first non-zero digit is d. Element 0 is never incremented: amounts with no
        /// non-zero digit (zero, NaN, infinity) are skipped.
        /// </returns>
        private static double[] GetFirstDigitFrequency(double[] allTransactions)
        {
            double[] firstDigitFrequency = new double[10];

            // Nothing to count when no transactions were supplied.
            if (allTransactions == null)
            {
                return firstDigitFrequency;
            }

            for (int i = 0; i < allTransactions.Length; i++)
            {
                // Round-trip text of the value, e.g. "-12.5", "0.05" or "1E-05".
                string text = allTransactions[i].ToString("R", CultureInfo.InvariantCulture);

                // The first character in '1'..'9' is the leading significant digit;
                // scanning for it steps over a minus sign, leading zeros and the
                // decimal point, and stops before any exponent digits.
                foreach (char c in text)
                {
                    if (c >= '1' && c <= '9')
                    {
                        firstDigitFrequency[c - '0']++;
                        break;
                    }
                }
            }

            return firstDigitFrequency;
        }
        

This will output any first digits that don't fall within a tolerance of 50% above or below the expected rate of occurrence. It omits first digit frequencies that lack any transactions and also prints a warning if you don't have at least 25 transactions.

        /// <summary>
        /// Compares the observed first-digit distribution with the one predicted by
        /// Benford's Law and writes every digit that deviates too far to the console.
        /// </summary>
        /// <param name="firstDigitFrequency">
        /// Occurrence counts indexed by leading digit; elements 1 through 9 are read.
        /// Element 0 is ignored because 0 is not a valid leading digit.
        /// </param>
        private static void OutputResults(double[] firstDigitFrequency)
        {
            // A digit is flagged when its share is more than 50% above or below
            // the share Benford's Law predicts for it.
            double allowedTolerance = 0.5;

            // Benford's percentages add up to 100% over digits 1-9 only, so the
            // denominator must not include anything stored in bucket 0.
            double total = firstDigitFrequency.Skip(1).Sum();
            double frequencyPercentage;
            double benfordPercentage;

            if (total < 25)
            {
                Console.WriteLine("You have a low number of transactions which reduces the effectiveness of the test.");
            }

            for (int i = 1; i < 10; i++)
            {
                // Skip digits with no transactions, but keep checking the remaining
                // digits (this also avoids dividing by a zero total).
                if (firstDigitFrequency[i] == 0)
                {
                    continue;
                }

                frequencyPercentage = firstDigitFrequency[i] / total;
                // Expected share of leading digit i: log10(1 + 1/i).
                benfordPercentage = Math.Log10(1 + 1.0 / i);

                double lowerBound = benfordPercentage * (1 - allowedTolerance);
                double upperBound = benfordPercentage * (1 + allowedTolerance);
                if (frequencyPercentage < lowerBound || frequencyPercentage > upperBound)
                {
                    Console.WriteLine("Review transactions with a first digit of: " + i);
                }
            }

            Console.WriteLine("Complete.");
        }
        

The complete source code:

        using System;
        using System.Globalization;
        using System.IO;
        using System.Linq;
        namespace ClarkOnCode_FraudDetection
        {
            /// <summary>
            /// Console program that applies a Benford's Law first-digit test to the
            /// dollar amounts stored in C:\Transactions.csv.
            /// </summary>
            class Program
            {
                /// <summary>
                /// Loads the transactions, tallies their leading digits, reports the
                /// digits that deviate from Benford's Law, then waits for Enter.
                /// </summary>
                /// <param name="args">Command-line arguments (unused).</param>
                static void Main(string[] args)
                {
                    double[] allTransactions = GetTransactionsFromFile();

                    // A null result means loading failed and the reason has already
                    // been printed, so there is nothing to analyse.
                    if (allTransactions != null)
                    {
                        double[] firstDigitFrequency = GetFirstDigitFrequency(allTransactions);
                        OutputResults(firstDigitFrequency);
                    }

                    // Keep the console window open until the user presses Enter.
                    Console.ReadLine();
                }

                /// <summary>
                /// Compares the observed first-digit distribution with the one predicted
                /// by Benford's Law and writes every digit that deviates too far to the
                /// console.
                /// </summary>
                /// <param name="firstDigitFrequency">
                /// Occurrence counts indexed by leading digit; elements 1 through 9 are
                /// read. Element 0 is ignored because 0 is not a valid leading digit.
                /// </param>
                private static void OutputResults(double[] firstDigitFrequency)
                {
                    // A digit is flagged when its share is more than 50% above or
                    // below the share Benford's Law predicts for it.
                    double allowedTolerance = 0.5;

                    // Benford's percentages add up to 100% over digits 1-9 only, so
                    // the denominator must not include anything stored in bucket 0.
                    double total = firstDigitFrequency.Skip(1).Sum();
                    double frequencyPercentage;
                    double benfordPercentage;

                    if (total < 25)
                    {
                        Console.WriteLine("You have a low number of transactions which reduces the effectiveness of the test.");
                    }

                    for (int i = 1; i < 10; i++)
                    {
                        // Skip digits with no transactions, but keep checking the
                        // remaining digits (this also avoids dividing by a zero total).
                        if (firstDigitFrequency[i] == 0)
                        {
                            continue;
                        }

                        frequencyPercentage = firstDigitFrequency[i] / total;
                        // Expected share of leading digit i: log10(1 + 1/i).
                        benfordPercentage = Math.Log10(1 + 1.0 / i);

                        double lowerBound = benfordPercentage * (1 - allowedTolerance);
                        double upperBound = benfordPercentage * (1 + allowedTolerance);
                        if (frequencyPercentage < lowerBound || frequencyPercentage > upperBound)
                        {
                            Console.WriteLine("Review transactions with a first digit of: " + i);
                        }
                    }

                    Console.WriteLine("Complete.");
                }

                /// <summary>
                /// Counts how often each digit 1-9 occurs as the leading significant
                /// digit of the supplied amounts.
                /// </summary>
                /// <param name="allTransactions">
                /// Amounts to tally. May be <c>null</c>, in which case every count is zero.
                /// </param>
                /// <returns>
                /// A 10-element array in which element d holds the number of amounts
                /// whose first non-zero digit is d. Element 0 is never incremented:
                /// amounts with no non-zero digit (zero, NaN, infinity) are skipped.
                /// </returns>
                private static double[] GetFirstDigitFrequency(double[] allTransactions)
                {
                    double[] firstDigitFrequency = new double[10];

                    // Nothing to count when no transactions were supplied.
                    if (allTransactions == null)
                    {
                        return firstDigitFrequency;
                    }

                    for (int i = 0; i < allTransactions.Length; i++)
                    {
                        // Round-trip text of the value, e.g. "-12.5", "0.05" or "1E-05".
                        string text = allTransactions[i].ToString("R", CultureInfo.InvariantCulture);

                        // The first character in '1'..'9' is the leading significant
                        // digit; scanning for it steps over a minus sign, leading
                        // zeros and the decimal point, and stops before any exponent.
                        foreach (char c in text)
                        {
                            if (c >= '1' && c <= '9')
                            {
                                firstDigitFrequency[c - '0']++;
                                break;
                            }
                        }
                    }

                    return firstDigitFrequency;
                }

                /// <summary>
                /// Reads C:\Transactions.csv and parses every line as a US-dollar amount.
                /// </summary>
                /// <returns>
                /// One value per line of the file, in file order; or <c>null</c> when
                /// the file does not exist or a line cannot be parsed. In both failure
                /// cases an explanatory message is written to the console first.
                /// </returns>
                private static double[] GetTransactionsFromFile()
                {
                    try
                    {
                        string[] allLines = File.ReadAllLines(@"C:\Transactions.csv");
                        double[] allTransactions = new double[allLines.Length];

                        // The culture is identical for every line, so build it once
                        // rather than allocating a new CultureInfo per iteration.
                        CultureInfo dollarCulture = new CultureInfo("en-US");

                        for (int i = 0; i < allLines.Length; i++)
                        {
                            double lineValue;
                            // NumberStyles.Currency accepts the currency symbol, thousands
                            // separators, a decimal point and a sign/parentheses.
                            if (double.TryParse(allLines[i], NumberStyles.Currency, dollarCulture, out lineValue))
                            {
                                allTransactions[i] = lineValue;
                            }
                            else
                            {
                                // Report the 1-based line number so it matches a text editor.
                                Console.WriteLine("Line #" + (i + 1) + " is not a valid dollar amount. Please fix and restart the program.");
                                return null;
                            }
                        }

                        return allTransactions;
                    }
                    catch (FileNotFoundException ex)
                    {
                        Console.Write(ex.Message);
                        Console.WriteLine(" Did you copy the example file or create a new file at that location?");
                    }

                    // Only reached when the file was not found.
                    return null;
                }
            }
        }