5c4f57eaa118e250013c5f614f2f59964c332642
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

<?php

# Converts the entries of the IBAN_Registry.txt file to registry.txt format
# (php-iban's required internal format). The converted registry is printed on
# standard output.

# init
require_once(dirname(dirname(__FILE__)) . '/php-iban.php');
date_default_timezone_set('UTC'); # mutes a warning

# read registry: iconv transliterates the file to ASCII and writes visible
# <0x..> / <U+....> placeholders for anything it cannot convert
$data = `iconv -f utf8 -t ascii --byte-subst="<0x%x>" --unicode-subst="<U+%04X>" 'IBAN_Registry.txt'`;
if(!is_string($data) || $data === '') {
 # Report the failure on stderr and exit non-zero. A plain die("...") would
 # print the message on stdout (into the generated registry when redirected)
 # and terminate with exit status 0.
 file_put_contents('php://stderr', "Couldn't read IBAN_Registry.txt - try downloading from the location described in the REGISTRY-URL file.\n");
 exit(1);
}

# print header line
print "country_code|country_name|domestic_example|bban_example|bban_format_swift|bban_format_regex|bban_length|iban_example|iban_format_swift|iban_format_regex|iban_length|bban_bankid_start_offset|bban_bankid_stop_offset|bban_branchid_start_offset|bban_branchid_stop_offset|registry_edition|country_sepa\n";

# break into lines (runs of CR/LF act as a single separator)
$lines = preg_split('/[\r\n]+/',$data);
18) 
19) # display
20) foreach($lines as $line) {
21)  # if it's not a blank line, and it's not the header row
22)  if($line != '' && !preg_match('/SEPA Country/',$line)) {
23)   # extract individual tab-separated fields
24)   $bits = explode("\t",$line);
25)   # remove quotes and superfluous whitespace on fields that have them.
26)   for($i=0;$i<count($bits);$i++) {
27)    $bits[$i] = preg_replace('/^"(.*)"$/','$1',$bits[$i]);
28)    $bits[$i] = preg_replace('/^ */','',$bits[$i]);
29)    $bits[$i] = preg_replace('/ *$/','',$bits[$i]);
30)   }
31)   # assigned fields to named variables
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

32) #  print "-------\n";
33) #  print $line;
34) #  print "-------\n";
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

35)   list($country_name,$country_code,$domestic_example,$bban,$bban_structure,$bban_length,$bban_bi_position,$bban_bi_length,$bban_bi_example,$bban_example,$iban,$iban_structure,$iban_length,$iban_electronic_example,$iban_print_example,$country_sepa,$contact_details) = $bits;
36)   # sanitise
37)   $country_code = strtoupper(substr($country_code,0,2));       # sanitise comments away
38)   $bban_structure = preg_replace('/[:;]/','',$bban_structure); # errors seen in Germany, Hungary entries
39)   $iban_structure = preg_replace('/, .*$/','',$iban_structure); # duplicates for FO, GL seen in DK
40)   $iban_electronic_example = preg_replace('/, .*$/','',$iban_electronic_example); # duplicates for FO, GL seen in DK
41)   if($country_code=='MU') {
42)    $iban_electronic_example = str_replace(' ','',$iban_electronic_example); # MU example has a spurious space
43)   }
44)   if($country_code=='CZ') {
45)    $iban_electronic_example = preg_replace('/ \w{10,}+$/','',$iban_electronic_example); # extra example for CZ
46)    $iban_print_example = preg_replace('/^(CZ.. .... .... .... .... ....).*$/','$1',$iban_print_example); # extra example
47)   }
48)   if($country_code=='FI') {
49)    # remove additional example
50)    $iban_electronic_example = preg_replace('/ or .*$/','',$iban_electronic_example);
51)    # fix bban example to remove verbosity and match domestic example
52)    $bban = '12345600000785';
53)   }
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

54)   if($country_code=='KZ') {
55)    # fix presence of multiline free-text in KZ IBAN structure field
56)    $iban_structure = '2!a2!n3!n13!c';
57)   }
58)   if($country_code=='QA') {
59)    # fix the lack BBAN structure provision in the TXT format registry
60)    $bban_structure = '4!a4!n17!c';
61)    # fix broken IBAN structure provision
62)    $iban_structure = 'QA2!n4!a4!n17!c';
63)   }
64)   if($country_code=='JO') {
65)    $bban_bi_length=4; # not '4!a' as suggested
66)   }
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

67)   $iban_print_example = preg_replace('/, .*$/','',$iban_print_example); # DK includes FO and GL examples in one record
68) 
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

69)   # drop leading 2!a in iban structure.
70)   #  .. should actually be the country code in question
71)   if(substr($iban_structure,0,3) == '2!a') {
72)    $iban_structure = $country_code . substr($iban_structure,3);
73)   }
74) 
75)   # calculate $bban_regex from $bban_structure
76)   $bban_regex = swift_to_regex($bban_structure);
77)   # calculate $iban_regex from $iban_structure
78)   $iban_regex = swift_to_regex($iban_structure);
79)   print "[DEBUG] got $iban_regex from $iban_structure\n";
80) 
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

81)  # debugging
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

82)  if(true) {
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

83)   print "[$country_name ($country_code)]\n";
84)   print "Domestic account number example: $domestic_example\n";
85)   print "BBAN structure:                  $bban_structure\n";
86)   print "BBAN length:                     $bban_length\n";
87)   print "BBAN bank identifier position:   $bban_bi_position\n";
88)   print "BBAN bank identifier length:     $bban_bi_length\n";
89)   print "BBAN bank identifier example:    $bban_bi_example\n";
90)   print "BBAN example:                    $bban_example\n";
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

91)   print "BBAN regex (calculated):         $bban_regex\n";
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

92)   print "IBAN structure:                  $iban_structure\n";
93)   print "IBAN length:                     $iban_length\n";
94)   print "IBAN electronic format example:  $iban_electronic_example\n";
95)   print "IBAN print format example:       $iban_print_example\n";
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

96)   print "IBAN Regex (calculated):         $iban_regex\n";
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

97)   print "SEPA country:                    $country_sepa\n";
98)   print "Contact details:                 $contact_details\n\n";
99)  }
100) 
101)   # calculate numeric $bban_length
102)   $bban_length = preg_replace('/[^\d]/','',$bban_length);
103)   # calculate numeric $iban_length
104)   $iban_length = preg_replace('/[^\d]/','',$iban_length);
105)   # calculate bban_bankid_<start|stop>_offset
106)   # .... First we have to parse the freetext $bban_bi_position, eg: 
107)   # Bank Identifier 1-3, Branch Identifier
108)   # Position 1-2
109)   # Positions 1-2
110)   # Positions 1-3
111)   # Positions 1-3 ;Branch is not available
112)   # Positions 1-3, Branch identifier
113)   # Positions 1-3, Branch identifier positions
114)   # Positions 1-4
115)   # Positions 1-4, Branch identifier
116)   # Positions 1-4, Branch identifier positions
117)   # Positions 1-5
118)   # Positions 1-5 (positions 1-2 bank identifier; positions 3-5 branch identifier). In case of payment institutions Positions 1-5, Branch identifier positions
119)   # Positions 1-6,  Branch identifier positions
120)   # Positions 1-6. First two digits of bank identifier indicate the bank or banking group (For example, 1 or 2 for Nordea, 31 for Handelsbanken, 5 for cooperative banks etc)
121)   # Positions 1-7
122)   # Positions 1-8
123)   # Positions 2-6, Branch identifier positions
124)   # positions 1-3, Branch identifier positions
125)   #
126)   #  ... our algorithm is as follows:
127)   #   - find all <digit>-<digit> tokens
128)   preg_match_all('/(\d)-(\d\d?)/',$bban_bi_position,$matches);
129)   #   - discard overlaps ({1-5,1-2,3-5} becomes {1-2,3-5})
130)   $tmptokens = array();
131)   for($j=0;$j<count($matches[0]);$j++) {
132)    #print "tmptokens was... " . print_r($tmptokens,1) . "\n";
133)    $from = $matches[1][$j];
134)    $to = $matches[2][$j];
135)    #      (if we don't yet have a match starting here, or it goes further,
136)    #       overwrite the match-from-this-position record)
137)    if(!isset($tmptokens[$from]) || $to < $tmptokens[$from]) {
138)     $tmptokens[$from] = $to;
139)    }
140)   }
141)   unset($matches); # done
142)   #   - assume the token starting from position 1 is the bank identifier
143)   #     (or, if it does not exist, the token starting from position 2)
144)   $bban_bankid_start_offset = 0;              # decrement 1 on assignment
145)   if(isset($tmptokens[1])) {
146)    $bban_bankid_stop_offset = $tmptokens[1]-1; # decrement 1 on assignment
147)    unset($tmptokens[1]);
148)   }
149)   else {
150)    $bban_bankid_stop_offset = $tmptokens[2]-1; # decrement 1 on assignment
151)    unset($tmptokens[2]);
152)   }
153)   #   - assume any subsequent token, if present, is the branch identifier.
154)   $tmpkeys = array_keys($tmptokens);
155)   $start = array_shift($tmpkeys);
156)   unset($tmpkeys); # done
157)   $bban_branchid_start_offset='';
158)   $bban_branchid_stop_offset='';
159)   if($start!= '') {
160)    # we have a branch identifier!
161)    $bban_branchid_start_offset=$start-1;
162)    $bban_branchid_stop_offset=$tmptokens[$start]-1;
163)   }
164)   else {
165)    # (note: this codepath occurs for around two thirds of all records)
166)    # we have not yet found a branch identifier. HOWEVER, we can analyse the
167)    # structure of the BBAN to determine whether there is more than one
168)    # remaining non-tiny field (tiny fields on the end of a BBAN typically
169)    # being checksums) and, if so, assume that the first/shorter one is the
170)    # branch identifier.
171)    $reduced_bban_structure = preg_replace('/^\d+![nac]/','',$bban_structure);
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

172) #   print "[DEBUG] reduced BBAN structure = $reduced_bban_structure\n";
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

173)    $tokens = swift_tokenize($reduced_bban_structure,1);
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

174) #   print "[DEBUG] tokens = " + json_encode($tokens,1);
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

175)    # discard any tokens of length 1 or 2
176)    for($t=0;$t<count($tokens[0]);$t++) {
177)     if($tokens[1][$t] < 3) {
178)      $tokens['discarded'][$t] = 1;
179)     }
180)    }
181)    # interesting fields are those that are not discarded...
182)    if(!isset($tokens['discarded'])) {
183)     $interesting_field_count = count($tokens[0]); }
184)    else {
185)     $interesting_field_count = (count($tokens[0])-count($tokens['discarded']));
186)    }
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

187) #   print "[DEBUG] interesting field count = $interesting_field_count\n";
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

188)    # ...if we have at least two of them, there's a branchid-type field
189)    if($interesting_field_count >= 2) {
190)     # now loop through until we assign the branchid start offset
191)     # (this occurs just after the first non-discarded field)
192)     $found=0;
193)     for($f=0; (($found==0) && ($f<count($tokens[0]))); $f++) {
194)      # if this is a non-discarded token, of >2 length...
195)      if((!isset($tokens['discarded'][$f]) || $tokens['discarded'][$f] != 1) && $tokens[1][$f]>2) {
196)       # ... then assign.
197)       $pre_offset = $bban_bankid_stop_offset+1; # this is the offset before we reduced the structure to remove the bankid field
198)       $bban_branchid_start_offset = $pre_offset + $tokens['offset'][$f];
199)       $bban_branchid_stop_offset  = $pre_offset + $tokens['offset'][$f] + $tokens[1][$f] - 1; # decrement by one on assignment
200)       $found=1;
201)      }
202)     }
203)    }
204)   }
205) 
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

206)   # fix for Jordan
207)   if($country_code == 'JO') {
208)    $bban_bankid_start_offset = 0;
209)    $bban_bankid_stop_offset = 3;
210)    $bban_branchid_start_offset = 4;
211)    $bban_branchid_stop_offset = 7;
212)   }
213) 
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

214)   # calculate 1=Yes, 0=No for $country_sepa
215)   # NOTE: This is buggy due to the free inclusion of random text by the registry publishers.
216)   #       Notably it requires modification for places like Finland and Portugal where these
217)   #       comments are known to exist.
218)   if(strtolower($country_sepa)=='yes') { $country_sepa=1; } else { $country_sepa = 0; }
219)   # set registry edition
220)   $registry_edition = date('Y-m-d');
221) 
222)   # now prepare generate our registry lines...
223)   $to_generate = array($country_code=>$country_name);
224)   if($country_code == 'DK') {
225)    $to_generate = array('DK'=>$country_name,'FO'=>'Faroe Islands','GL'=>'Greenland');
226)   }
227)   elseif($country_code == 'FR') {
228)    $to_generate = array('FR'=>$country_name,'BL'=>'Saint Barthelemy','GF'=>'French Guyana','GP'=>'Guadelope','MF'=>'Saint Martin (French Part)','MQ'=>'Martinique','RE'=>'Reunion','PF'=>'French Polynesia','TF'=>'French Southern Territories','YT'=>'Mayotte','NC'=>'New Caledonia','PM'=>'Saint Pierre et Miquelon','WF'=>'Wallis and Futuna Islands');
229)   }
230) 
231)   # output loop
232)   foreach($to_generate as $country_code=>$country_name) {
233)    # fixes for fields duplicating country code
234)    #print "CHECKSUM-BEFORE[$country_code] = $iban_electronic_example\n";
235)    $iban_electronic_example = iban_set_checksum($country_code .  substr($iban_electronic_example,2));
236)    #print "CHECKSUM-AFTER[$country_code]  = $iban_electronic_example\n";
237)    $iban_structure = $country_code . substr($iban_structure,2);
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

238)    # step 1
239)    $iban_regex_fixed = '^' . $country_code;
240)    $tmp_country_code = substr($iban_regex,1,2);
241)    #print "[DEBUG] $tmp_country_code\n";
242)    # route #1 ... here we are dealing with a country code in the string already
243)    if(preg_match('/^[A-Z][A-Z]$/',$tmp_country_code)) {
244)     #print "[DEBUG] route #1\n";
245)     $iban_regex_fixed = $iban_regex_fixed . substr($iban_regex,3);
246)    }
247)    # route #2 ... here there is no country code yet present
248)    else {
249)     #print "[DEBUG] route #2\n";
250)     $iban_regex_fixed = $iban_regex_fixed . substr($iban_regex,1);
251)    }
252)    #print "[DEBUG] substited '$iban_regex_fixed' for '$iban_regex'\n";
Bernd Wurst Erste Version des SEPA-Mand...

Bernd Wurst authored 11 years ago

253)    # output
Bernd Wurst externals aktualisiert

Bernd Wurst authored 8 years ago

254)    print "$country_code|$country_name|$domestic_example|$bban_example|$bban_structure|$bban_regex|$bban_length|$iban_electronic_example|$iban_structure|$iban_regex_fixed|$iban_length|$bban_bankid_start_offset|$bban_bankid_stop_offset|$bban_branchid_start_offset|$bban_branchid_stop_offset|$registry_edition|$country_sepa\n";