use strict;
use warnings;
use feature 'say';
use v5.26;
use feature qw(signatures);
no warnings qw(experimental::signatures);
use HTML::TableExtract;
use Mojo::UserAgent;
use Mojo::JSON qw(j encode_json);
use Excel::Writer::XLSX;
use Text::CSV;
use DDP;
use SQL::Abstract::Pg;
use Time::Piece;
# use Net::SSH::Tunnel;
use lib '/mnt/vol/dev/pc/Packages/';
use sqlConn;
# init SSH Tunnel
# system ("ssh -fNT -L 5433:localhost:5432 ubuntu\@18.214.5.20 -i /home/superfly/.ssh/nehantic-dev.pem -p 2222");
#$initialize DB
my $dbh = sqlConn->new( db => 'pc' )->dbh;
my $sql = SQL::Abstract->new();
my $sql_tbl = 'pc.opportunity';
my $base_url = 'http://www.mybidmatch.com';
my $bm_home = 'http://www.mybidmatch.com/go?sub=058FAE4A-456C-4FF5-BA74-8EDD033DE87D';
##find previous
my ($stmt, @bind) = $sql->select($sql_tbl, 'notif_url');
my $sth = $dbh->prepare($stmt);
$sth->execute(@bind);
my $prev = $sth->fetchall_hashref('notif_url');
my $bm = extract_bidmatch($bm_home);
for my $day (@$bm) {
"a" =~ /a/; ## Reset captures to undef.
my $url = $day->[0];
my $date = $day->[1];
next if $prev->{$url};
my $results = get_results($url);
map {push @$_, $url} @$results;
map {push @$_, $date} @$results;
my $sam_data = get_sam_data($results);
insert_rows($results);
}
refresh_report();
sub refresh_report() {
my $sql = q'INSERT INTO pc.opportunity_report
SELECT 1, e.* FROM pc.opportunity_extract e LEFT JOIN pc.opportunity_report r USING (id)
WHERE r.id IS NULL';
$dbh->do($sql);
}
sub get_results($url) {
my $ua = Mojo::UserAgent->new;
my $res = $ua->get($url)->result->to_string;
return if $res =~ /No articles/;
$res =~ /(
?(.|\n)*?<\/table>)/gm;
return extract_table($1);
}
sub extract_table($html) {
my $te = HTML::TableExtract->new(keep_html => 1, headers => [qw(# Source Agency FSG Title Keywords)]);
$te->parse($html);
my @results;
# Examine all matching tables
for my $ts ($te->tables) {
for my $row ($ts->rows) {
$row->[4] =~ s/href=\"/href=\"$base_url/;
$row->[4] =~ /\s+?(.*)<\/a>/;
$row->[4] = $2;
$row->[6] = $1;
push(@results, $row);
}
}
return \@results
}
sub get_sam_data ($results) {
for my $row (@$results) {
my $bidmatch_link = $row->[6];
my $sam_link = get_doc($bidmatch_link);
$sam_link =~ m|https://beta.sam.gov/opp/(.*)/view|;
my $sam_json_link = "https://beta.sam.gov/api/prod/opps/v2/opportunities/$1";
if ($sam_link) {
my $sam_json = get_sam_json($sam_json_link);
push @$row, encode_json ($sam_json);
}
}
}
sub insert_rows ($results) {
my $sam_link = $results->[6];
my @headers = qw(id source agency fsg title keywords oppy_url notif_url date oppy_det);
my %data;
my %psc_map = (
A => 'Research and Development',
B => 'Special Studies and Analyses - Not R&D',
C => 'Architect and Engineering - Construction',
D => 'Automatic Data Processing and Telecommunication',
E => 'Purchase of Structures and Facilities',
F => 'Natural Resources and Conservation',
G => 'Social Services',
H => 'Quality Control, Testing, and Inspection',
J => 'Maintenance, Repair, and Rebuilding of Equipment',
K => 'Modification of Equipment',
L => 'Technical Representative',
M => 'Operation of Government Owned Facilities',
N => 'Installation of Equipment',
P => 'Salvage Services',
Q => 'Medical Services',
R => 'Professional, Administrative and Management Support',
S => 'Utilities and Housekeeping Services',
T => 'Photographic, Mapping, Printing, and Publications',
U => 'Education and Training',
V => 'Transportation, Travel and Relocation',
W => 'Lease or Rental of Equipment',
X => 'Lease or Rental of Facilities',
Y => 'Construction of Structures and Facilities',
Z => 'Maintenance, Repair or Alteration of Real Property',);
for my $row (@$results) {
@data{@headers} = @$row;
my %ins_data = (
notif_source => $data{'source'},
notif_date => $data{'date'},
notif_url => $data{'notif_url'},
notif_details => $data{'title'},
oppty_desc => $psc_map{$data{'fsg'}},
oppty_source => $data{'agency'},
oppty_date => '',
oppty_url => $data{'oppy_url'},
oppty_details => $data{'oppy_det'},
);
my ($stmt, @bind) = $sql->insert($sql_tbl, \%ins_data, {on_conflict => \'do nothing'});
my $sth = $dbh->prepare($stmt);
$sth->execute(@bind);
}
}
sub extract_bidmatch($url) {
my $ua = Mojo::UserAgent->new;
my $res = $ua->get($url)->result->to_string;
$res =~ /(?(.|\n)*?<\/table>)/gm;
my $html = $1;
my $te = HTML::TableExtract->new(keep_html=>1, headers=>[qw(Date Articles Read)]);
$te->parse($html);
my @results;
# Examine all matching tables
for my $ts ($te->tables) {
for my $row ($ts->rows) {
my $sel1 = $row->[0];
$sel1 =~ /?(.*)<\/a>/;
my $bm_det = "http://www.mybidmatch.com$1";
my $sel2 = $2;
$sel2 =~ /.*, (\w{3} \d{1,2}), \d{4}$/;
#my $date = $1;
my $date = Time::Piece->strptime($1 . ' 2019', '%b %d %Y')->ymd;
push(@results, [$bm_det, $date]);
}
}
return \@results
}
sub write_csv ($rows, $date) {
say "writing csv";
my $csv = Text::CSV->new({binary=>1, auto_diag=>1, eol => $/});
open my $fh, ">:encoding(utf8)", "/home/superfly/project-conquer/Project Conquer/Miscellaneous/bidmatch_$date.csv" or die "prime_contacts.csv: $!";
my @header = qw(# Source Agency FSG Title Keywords BM_Link Opp_Link NAICS PoP NAICS_Match);
$csv->say ($fh, \@header);
for my $row (@$rows) {
$csv->say ($fh, $row);
}
close $fh or die "bidmatch.csv: $!";
}
sub write_xlsx ($rows, $date) {
say "writing xlsx";
open my $fh, ">>", '/home/superfly/project-conquer/Project Conquer/Miscellaneous/bidmatch.xlsx' or die "Open Failed: $!";
my $xlsx = Excel::Writer::XLSX->new($fh);
my $worksheet = $xlsx->add_worksheet($date);
my @header = qw(# Source Agency FSG Title Keywords BM_Link Opp_Link NAICS PoP NAICS_Match);
$worksheet->write_row( 0, 0, \@header);
$worksheet->write_col( 1, 0, $rows);
$xlsx->close;
}
sub get_doc ($url) {
say $url;
my $ua = Mojo::UserAgent->new;
my $res = $ua->get($url)->result->to_string;
$res =~ /URL: /;
return $1;
}
sub get_sam_json ($sam_link) {
# https://beta.sam.gov/opp/57c1b3ccd33a4635b3a9decc5b015d0e/view
# https://beta.sam.gov/api/prod/opps/v2/opportunities/57c1b3ccd33a4635b3a9decc5b015d0e
my $ua = Mojo::UserAgent->new;
my $res = $ua->get($sam_link)->result->json;
return $res;
}