226 lines
7.0 KiB
Perl
226 lines
7.0 KiB
Perl
use strict;
|
|
use warnings;
|
|
use feature 'say';
|
|
use v5.26;
|
|
use feature qw(signatures);
|
|
no warnings qw(experimental::signatures);
|
|
|
|
use HTML::TableExtract;
|
|
use Mojo::UserAgent;
|
|
use Mojo::JSON qw(j encode_json);
|
|
use Excel::Writer::XLSX;
|
|
use Text::CSV;
|
|
use DDP;
|
|
use SQL::Abstract::Pg;
|
|
use Time::Piece;
|
|
# use Net::SSH::Tunnel;
|
|
|
|
use lib '/mnt/vol/dev/pc/Packages/';
|
|
use sqlConn;
|
|
|
|
|
|
|
|
# init SSH Tunnel
|
|
# system ("ssh -fNT -L 5433:localhost:5432 ubuntu\@18.214.5.20 -i /home/superfly/.ssh/nehantic-dev.pem -p 2222");
|
|
|
|
#$initialize DB
|
|
my $dbh = sqlConn->new( db => 'pc' )->dbh;
|
|
my $sql = SQL::Abstract->new();
|
|
my $sql_tbl = 'pc.opportunity';
|
|
|
|
my $base_url = 'http://www.mybidmatch.com';
|
|
my $bm_home = 'http://www.mybidmatch.com/go?sub=058FAE4A-456C-4FF5-BA74-8EDD033DE87D';
|
|
|
|
##find previous
|
|
my ($stmt, @bind) = $sql->select($sql_tbl, 'notif_url');
|
|
my $sth = $dbh->prepare($stmt);
|
|
$sth->execute(@bind);
|
|
my $prev = $sth->fetchall_hashref('notif_url');
|
|
|
|
|
|
my $bm = extract_bidmatch($bm_home);
|
|
for my $day (@$bm) {
|
|
"a" =~ /a/; ## Reset captures to undef.
|
|
my $url = $day->[0];
|
|
my $date = $day->[1];
|
|
next if $prev->{$url};
|
|
my $results = get_results($url);
|
|
map {push @$_, $url} @$results;
|
|
map {push @$_, $date} @$results;
|
|
my $sam_data = get_sam_data($results);
|
|
insert_rows($results);
|
|
}
|
|
refresh_report();
|
|
|
|
sub refresh_report() {
|
|
my $sql = q'INSERT INTO pc.opportunity_report
|
|
SELECT 1, e.* FROM pc.opportunity_extract e LEFT JOIN pc.opportunity_report r USING (id)
|
|
WHERE r.id IS NULL';
|
|
$dbh->do($sql);
|
|
}
|
|
|
|
sub get_results($url) {
|
|
my $ua = Mojo::UserAgent->new;
|
|
my $res = $ua->get($url)->result->to_string;
|
|
return if $res =~ /No articles/;
|
|
$res =~ /(<table(?:.*class="data")>?(.|\n)*?<\/table>)/gm;
|
|
return extract_table($1);
|
|
}
|
|
|
|
|
|
sub extract_table($html) {
|
|
my $te = HTML::TableExtract->new(keep_html => 1, headers => [qw(# Source Agency FSG Title Keywords)]);
|
|
$te->parse($html);
|
|
|
|
my @results;
|
|
# Examine all matching tables
|
|
for my $ts ($te->tables) {
|
|
for my $row ($ts->rows) {
|
|
$row->[4] =~ s/href=\"/href=\"$base_url/;
|
|
$row->[4] =~ /<a href="(.*)">\s+?(.*)<\/a>/;
|
|
$row->[4] = $2;
|
|
$row->[6] = $1;
|
|
push(@results, $row);
|
|
}
|
|
}
|
|
return \@results
|
|
}
|
|
|
|
|
|
sub get_sam_data ($results) {
|
|
for my $row (@$results) {
|
|
my $bidmatch_link = $row->[6];
|
|
my $sam_link = get_doc($bidmatch_link);
|
|
$sam_link =~ m|https://beta.sam.gov/opp/(.*)/view|;
|
|
my $sam_json_link = "https://beta.sam.gov/api/prod/opps/v2/opportunities/$1";
|
|
|
|
if ($sam_link) {
|
|
my $sam_json = get_sam_json($sam_json_link);
|
|
push @$row, encode_json ($sam_json);
|
|
}
|
|
}
|
|
}
|
|
|
|
sub insert_rows ($results) {
|
|
my $sam_link = $results->[6];
|
|
my @headers = qw(id source agency fsg title keywords oppy_url notif_url date oppy_det);
|
|
my %data;
|
|
|
|
my %psc_map = (
|
|
A => 'Research and Development',
|
|
B => 'Special Studies and Analyses - Not R&D',
|
|
C => 'Architect and Engineering - Construction',
|
|
D => 'Automatic Data Processing and Telecommunication',
|
|
E => 'Purchase of Structures and Facilities',
|
|
F => 'Natural Resources and Conservation',
|
|
G => 'Social Services',
|
|
H => 'Quality Control, Testing, and Inspection',
|
|
J => 'Maintenance, Repair, and Rebuilding of Equipment',
|
|
K => 'Modification of Equipment',
|
|
L => 'Technical Representative',
|
|
M => 'Operation of Government Owned Facilities',
|
|
N => 'Installation of Equipment',
|
|
P => 'Salvage Services',
|
|
Q => 'Medical Services',
|
|
R => 'Professional, Administrative and Management Support',
|
|
S => 'Utilities and Housekeeping Services',
|
|
T => 'Photographic, Mapping, Printing, and Publications',
|
|
U => 'Education and Training',
|
|
V => 'Transportation, Travel and Relocation',
|
|
W => 'Lease or Rental of Equipment',
|
|
X => 'Lease or Rental of Facilities',
|
|
Y => 'Construction of Structures and Facilities',
|
|
Z => 'Maintenance, Repair or Alteration of Real Property',);
|
|
|
|
|
|
for my $row (@$results) {
|
|
@data{@headers} = @$row;
|
|
my %ins_data = (
|
|
notif_source => $data{'source'},
|
|
notif_date => $data{'date'},
|
|
notif_url => $data{'notif_url'},
|
|
notif_details => $data{'title'},
|
|
oppty_desc => $psc_map{$data{'fsg'}},
|
|
oppty_source => $data{'agency'},
|
|
oppty_date => '',
|
|
oppty_url => $data{'oppy_url'},
|
|
oppty_details => $data{'oppy_det'},
|
|
);
|
|
my ($stmt, @bind) = $sql->insert($sql_tbl, \%ins_data, {on_conflict => \'do nothing'});
|
|
my $sth = $dbh->prepare($stmt);
|
|
$sth->execute(@bind);
|
|
}
|
|
}
|
|
|
|
|
|
sub extract_bidmatch($url) {
|
|
my $ua = Mojo::UserAgent->new;
|
|
my $res = $ua->get($url)->result->to_string;
|
|
$res =~ /(<table(?:.*class="data")>?(.|\n)*?<\/table>)/gm;
|
|
my $html = $1;
|
|
my $te = HTML::TableExtract->new(keep_html=>1, headers=>[qw(Date Articles Read)]);
|
|
$te->parse($html);
|
|
|
|
my @results;
|
|
# Examine all matching tables
|
|
for my $ts ($te->tables) {
|
|
for my $row ($ts->rows) {
|
|
|
|
my $sel1 = $row->[0];
|
|
$sel1 =~ /<a href="(\/.*)">?(.*)<\/a>/;
|
|
my $bm_det = "http://www.mybidmatch.com$1";
|
|
|
|
my $sel2 = $2;
|
|
$sel2 =~ /.*, (\w{3} \d{1,2}), \d{4}$/;
|
|
#my $date = $1;
|
|
my $date = Time::Piece->strptime($1 . ' 2019', '%b %d %Y')->ymd;
|
|
push(@results, [$bm_det, $date]);
|
|
}
|
|
}
|
|
return \@results
|
|
}
|
|
|
|
|
|
sub write_csv ($rows, $date) {
|
|
say "writing csv";
|
|
my $csv = Text::CSV->new({binary=>1, auto_diag=>1, eol => $/});
|
|
open my $fh, ">:encoding(utf8)", "/home/superfly/project-conquer/Project Conquer/Miscellaneous/bidmatch_$date.csv" or die "prime_contacts.csv: $!";
|
|
my @header = qw(# Source Agency FSG Title Keywords BM_Link Opp_Link NAICS PoP NAICS_Match);
|
|
$csv->say ($fh, \@header);
|
|
|
|
for my $row (@$rows) {
|
|
$csv->say ($fh, $row);
|
|
}
|
|
|
|
close $fh or die "bidmatch.csv: $!";
|
|
}
|
|
|
|
|
|
sub write_xlsx ($rows, $date) {
|
|
say "writing xlsx";
|
|
open my $fh, ">>", '/home/superfly/project-conquer/Project Conquer/Miscellaneous/bidmatch.xlsx' or die "Open Failed: $!";
|
|
my $xlsx = Excel::Writer::XLSX->new($fh);
|
|
my $worksheet = $xlsx->add_worksheet($date);
|
|
my @header = qw(# Source Agency FSG Title Keywords BM_Link Opp_Link NAICS PoP NAICS_Match);
|
|
$worksheet->write_row( 0, 0, \@header);
|
|
$worksheet->write_col( 1, 0, $rows);
|
|
$xlsx->close;
|
|
}
|
|
|
|
|
|
sub get_doc ($url) {
|
|
say $url;
|
|
my $ua = Mojo::UserAgent->new;
|
|
my $res = $ua->get($url)->result->to_string;
|
|
$res =~ /URL: <a href="(.*)">/;
|
|
return $1;
|
|
}
|
|
|
|
|
|
sub get_sam_json ($sam_link) {
|
|
# https://beta.sam.gov/opp/57c1b3ccd33a4635b3a9decc5b015d0e/view
|
|
# https://beta.sam.gov/api/prod/opps/v2/opportunities/57c1b3ccd33a4635b3a9decc5b015d0e
|
|
my $ua = Mojo::UserAgent->new;
|
|
my $res = $ua->get($sam_link)->result->json;
|
|
return $res;
|
|
} |